xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision fe2557b6eb55be3c2d36e1ee396e0f10e41bd214)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
// Uncomment the following define to have TRACE() print via dprintf(),
// prefixed with STACK_DEBUG_PREFIX. When it is not defined, TRACE() expands
// to an empty statement.
//#define TRACE_SOCKET
#ifdef TRACE_SOCKET
#	define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
#else
#	define TRACE(x...) ;
#endif
46 
47 
struct net_socket_private;
typedef DoublyLinkedList<net_socket_private> SocketList;

/*!	Stack-private extension of net_socket.
	It adds the bookkeeping needed for the global socket list, for the
	parent/child relation between a socket and the sockets spawned from it,
	and for select() notifications.
*/
struct net_socket_private : net_socket,
		DoublyLinkedListLinkImpl<net_socket_private>,
		BWeakReferenceable {
	net_socket_private();
	~net_socket_private();

	void RemoveFromParent();

	// weak reference to the socket this one was spawned from (set in
	// socket_spawn_pending(), cleared in RemoveFromParent())
	BWeakReference<net_socket_private> parent;
	// team that opened the socket; -1 until set, inherited by children
	team_id						owner;
	// limit set via socket_set_max_backlog()
	uint32						max_backlog;
	// number of sockets in pending_children and connected_children
	uint32						child_count;
	// children spawned via socket_spawn_pending() that are not yet connected
	SocketList					pending_children;
	// children moved here by socket_connected(), waiting to be dequeued
	SocketList					connected_children;

	// select() entries registered via socket_request_notification()
	struct select_sync_pool*	select_pool;
	// held while accessing the child lists, child_count, and select_pool
	mutex						lock;

	// set once socket_connected() moved the socket to its parent's
	// connected_children list
	bool						is_connected;
	// whether the socket is linked into the global sSocketList
	bool						is_in_socket_list;
};
72 
73 
// Forward declarations for functions that are called before their
// definition in this file.
int socket_bind(net_socket* socket, const struct sockaddr* address,
	socklen_t addressLength);
int socket_setsockopt(net_socket* socket, int level, int option,
	const void* value, int length);
ssize_t socket_read_avail(net_socket* socket);

// List of all sockets that are not queued at a parent socket; accesses are
// serialized by sSocketLock.
static SocketList sSocketList;
static mutex sSocketLock;
82 
83 
84 net_socket_private::net_socket_private()
85 	:
86 	owner(-1),
87 	max_backlog(0),
88 	child_count(0),
89 	select_pool(NULL),
90 	is_connected(false),
91 	is_in_socket_list(false)
92 {
93 	first_protocol = NULL;
94 	first_info = NULL;
95 	options = 0;
96 	linger = 0;
97 	bound_to_device = 0;
98 	error = 0;
99 
100 	address.ss_len = 0;
101 	peer.ss_len = 0;
102 
103 	mutex_init(&lock, "socket");
104 
105 	// set defaults (may be overridden by the protocols)
106 	send.buffer_size = 65535;
107 	send.low_water_mark = 1;
108 	send.timeout = B_INFINITE_TIMEOUT;
109 	receive.buffer_size = 65535;
110 	receive.low_water_mark = 1;
111 	receive.timeout = B_INFINITE_TIMEOUT;
112 }
113 
114 
115 net_socket_private::~net_socket_private()
116 {
117 	TRACE("delete net_socket %p\n", this);
118 
119 	if (parent != NULL)
120 		panic("socket still has a parent!");
121 
122 	if (is_in_socket_list) {
123 		MutexLocker _(sSocketLock);
124 		sSocketList.Remove(this);
125 	}
126 
127 	mutex_lock(&lock);
128 
129 	// also delete all children of this socket
130 	while (net_socket_private* child = pending_children.RemoveHead()) {
131 		child->RemoveFromParent();
132 	}
133 	while (net_socket_private* child = connected_children.RemoveHead()) {
134 		child->RemoveFromParent();
135 	}
136 
137 	mutex_unlock(&lock);
138 
139 	put_domain_protocols(this);
140 
141 	mutex_destroy(&lock);
142 }
143 
144 
145 void
146 net_socket_private::RemoveFromParent()
147 {
148 	ASSERT(!is_in_socket_list && parent != NULL);
149 
150 	parent = NULL;
151 
152 	mutex_lock(&sSocketLock);
153 	sSocketList.Add(this);
154 	mutex_unlock(&sSocketLock);
155 
156 	is_in_socket_list = true;
157 
158 	ReleaseReference();
159 }
160 
161 
162 //	#pragma mark -
163 
164 
165 static size_t
166 compute_user_iovec_length(iovec* userVec, uint32 count)
167 {
168 	size_t length = 0;
169 
170 	for (uint32 i = 0; i < count; i++) {
171 		iovec vec;
172 		if (user_memcpy(&vec, userVec + i, sizeof(iovec)) < B_OK)
173 			return 0;
174 
175 		length += vec.iov_len;
176 	}
177 
178 	return length;
179 }
180 
181 
182 static status_t
183 create_socket(int family, int type, int protocol, net_socket_private** _socket)
184 {
185 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
186 	if (socket == NULL)
187 		return B_NO_MEMORY;
188 	status_t status = socket->InitCheck();
189 	if (status != B_OK) {
190 		delete socket;
191 		return status;
192 	}
193 
194 	socket->family = family;
195 	socket->type = type;
196 	socket->protocol = protocol;
197 
198 	status = get_domain_protocols(socket);
199 	if (status != B_OK) {
200 		delete socket;
201 		return status;
202 	}
203 
204 	TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
205 		socket->type, socket->protocol);
206 
207 #ifdef TRACE_SOCKET
208 	net_protocol* current = socket->first_protocol;
209 	for (int i = 0; current != NULL; current = current->next, i++)
210 		TRACE("  [%d] %p  %s\n", i, current, current->module->info.name);
211 #endif
212 
213 	*_socket = socket;
214 	return B_OK;
215 }
216 
217 
218 static status_t
219 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
220 	void* data, size_t dataLen)
221 {
222 	cmsghdr* header = (cmsghdr*)data;
223 
224 	if (dataLen == 0)
225 		return B_OK;
226 
227 	if (socket->first_info->add_ancillary_data == NULL)
228 		return B_NOT_SUPPORTED;
229 
230 	while (true) {
231 		if (header->cmsg_len < CMSG_LEN(0) || header->cmsg_len > dataLen)
232 			return B_BAD_VALUE;
233 
234 		status_t status = socket->first_info->add_ancillary_data(
235 			socket->first_protocol, container, header);
236 		if (status != B_OK)
237 			return status;
238 
239 		if (dataLen <= _ALIGN(header->cmsg_len))
240 			break;
241 		dataLen -= _ALIGN(header->cmsg_len);
242 		header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
243 	}
244 
245 	return B_OK;
246 }
247 
248 
249 static status_t
250 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
251 	msghdr* messageHeader)
252 {
253 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
254 	int dataBufferLen = messageHeader->msg_controllen;
255 
256 	if (container == NULL || dataBuffer == NULL) {
257 		messageHeader->msg_controllen = 0;
258 		return B_OK;
259 	}
260 
261 	ancillary_data_header header;
262 	void* data = NULL;
263 
264 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
265 		if (socket->first_info->process_ancillary_data == NULL)
266 			return B_NOT_SUPPORTED;
267 
268 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
269 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
270 		if (bytesWritten < 0)
271 			return bytesWritten;
272 
273 		dataBuffer += bytesWritten;
274 		dataBufferLen -= bytesWritten;
275 	}
276 
277 	messageHeader->msg_controllen -= dataBufferLen;
278 
279 	return B_OK;
280 }
281 
282 
283 static status_t
284 process_ancillary_data(net_socket* socket,
285 	net_buffer* buffer, msghdr* messageHeader)
286 {
287 	void *dataBuffer = messageHeader->msg_control;
288 	ssize_t bytesWritten;
289 
290 	if (dataBuffer == NULL) {
291 		messageHeader->msg_controllen = 0;
292 		return B_OK;
293 	}
294 
295 	if (socket->first_info->process_ancillary_data_no_container == NULL)
296 		return B_NOT_SUPPORTED;
297 
298 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
299 		socket->first_protocol, buffer, dataBuffer,
300 		messageHeader->msg_controllen);
301 	if (bytesWritten < 0)
302 		return bytesWritten;
303 	messageHeader->msg_controllen = bytesWritten;
304 
305 	return B_OK;
306 }
307 
308 
309 static ssize_t
310 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
311 	size_t length, int flags)
312 {
313 	iovec stackVec = { data, length };
314 	iovec* vecs = header ? header->msg_iov : &stackVec;
315 	int vecCount = header ? header->msg_iovlen : 1;
316 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
317 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
318 
319 	ancillary_data_container* ancillaryData = NULL;
320 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
321 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
322 		addressLen);
323 	if (bytesRead < 0)
324 		return bytesRead;
325 
326 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(ancillaryData,
327 		&delete_ancillary_data_container);
328 
329 	// process ancillary data
330 	if (header != NULL) {
331 		status_t status = process_ancillary_data(socket, ancillaryData, header);
332 		if (status != B_OK)
333 			return status;
334 
335 		header->msg_flags = 0;
336 	}
337 
338 	return bytesRead;
339 }
340 
341 
342 #if ENABLE_DEBUGGER_COMMANDS
343 
344 
345 static void
346 print_socket_line(net_socket_private* socket, const char* prefix)
347 {
348 	BReference<net_socket_private> parent = socket->parent.GetReference();
349 	kprintf("%s%p %2d.%2d.%2d %6" B_PRId32 " %p %p  %p%s\n", prefix, socket,
350 		socket->family, socket->type, socket->protocol, socket->owner,
351 		socket->first_protocol, socket->first_info, parent.Get(),
352 		parent.Get() != NULL ? socket->is_connected ? " (c)" : " (p)" : "");
353 }
354 
355 
356 static int
357 dump_socket(int argc, char** argv)
358 {
359 	if (argc < 2) {
360 		kprintf("usage: %s [address]\n", argv[0]);
361 		return 0;
362 	}
363 
364 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
365 
366 	kprintf("SOCKET %p\n", socket);
367 	kprintf("  family.type.protocol: %d.%d.%d\n",
368 		socket->family, socket->type, socket->protocol);
369 	BReference<net_socket_private> parent = socket->parent.GetReference();
370 	kprintf("  parent:               %p\n", parent.Get());
371 	kprintf("  first protocol:       %p\n", socket->first_protocol);
372 	kprintf("  first module_info:    %p\n", socket->first_info);
373 	kprintf("  options:              %x\n", socket->options);
374 	kprintf("  linger:               %d\n", socket->linger);
375 	kprintf("  bound to device:      %" B_PRIu32 "\n", socket->bound_to_device);
376 	kprintf("  owner:                %" B_PRId32 "\n", socket->owner);
377 	kprintf("  max backlog:          %" B_PRId32 "\n", socket->max_backlog);
378 	kprintf("  is connected:         %d\n", socket->is_connected);
379 	kprintf("  child_count:          %" B_PRIu32 "\n", socket->child_count);
380 
381 	if (socket->child_count == 0)
382 		return 0;
383 
384 	kprintf("    pending children:\n");
385 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
386 	while (net_socket_private* child = iterator.Next()) {
387 		print_socket_line(child, "      ");
388 	}
389 
390 	kprintf("    connected children:\n");
391 	iterator = socket->connected_children.GetIterator();
392 	while (net_socket_private* child = iterator.Next()) {
393 		print_socket_line(child, "      ");
394 	}
395 
396 	return 0;
397 }
398 
399 
400 static int
401 dump_sockets(int argc, char** argv)
402 {
403 	kprintf("address        kind  owner protocol   module_info parent\n");
404 
405 	SocketList::Iterator iterator = sSocketList.GetIterator();
406 	while (net_socket_private* socket = iterator.Next()) {
407 		print_socket_line(socket, "");
408 
409 		SocketList::Iterator childIterator
410 			= socket->pending_children.GetIterator();
411 		while (net_socket_private* child = childIterator.Next()) {
412 			print_socket_line(child, " ");
413 		}
414 
415 		childIterator = socket->connected_children.GetIterator();
416 		while (net_socket_private* child = childIterator.Next()) {
417 			print_socket_line(child, " ");
418 		}
419 	}
420 
421 	return 0;
422 }
423 
424 
425 #endif	// ENABLE_DEBUGGER_COMMANDS
426 
427 
428 //	#pragma mark -
429 
430 
431 status_t
432 socket_open(int family, int type, int protocol, net_socket** _socket)
433 {
434 	net_socket_private* socket;
435 	status_t status = create_socket(family, type, protocol, &socket);
436 	if (status != B_OK)
437 		return status;
438 
439 	status = socket->first_info->open(socket->first_protocol);
440 	if (status != B_OK) {
441 		delete socket;
442 		return status;
443 	}
444 
445 	socket->owner = team_get_current_team_id();
446 	socket->is_in_socket_list = true;
447 
448 	mutex_lock(&sSocketLock);
449 	sSocketList.Add(socket);
450 	mutex_unlock(&sSocketLock);
451 
452 	*_socket = socket;
453 	return B_OK;
454 }
455 
456 
457 status_t
458 socket_close(net_socket* _socket)
459 {
460 	net_socket_private* socket = (net_socket_private*)_socket;
461 	return socket->first_info->close(socket->first_protocol);
462 }
463 
464 
465 void
466 socket_free(net_socket* _socket)
467 {
468 	net_socket_private* socket = (net_socket_private*)_socket;
469 	socket->first_info->free(socket->first_protocol);
470 	socket->ReleaseReference();
471 }
472 
473 
/*!	Not implemented: ignores all of its arguments and always returns -1.
*/
status_t
socket_readv(net_socket* socket, const iovec* vecs, size_t vecCount,
	size_t* _length)
{
	return -1;
}
480 
481 
482 status_t
483 socket_writev(net_socket* socket, const iovec* vecs, size_t vecCount,
484 	size_t* _length)
485 {
486 	if (socket->peer.ss_len == 0)
487 		return ECONNRESET;
488 
489 	if (socket->address.ss_len == 0) {
490 		// try to bind first
491 		status_t status = socket_bind(socket, NULL, 0);
492 		if (status != B_OK)
493 			return status;
494 	}
495 
496 	// TODO: useful, maybe even computed header space!
497 	net_buffer* buffer = gNetBufferModule.create(256);
498 	if (buffer == NULL)
499 		return ENOBUFS;
500 
501 	// copy data into buffer
502 
503 	for (uint32 i = 0; i < vecCount; i++) {
504 		if (gNetBufferModule.append(buffer, vecs[i].iov_base,
505 				vecs[i].iov_len) < B_OK) {
506 			gNetBufferModule.free(buffer);
507 			return ENOBUFS;
508 		}
509 	}
510 
511 	memcpy(buffer->source, &socket->address, socket->address.ss_len);
512 	memcpy(buffer->destination, &socket->peer, socket->peer.ss_len);
513 	size_t size = buffer->size;
514 
515 	ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
516 		buffer);
517 	if (bytesWritten < B_OK) {
518 		if (buffer->size != size) {
519 			// this appears to be a partial write
520 			*_length = size - buffer->size;
521 		}
522 		gNetBufferModule.free(buffer);
523 		return bytesWritten;
524 	}
525 
526 	*_length = bytesWritten;
527 	return B_OK;
528 }
529 
530 
531 status_t
532 socket_control(net_socket* socket, int32 op, void* data, size_t length)
533 {
534 	switch (op) {
535 		case FIONBIO:
536 		{
537 			if (data == NULL)
538 				return B_BAD_VALUE;
539 
540 			int value;
541 			if (is_syscall()) {
542 				if (!IS_USER_ADDRESS(data)
543 					|| user_memcpy(&value, data, sizeof(int)) != B_OK) {
544 					return B_BAD_ADDRESS;
545 				}
546 			} else
547 				value = *(int*)data;
548 
549 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
550 				sizeof(int));
551 		}
552 
553 		case FIONREAD:
554 		{
555 			if (data == NULL)
556 				return B_BAD_VALUE;
557 
558 			int available = (int)socket_read_avail(socket);
559 			if (available < 0)
560 				return available;
561 
562 			if (is_syscall()) {
563 				if (!IS_USER_ADDRESS(data)
564 					|| user_memcpy(data, &available, sizeof(available))
565 						!= B_OK) {
566 					return B_BAD_ADDRESS;
567 				}
568 			} else
569 				*(int*)data = available;
570 
571 			return B_OK;
572 		}
573 
574 		case B_SET_BLOCKING_IO:
575 		case B_SET_NONBLOCKING_IO:
576 		{
577 			int value = op == B_SET_NONBLOCKING_IO;
578 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
579 				sizeof(int));
580 		}
581 	}
582 
583 	return socket->first_info->control(socket->first_protocol,
584 		LEVEL_DRIVER_IOCTL, op, data, &length);
585 }
586 
587 
588 ssize_t
589 socket_read_avail(net_socket* socket)
590 {
591 	return socket->first_info->read_avail(socket->first_protocol);
592 }
593 
594 
595 ssize_t
596 socket_send_avail(net_socket* socket)
597 {
598 	return socket->first_info->send_avail(socket->first_protocol);
599 }
600 
601 
602 status_t
603 socket_send_data(net_socket* socket, net_buffer* buffer)
604 {
605 	return socket->first_info->send_data(socket->first_protocol,
606 		buffer);
607 }
608 
609 
610 status_t
611 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
612 	net_buffer** _buffer)
613 {
614 	status_t status = socket->first_info->read_data(socket->first_protocol,
615 		length, flags, _buffer);
616 	if (status != B_OK)
617 		return status;
618 
619 	if (*_buffer && length < (*_buffer)->size) {
620 		// discard any data behind the amount requested
621 		gNetBufferModule.trim(*_buffer, length);
622 	}
623 
624 	return status;
625 }
626 
627 
628 status_t
629 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
630 {
631 	MutexLocker locker(sSocketLock);
632 
633 	net_socket_private* socket = NULL;
634 	SocketList::Iterator iterator = sSocketList.GetIterator();
635 	uint32 cookie = *_cookie;
636 	uint32 count = 0;
637 
638 	while (true) {
639 		socket = iterator.Next();
640 		if (socket == NULL)
641 			return B_ENTRY_NOT_FOUND;
642 
643 		// TODO: also traverse the pending connections
644 		if (count == cookie)
645 			break;
646 
647 		if (family == -1 || family == socket->family)
648 			count++;
649 	}
650 
651 	*_cookie = count + 1;
652 
653 	stat->family = socket->family;
654 	stat->type = socket->type;
655 	stat->protocol = socket->protocol;
656 	stat->owner = socket->owner;
657 	stat->state[0] = '\0';
658 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
659 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
660 	stat->receive_queue_size = 0;
661 	stat->send_queue_size = 0;
662 
663 	// fill in protocol specific data (if supported by the protocol)
664 	size_t length = sizeof(net_stat);
665 	socket->first_info->control(socket->first_protocol, socket->protocol,
666 		NET_STAT_SOCKET, stat, &length);
667 
668 	return B_OK;
669 }
670 
671 
672 //	#pragma mark - connections
673 
674 
675 bool
676 socket_acquire(net_socket* _socket)
677 {
678 	net_socket_private* socket = (net_socket_private*)_socket;
679 
680 	// During destruction, the socket might still be accessible over its
681 	// endpoint protocol. We need to make sure the endpoint cannot acquire the
682 	// socket anymore -- while not obvious, the endpoint protocol is responsible
683 	// for the proper locking here.
684 	if (socket->CountReferences() == 0)
685 		return false;
686 
687 	socket->AcquireReference();
688 	return true;
689 }
690 
691 
692 bool
693 socket_release(net_socket* _socket)
694 {
695 	net_socket_private* socket = (net_socket_private*)_socket;
696 	return socket->ReleaseReference();
697 }
698 
699 
700 status_t
701 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
702 {
703 	net_socket_private* parent = (net_socket_private*)_parent;
704 
705 	TRACE("%s(%p)\n", __FUNCTION__, parent);
706 
707 	MutexLocker locker(parent->lock);
708 
709 	// We actually accept more pending connections to compensate for those
710 	// that never complete, and also make sure at least a single connection
711 	// can always be accepted
712 	if (parent->child_count > 3 * parent->max_backlog / 2)
713 		return ENOBUFS;
714 
715 	net_socket_private* socket;
716 	status_t status = create_socket(parent->family, parent->type,
717 		parent->protocol, &socket);
718 	if (status != B_OK)
719 		return status;
720 
721 	// inherit parent's properties
722 	socket->send = parent->send;
723 	socket->receive = parent->receive;
724 	socket->options = parent->options & ~SO_ACCEPTCONN;
725 	socket->linger = parent->linger;
726 	socket->owner = parent->owner;
727 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
728 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
729 
730 	// add to the parent's list of pending connections
731 	parent->pending_children.Add(socket);
732 	socket->parent = parent;
733 	parent->child_count++;
734 
735 	*_socket = socket;
736 	return B_OK;
737 }
738 
739 
740 /*!	Dequeues a connected child from a parent socket.
741 	It also returns a reference with the child socket.
742 */
743 status_t
744 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
745 {
746 	net_socket_private* parent = (net_socket_private*)_parent;
747 
748 	mutex_lock(&parent->lock);
749 
750 	net_socket_private* socket = parent->connected_children.RemoveHead();
751 	if (socket != NULL) {
752 		socket->AcquireReference();
753 		socket->RemoveFromParent();
754 		parent->child_count--;
755 		*_socket = socket;
756 	}
757 
758 	mutex_unlock(&parent->lock);
759 
760 	if (socket == NULL)
761 		return B_ENTRY_NOT_FOUND;
762 
763 	return B_OK;
764 }
765 
766 
767 ssize_t
768 socket_count_connected(net_socket* _parent)
769 {
770 	net_socket_private* parent = (net_socket_private*)_parent;
771 
772 	MutexLocker _(parent->lock);
773 	return parent->connected_children.Count();
774 }
775 
776 
777 status_t
778 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
779 {
780 	net_socket_private* socket = (net_socket_private*)_socket;
781 
782 	// we enforce an upper limit of connections waiting to be accepted
783 	if (backlog > 256)
784 		backlog = 256;
785 
786 	MutexLocker _(socket->lock);
787 
788 	// first remove the pending connections, then the already connected
789 	// ones as needed
790 	net_socket_private* child;
791 	while (socket->child_count > backlog
792 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
793 		child->RemoveFromParent();
794 		socket->child_count--;
795 	}
796 	while (socket->child_count > backlog
797 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
798 		child->RemoveFromParent();
799 		socket->child_count--;
800 	}
801 
802 	socket->max_backlog = backlog;
803 	return B_OK;
804 }
805 
806 
807 /*!	Returns whether or not this socket has a parent. The parent might not be
808 	valid anymore, though.
809 */
810 bool
811 socket_has_parent(net_socket* _socket)
812 {
813 	net_socket_private* socket = (net_socket_private*)_socket;
814 	return socket->parent != NULL;
815 }
816 
817 
818 /*!	The socket has been connected. It will be moved to the connected queue
819 	of its parent socket.
820 */
821 status_t
822 socket_connected(net_socket* _socket)
823 {
824 	net_socket_private* socket = (net_socket_private*)_socket;
825 
826 	TRACE("socket_connected(%p)\n", socket);
827 
828 	BReference<net_socket_private> parent = socket->parent.GetReference();
829 	if (parent.Get() == NULL)
830 		return B_BAD_VALUE;
831 
832 	MutexLocker _(parent->lock);
833 
834 	parent->pending_children.Remove(socket);
835 	parent->connected_children.Add(socket);
836 	socket->is_connected = true;
837 
838 	// notify parent
839 	if (parent->select_pool)
840 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
841 
842 	return B_OK;
843 }
844 
845 
846 /*!	The socket has been aborted. Steals the parent's reference, and releases
847 	it.
848 */
849 status_t
850 socket_aborted(net_socket* _socket)
851 {
852 	net_socket_private* socket = (net_socket_private*)_socket;
853 
854 	TRACE("socket_aborted(%p)\n", socket);
855 
856 	BReference<net_socket_private> parent = socket->parent.GetReference();
857 	if (parent.Get() == NULL)
858 		return B_BAD_VALUE;
859 
860 	MutexLocker _(parent->lock);
861 
862 	if (socket->is_connected)
863 		parent->connected_children.Remove(socket);
864 	else
865 		parent->pending_children.Remove(socket);
866 
867 	parent->child_count--;
868 	socket->RemoveFromParent();
869 
870 	return B_OK;
871 }
872 
873 
874 //	#pragma mark - notifications
875 
876 
877 status_t
878 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
879 {
880 	net_socket_private* socket = (net_socket_private*)_socket;
881 
882 	mutex_lock(&socket->lock);
883 
884 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
885 		event);
886 
887 	mutex_unlock(&socket->lock);
888 
889 	if (status != B_OK)
890 		return status;
891 
892 	// check if the event is already present
893 	// TODO: add support for poll() types
894 
895 	switch (event) {
896 		case B_SELECT_READ:
897 		{
898 			ssize_t available = socket_read_avail(socket);
899 			if ((ssize_t)socket->receive.low_water_mark <= available
900 				|| available < B_OK)
901 				notify_select_event(sync, event);
902 			break;
903 		}
904 		case B_SELECT_WRITE:
905 		{
906 			ssize_t available = socket_send_avail(socket);
907 			if ((ssize_t)socket->send.low_water_mark <= available
908 				|| available < B_OK)
909 				notify_select_event(sync, event);
910 			break;
911 		}
912 		case B_SELECT_ERROR:
913 			if (socket->error != B_OK)
914 				notify_select_event(sync, event);
915 			break;
916 	}
917 
918 	return B_OK;
919 }
920 
921 
922 status_t
923 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
924 {
925 	net_socket_private* socket = (net_socket_private*)_socket;
926 
927 	MutexLocker _(socket->lock);
928 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
929 }
930 
931 
932 status_t
933 socket_notify(net_socket* _socket, uint8 event, int32 value)
934 {
935 	net_socket_private* socket = (net_socket_private*)_socket;
936 	bool notify = true;
937 
938 	switch (event) {
939 		case B_SELECT_READ:
940 			if ((ssize_t)socket->receive.low_water_mark > value
941 				&& value >= B_OK)
942 				notify = false;
943 			break;
944 
945 		case B_SELECT_WRITE:
946 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
947 				notify = false;
948 			break;
949 
950 		case B_SELECT_ERROR:
951 			socket->error = value;
952 			break;
953 	}
954 
955 	MutexLocker _(socket->lock);
956 
957 	if (notify && socket->select_pool != NULL) {
958 		notify_select_event_pool(socket->select_pool, event);
959 
960 		if (event == B_SELECT_ERROR) {
961 			// always notify read/write on error
962 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
963 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
964 		}
965 	}
966 
967 	return B_OK;
968 }
969 
970 
971 //	#pragma mark - standard socket API
972 
973 
974 int
975 socket_accept(net_socket* socket, struct sockaddr* address,
976 	socklen_t* _addressLength, net_socket** _acceptedSocket)
977 {
978 	if ((socket->options & SO_ACCEPTCONN) == 0)
979 		return B_BAD_VALUE;
980 
981 	net_socket* accepted;
982 	status_t status = socket->first_info->accept(socket->first_protocol,
983 		&accepted);
984 	if (status != B_OK)
985 		return status;
986 
987 	if (address && *_addressLength > 0) {
988 		memcpy(address, &accepted->peer, min_c(*_addressLength,
989 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
990 		*_addressLength = accepted->peer.ss_len;
991 	}
992 
993 	*_acceptedSocket = accepted;
994 	return B_OK;
995 }
996 
997 
998 int
999 socket_bind(net_socket* socket, const struct sockaddr* address,
1000 	socklen_t addressLength)
1001 {
1002 	sockaddr empty;
1003 	if (address == NULL) {
1004 		// special - try to bind to an empty address, like INADDR_ANY
1005 		memset(&empty, 0, sizeof(sockaddr));
1006 		empty.sa_len = sizeof(sockaddr);
1007 		empty.sa_family = socket->family;
1008 
1009 		address = &empty;
1010 		addressLength = sizeof(sockaddr);
1011 	}
1012 
1013 	if (socket->address.ss_len != 0) {
1014 		status_t status = socket->first_info->unbind(socket->first_protocol,
1015 			(sockaddr*)&socket->address);
1016 		if (status != B_OK)
1017 			return status;
1018 	}
1019 
1020 	memcpy(&socket->address, address, sizeof(sockaddr));
1021 	socket->address.ss_len = sizeof(sockaddr_storage);
1022 
1023 	status_t status = socket->first_info->bind(socket->first_protocol,
1024 		(sockaddr*)address);
1025 	if (status != B_OK) {
1026 		// clear address again, as binding failed
1027 		socket->address.ss_len = 0;
1028 	}
1029 
1030 	return status;
1031 }
1032 
1033 
1034 int
1035 socket_connect(net_socket* socket, const struct sockaddr* address,
1036 	socklen_t addressLength)
1037 {
1038 	if (address == NULL || addressLength == 0)
1039 		return ENETUNREACH;
1040 
1041 	if (socket->address.ss_len == 0) {
1042 		// try to bind first
1043 		status_t status = socket_bind(socket, NULL, 0);
1044 		if (status != B_OK)
1045 			return status;
1046 	}
1047 
1048 	return socket->first_info->connect(socket->first_protocol, address);
1049 }
1050 
1051 
1052 int
1053 socket_getpeername(net_socket* socket, struct sockaddr* address,
1054 	socklen_t* _addressLength)
1055 {
1056 	if (socket->peer.ss_len == 0)
1057 		return ENOTCONN;
1058 
1059 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
1060 	*_addressLength = socket->peer.ss_len;
1061 	return B_OK;
1062 }
1063 
1064 
1065 int
1066 socket_getsockname(net_socket* socket, struct sockaddr* address,
1067 	socklen_t* _addressLength)
1068 {
1069 	if (socket->address.ss_len == 0)
1070 		return ENOTCONN;
1071 
1072 	memcpy(address, &socket->address, min_c(*_addressLength,
1073 		socket->address.ss_len));
1074 	*_addressLength = socket->address.ss_len;
1075 	return B_OK;
1076 }
1077 
1078 
1079 status_t
1080 socket_get_option(net_socket* socket, int level, int option, void* value,
1081 	int* _length)
1082 {
1083 	if (level != SOL_SOCKET)
1084 		return ENOPROTOOPT;
1085 
1086 	switch (option) {
1087 		case SO_SNDBUF:
1088 		{
1089 			uint32* size = (uint32*)value;
1090 			*size = socket->send.buffer_size;
1091 			*_length = sizeof(uint32);
1092 			return B_OK;
1093 		}
1094 
1095 		case SO_RCVBUF:
1096 		{
1097 			uint32* size = (uint32*)value;
1098 			*size = socket->receive.buffer_size;
1099 			*_length = sizeof(uint32);
1100 			return B_OK;
1101 		}
1102 
1103 		case SO_SNDLOWAT:
1104 		{
1105 			uint32* size = (uint32*)value;
1106 			*size = socket->send.low_water_mark;
1107 			*_length = sizeof(uint32);
1108 			return B_OK;
1109 		}
1110 
1111 		case SO_RCVLOWAT:
1112 		{
1113 			uint32* size = (uint32*)value;
1114 			*size = socket->receive.low_water_mark;
1115 			*_length = sizeof(uint32);
1116 			return B_OK;
1117 		}
1118 
1119 		case SO_RCVTIMEO:
1120 		case SO_SNDTIMEO:
1121 		{
1122 			if (*_length < (int)sizeof(struct timeval))
1123 				return B_BAD_VALUE;
1124 
1125 			bigtime_t timeout;
1126 			if (option == SO_SNDTIMEO)
1127 				timeout = socket->send.timeout;
1128 			else
1129 				timeout = socket->receive.timeout;
1130 			if (timeout == B_INFINITE_TIMEOUT)
1131 				timeout = 0;
1132 
1133 			struct timeval* timeval = (struct timeval*)value;
1134 			timeval->tv_sec = timeout / 1000000LL;
1135 			timeval->tv_usec = timeout % 1000000LL;
1136 
1137 			*_length = sizeof(struct timeval);
1138 			return B_OK;
1139 		}
1140 
1141 		case SO_NONBLOCK:
1142 		{
1143 			int32* _set = (int32*)value;
1144 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1145 			*_length = sizeof(int32);
1146 			return B_OK;
1147 		}
1148 
1149 		case SO_ACCEPTCONN:
1150 		case SO_BROADCAST:
1151 		case SO_DEBUG:
1152 		case SO_DONTROUTE:
1153 		case SO_KEEPALIVE:
1154 		case SO_OOBINLINE:
1155 		case SO_REUSEADDR:
1156 		case SO_REUSEPORT:
1157 		case SO_USELOOPBACK:
1158 		{
1159 			int32* _set = (int32*)value;
1160 			*_set = (socket->options & option) != 0;
1161 			*_length = sizeof(int32);
1162 			return B_OK;
1163 		}
1164 
1165 		case SO_TYPE:
1166 		{
1167 			int32* _set = (int32*)value;
1168 			*_set = socket->type;
1169 			*_length = sizeof(int32);
1170 			return B_OK;
1171 		}
1172 
1173 		case SO_ERROR:
1174 		{
1175 			int32* _set = (int32*)value;
1176 			*_set = socket->error;
1177 			*_length = sizeof(int32);
1178 
1179 			socket->error = B_OK;
1180 				// clear error upon retrieval
1181 			return B_OK;
1182 		}
1183 
1184 		default:
1185 			break;
1186 	}
1187 
1188 	dprintf("socket_getsockopt: unknown option %d\n", option);
1189 	return ENOPROTOOPT;
1190 }
1191 
1192 
1193 int
1194 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1195 	int* _length)
1196 {
1197 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1198 		level, option, value, _length);
1199 }
1200 
1201 
1202 int
1203 socket_listen(net_socket* socket, int backlog)
1204 {
1205 	status_t status = socket->first_info->listen(socket->first_protocol,
1206 		backlog);
1207 	if (status == B_OK)
1208 		socket->options |= SO_ACCEPTCONN;
1209 
1210 	return status;
1211 }
1212 
1213 
1214 ssize_t
1215 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1216 	int flags)
1217 {
1218 	// If the protocol sports read_data_no_buffer() we use it.
1219 	if (socket->first_info->read_data_no_buffer != NULL)
1220 		return socket_receive_no_buffer(socket, header, data, length, flags);
1221 
1222 	size_t totalLength = length;
1223 	net_buffer* buffer;
1224 	int i;
1225 
1226 	// the convention to this function is that have header been
1227 	// present, { data, length } would have been iovec[0] and is
1228 	// always considered like that
1229 
1230 	if (header) {
1231 		// calculate the length considering all of the extra buffers
1232 		for (i = 1; i < header->msg_iovlen; i++)
1233 			totalLength += header->msg_iov[i].iov_len;
1234 	}
1235 
1236 	status_t status = socket->first_info->read_data(
1237 		socket->first_protocol, totalLength, flags, &buffer);
1238 	if (status != B_OK)
1239 		return status;
1240 
1241 	// process ancillary data
1242 	if (header != NULL) {
1243 		if (buffer != NULL && header->msg_control != NULL) {
1244 			ancillary_data_container* container
1245 				= gNetBufferModule.get_ancillary_data(buffer);
1246 			if (container != NULL)
1247 				status = process_ancillary_data(socket, container, header);
1248 			else
1249 				status = process_ancillary_data(socket, buffer, header);
1250 			if (status != B_OK) {
1251 				gNetBufferModule.free(buffer);
1252 				return status;
1253 			}
1254 		} else
1255 			header->msg_controllen = 0;
1256 	}
1257 
1258 	// TODO: - returning a NULL buffer when received 0 bytes
1259 	//         may not make much sense as we still need the address
1260 	//       - gNetBufferModule.read() uses memcpy() instead of user_memcpy
1261 
1262 	size_t nameLen = 0;
1263 
1264 	if (header) {
1265 		// TODO: - consider the control buffer options
1266 		nameLen = header->msg_namelen;
1267 		header->msg_namelen = 0;
1268 		header->msg_flags = 0;
1269 	}
1270 
1271 	if (buffer == NULL)
1272 		return 0;
1273 
1274 	size_t bytesReceived = buffer->size, bytesCopied = 0;
1275 
1276 	length = min_c(bytesReceived, length);
1277 	if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
1278 		gNetBufferModule.free(buffer);
1279 		return ENOBUFS;
1280 	}
1281 
1282 	// if first copy was a success, proceed to following
1283 	// copies as required
1284 	bytesCopied += length;
1285 
1286 	if (header) {
1287 		// we only start considering at iovec[1]
1288 		// as { data, length } is iovec[0]
1289 		for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1290 			iovec& vec = header->msg_iov[i];
1291 			size_t toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1292 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1293 					toRead) < B_OK) {
1294 				break;
1295 			}
1296 
1297 			bytesCopied += toRead;
1298 		}
1299 
1300 		if (header->msg_name != NULL) {
1301 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1302 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1303 		}
1304 	}
1305 
1306 	gNetBufferModule.free(buffer);
1307 
1308 	if (bytesCopied < bytesReceived) {
1309 		if (header)
1310 			header->msg_flags = MSG_TRUNC;
1311 
1312 		if (flags & MSG_TRUNC)
1313 			return bytesReceived;
1314 	}
1315 
1316 	return bytesCopied;
1317 }
1318 
1319 
1320 ssize_t
1321 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1322 	int flags)
1323 {
1324 	const sockaddr* address = NULL;
1325 	socklen_t addressLength = 0;
1326 	size_t bytesLeft = length;
1327 
1328 	if (length > SSIZE_MAX)
1329 		return B_BAD_VALUE;
1330 
1331 	ancillary_data_container* ancillaryData = NULL;
1332 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(NULL,
1333 		&delete_ancillary_data_container);
1334 
1335 	if (header != NULL) {
1336 		address = (const sockaddr*)header->msg_name;
1337 		addressLength = header->msg_namelen;
1338 
1339 		// get the ancillary data
1340 		if (header->msg_control != NULL) {
1341 			ancillaryData = create_ancillary_data_container();
1342 			if (ancillaryData == NULL)
1343 				return B_NO_MEMORY;
1344 			ancillaryDataDeleter.SetTo(ancillaryData);
1345 
1346 			status_t status = add_ancillary_data(socket, ancillaryData,
1347 				(cmsghdr*)header->msg_control, header->msg_controllen);
1348 			if (status != B_OK)
1349 				return status;
1350 		}
1351 	}
1352 
1353 	if (addressLength == 0)
1354 		address = NULL;
1355 	else if (address == NULL)
1356 		return B_BAD_VALUE;
1357 
1358 	if (socket->peer.ss_len != 0) {
1359 		if (address != NULL)
1360 			return EISCONN;
1361 
1362 		// socket is connected, we use that address
1363 		address = (struct sockaddr*)&socket->peer;
1364 		addressLength = socket->peer.ss_len;
1365 	}
1366 
1367 	if (address == NULL || addressLength == 0) {
1368 		// don't know where to send to:
1369 		return EDESTADDRREQ;
1370 	}
1371 
1372 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1373 		&& bytesLeft > socket->send.buffer_size)
1374 		return EMSGSIZE;
1375 
1376 	if (socket->address.ss_len == 0) {
1377 		// try to bind first
1378 		status_t status = socket_bind(socket, NULL, 0);
1379 		if (status != B_OK)
1380 			return status;
1381 	}
1382 
1383 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1384 	if (socket->first_info->send_data_no_buffer != NULL) {
1385 		iovec stackVec = { (void*)data, length };
1386 		iovec* vecs = header ? header->msg_iov : &stackVec;
1387 		int vecCount = header ? header->msg_iovlen : 1;
1388 
1389 		ssize_t written = socket->first_info->send_data_no_buffer(
1390 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1391 			addressLength);
1392 		if (written > 0)
1393 			ancillaryDataDeleter.Detach();
1394 		return written;
1395 	}
1396 
1397 	// By convention, if a header is given, the (data, length) equals the first
1398 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1399 	// the size of the remaining ones.
1400 	if (header != NULL) {
1401 		if (header->msg_iovlen <= 1)
1402 			header = NULL;
1403 		else {
1404 // TODO: The iovecs have already been copied to kernel space. Simplify!
1405 			bytesLeft += compute_user_iovec_length(header->msg_iov + 1,
1406 				header->msg_iovlen - 1);
1407 		}
1408 	}
1409 
1410 	ssize_t bytesSent = 0;
1411 	size_t vecOffset = 0;
1412 	uint32 vecIndex = 0;
1413 
1414 	while (bytesLeft > 0) {
1415 		// TODO: useful, maybe even computed header space!
1416 		net_buffer* buffer = gNetBufferModule.create(256);
1417 		if (buffer == NULL)
1418 			return ENOBUFS;
1419 
1420 		while (buffer->size < socket->send.buffer_size
1421 			&& buffer->size < bytesLeft) {
1422 			if (vecIndex > 0 && vecOffset == 0) {
1423 				// retrieve next iovec buffer from header
1424 				iovec vec;
1425 				if (user_memcpy(&vec, header->msg_iov + vecIndex, sizeof(iovec))
1426 						< B_OK) {
1427 					gNetBufferModule.free(buffer);
1428 					return B_BAD_ADDRESS;
1429 				}
1430 
1431 				data = vec.iov_base;
1432 				length = vec.iov_len;
1433 			}
1434 
1435 			size_t bytes = length;
1436 			if (buffer->size + bytes > socket->send.buffer_size)
1437 				bytes = socket->send.buffer_size - buffer->size;
1438 
1439 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1440 				gNetBufferModule.free(buffer);
1441 				return ENOBUFS;
1442 			}
1443 
1444 			if (bytes != length) {
1445 				// partial send
1446 				vecOffset = bytes;
1447 				length -= vecOffset;
1448 				data = (uint8*)data + vecOffset;
1449 			} else if (header != NULL) {
1450 				// proceed with next buffer, if any
1451 				vecOffset = 0;
1452 				vecIndex++;
1453 
1454 				if (vecIndex >= (uint32)header->msg_iovlen)
1455 					break;
1456 			}
1457 		}
1458 
1459 		// attach ancillary data to the first buffer
1460 		status_t status = B_OK;
1461 		if (ancillaryData != NULL) {
1462 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1463 			ancillaryDataDeleter.Detach();
1464 			ancillaryData = NULL;
1465 		}
1466 
1467 		size_t bufferSize = buffer->size;
1468 		buffer->flags = flags;
1469 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1470 		memcpy(buffer->destination, address, addressLength);
1471 		buffer->destination->sa_len = addressLength;
1472 
1473 		if (status == B_OK) {
1474 			status = socket->first_info->send_data(socket->first_protocol,
1475 				buffer);
1476 		}
1477 		if (status != B_OK) {
1478 			size_t sizeAfterSend = buffer->size;
1479 			gNetBufferModule.free(buffer);
1480 
1481 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1482 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1483 				// this appears to be a partial write
1484 				return bytesSent + (bufferSize - sizeAfterSend);
1485 			}
1486 			return status;
1487 		}
1488 
1489 		bytesLeft -= bufferSize;
1490 		bytesSent += bufferSize;
1491 	}
1492 
1493 	return bytesSent;
1494 }
1495 
1496 
1497 status_t
1498 socket_set_option(net_socket* socket, int level, int option, const void* value,
1499 	int length)
1500 {
1501 	if (level != SOL_SOCKET)
1502 		return ENOPROTOOPT;
1503 
1504 	TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1505 
1506 	switch (option) {
1507 		// TODO: implement other options!
1508 		case SO_LINGER:
1509 		{
1510 			if (length < (int)sizeof(struct linger))
1511 				return B_BAD_VALUE;
1512 
1513 			struct linger* linger = (struct linger*)value;
1514 			if (linger->l_onoff) {
1515 				socket->options |= SO_LINGER;
1516 				socket->linger = linger->l_linger;
1517 			} else {
1518 				socket->options &= ~SO_LINGER;
1519 				socket->linger = 0;
1520 			}
1521 			return B_OK;
1522 		}
1523 
1524 		case SO_SNDBUF:
1525 			if (length != sizeof(uint32))
1526 				return B_BAD_VALUE;
1527 
1528 			socket->send.buffer_size = *(const uint32*)value;
1529 			return B_OK;
1530 
1531 		case SO_RCVBUF:
1532 			if (length != sizeof(uint32))
1533 				return B_BAD_VALUE;
1534 
1535 			socket->receive.buffer_size = *(const uint32*)value;
1536 			return B_OK;
1537 
1538 		case SO_SNDLOWAT:
1539 			if (length != sizeof(uint32))
1540 				return B_BAD_VALUE;
1541 
1542 			socket->send.low_water_mark = *(const uint32*)value;
1543 			return B_OK;
1544 
1545 		case SO_RCVLOWAT:
1546 			if (length != sizeof(uint32))
1547 				return B_BAD_VALUE;
1548 
1549 			socket->receive.low_water_mark = *(const uint32*)value;
1550 			return B_OK;
1551 
1552 		case SO_RCVTIMEO:
1553 		case SO_SNDTIMEO:
1554 		{
1555 			if (length != sizeof(struct timeval))
1556 				return B_BAD_VALUE;
1557 
1558 			const struct timeval* timeval = (const struct timeval*)value;
1559 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1560 			if (timeout == 0)
1561 				timeout = B_INFINITE_TIMEOUT;
1562 
1563 			if (option == SO_SNDTIMEO)
1564 				socket->send.timeout = timeout;
1565 			else
1566 				socket->receive.timeout = timeout;
1567 			return B_OK;
1568 		}
1569 
1570 		case SO_NONBLOCK:
1571 			if (length != sizeof(int32))
1572 				return B_BAD_VALUE;
1573 
1574 			if (*(const int32*)value) {
1575 				socket->send.timeout = 0;
1576 				socket->receive.timeout = 0;
1577 			} else {
1578 				socket->send.timeout = B_INFINITE_TIMEOUT;
1579 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1580 			}
1581 			return B_OK;
1582 
1583 		case SO_BROADCAST:
1584 		case SO_DEBUG:
1585 		case SO_DONTROUTE:
1586 		case SO_KEEPALIVE:
1587 		case SO_OOBINLINE:
1588 		case SO_REUSEADDR:
1589 		case SO_REUSEPORT:
1590 		case SO_USELOOPBACK:
1591 			if (length != sizeof(int32))
1592 				return B_BAD_VALUE;
1593 
1594 			if (*(const int32*)value)
1595 				socket->options |= option;
1596 			else
1597 				socket->options &= ~option;
1598 			return B_OK;
1599 
1600 		case SO_BINDTODEVICE:
1601 		{
1602 			if (length != sizeof(uint32))
1603 				return B_BAD_VALUE;
1604 
1605 			// TODO: we might want to check if the device exists at all
1606 			// (although it doesn't really harm when we don't)
1607 			socket->bound_to_device = *(const uint32*)value;
1608 			return B_OK;
1609 		}
1610 
1611 		default:
1612 			break;
1613 	}
1614 
1615 	dprintf("socket_setsockopt: unknown option %d\n", option);
1616 	return ENOPROTOOPT;
1617 }
1618 
1619 
1620 int
1621 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1622 	int length)
1623 {
1624 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1625 		level, option, value, length);
1626 }
1627 
1628 
1629 int
1630 socket_shutdown(net_socket* socket, int direction)
1631 {
1632 	return socket->first_info->shutdown(socket->first_protocol, direction);
1633 }
1634 
1635 
1636 status_t
1637 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1638 {
1639 	sockets[0] = NULL;
1640 	sockets[1] = NULL;
1641 
1642 	// create sockets
1643 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1644 	if (error != B_OK)
1645 		return error;
1646 
1647 	if (error == B_OK)
1648 		error = socket_open(family, type, protocol, &sockets[1]);
1649 
1650 	// bind one
1651 	if (error == B_OK)
1652 		error = socket_bind(sockets[0], NULL, 0);
1653 
1654 	// start listening
1655 	if (error == B_OK)
1656 		error = socket_listen(sockets[0], 1);
1657 
1658 	// connect them
1659 	if (error == B_OK) {
1660 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1661 			sockets[0]->address.ss_len);
1662 	}
1663 
1664 	// accept a socket
1665 	net_socket* acceptedSocket = NULL;
1666 	if (error == B_OK)
1667 		error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1668 
1669 	if (error == B_OK) {
1670 		// everything worked: close the listener socket
1671 		socket_close(sockets[0]);
1672 		socket_free(sockets[0]);
1673 		sockets[0] = acceptedSocket;
1674 	} else {
1675 		// close sockets on error
1676 		for (int i = 0; i < 2; i++) {
1677 			if (sockets[i] != NULL) {
1678 				socket_close(sockets[i]);
1679 				socket_free(sockets[i]);
1680 				sockets[i] = NULL;
1681 			}
1682 		}
1683 	}
1684 
1685 	return error;
1686 }
1687 
1688 
1689 //	#pragma mark -
1690 
1691 
1692 static status_t
1693 socket_std_ops(int32 op, ...)
1694 {
1695 	switch (op) {
1696 		case B_MODULE_INIT:
1697 		{
1698 			new (&sSocketList) SocketList;
1699 			mutex_init(&sSocketLock, "socket list");
1700 
1701 #if ENABLE_DEBUGGER_COMMANDS
1702 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1703 			add_debugger_command("socket", dump_socket, "dumps a socket");
1704 #endif
1705 			return B_OK;
1706 		}
1707 		case B_MODULE_UNINIT:
1708 			ASSERT(sSocketList.IsEmpty());
1709 			mutex_destroy(&sSocketLock);
1710 
1711 #if ENABLE_DEBUGGER_COMMANDS
1712 			remove_debugger_command("socket", dump_socket);
1713 			remove_debugger_command("sockets", dump_sockets);
1714 #endif
1715 			return B_OK;
1716 
1717 		default:
1718 			return B_ERROR;
1719 	}
1720 }
1721 
1722 
1723 net_socket_module_info gNetSocketModule = {
1724 	{
1725 		NET_SOCKET_MODULE_NAME,
1726 		0,
1727 		socket_std_ops
1728 	},
1729 	socket_open,
1730 	socket_close,
1731 	socket_free,
1732 
1733 	socket_readv,
1734 	socket_writev,
1735 	socket_control,
1736 
1737 	socket_read_avail,
1738 	socket_send_avail,
1739 
1740 	socket_send_data,
1741 	socket_receive_data,
1742 
1743 	socket_get_option,
1744 	socket_set_option,
1745 
1746 	socket_get_next_stat,
1747 
1748 	// connections
1749 	socket_acquire,
1750 	socket_release,
1751 	socket_spawn_pending,
1752 	socket_dequeue_connected,
1753 	socket_count_connected,
1754 	socket_set_max_backlog,
1755 	socket_has_parent,
1756 	socket_connected,
1757 	socket_aborted,
1758 
1759 	// notifications
1760 	socket_request_notification,
1761 	socket_cancel_notification,
1762 	socket_notify,
1763 
1764 	// standard socket API
1765 	socket_accept,
1766 	socket_bind,
1767 	socket_connect,
1768 	socket_getpeername,
1769 	socket_getsockname,
1770 	socket_getsockopt,
1771 	socket_listen,
1772 	socket_receive,
1773 	socket_send,
1774 	socket_setsockopt,
1775 	socket_shutdown,
1776 	socket_socketpair
1777 };
1778 
1779