xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 6f80a9801fedbe7355c4360bd204ba746ec3ec2d)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
// Uncomment the following define to route TRACE() output to dprintf(),
// prefixed with STACK_DEBUG_PREFIX.
//#define TRACE_SOCKET
#ifdef TRACE_SOCKET
#	define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
#else
	// tracing disabled: TRACE() expands to an empty statement
#	define TRACE(x...) ;
#endif
46 
47 
struct net_socket_private;
typedef DoublyLinkedList<net_socket_private> SocketList;

/*!	Stack-private extension of the public net_socket structure.
	Besides the net_socket fields it carries the list link used by the
	SocketList instances in this file, a reference count
	(BWeakReferenceable), and the bookkeeping for child sockets.
*/
struct net_socket_private : net_socket,
		DoublyLinkedListLinkImpl<net_socket_private>,
		BWeakReferenceable {
	net_socket_private();
	~net_socket_private();

	void RemoveFromParent();

	// weak reference to the socket this one was spawned from, if any
	BWeakReference<net_socket_private> parent;
	// team that owns the socket (-1 until assigned)
	team_id						owner;
	// limit set by socket_set_max_backlog()
	uint32						max_backlog;
	// number of sockets currently in pending_children + connected_children
	uint32						child_count;
	// children created by socket_spawn_pending() that are not connected yet
	SocketList					pending_children;
	// children moved over by socket_connected(), waiting to be dequeued
	SocketList					connected_children;

	// select()/poll() listeners registered on this socket
	struct select_sync_pool*	select_pool;
	// held while the child lists and select_pool are accessed
	mutex						lock;

	// set by socket_connected()
	bool						is_connected;
	// whether the socket is linked into the global sSocketList
	bool						is_in_socket_list;
};
72 
73 
// Forward declarations of functions that are used before their definition
// further down in this file.
int socket_bind(net_socket* socket, const struct sockaddr* address,
	socklen_t addressLength);
int socket_setsockopt(net_socket* socket, int level, int option,
	const void* value, int length);
ssize_t socket_read_avail(net_socket* socket);

// List of all sockets that are not queued as a child of another socket;
// every access in this file is done with sSocketLock held (the debugger
// commands excepted).
static SocketList sSocketList;
static mutex sSocketLock;
82 
83 
84 net_socket_private::net_socket_private()
85 	:
86 	owner(-1),
87 	max_backlog(0),
88 	child_count(0),
89 	select_pool(NULL),
90 	is_connected(false),
91 	is_in_socket_list(false)
92 {
93 	first_protocol = NULL;
94 	first_info = NULL;
95 	options = 0;
96 	linger = 0;
97 	bound_to_device = 0;
98 	error = 0;
99 
100 	address.ss_len = 0;
101 	peer.ss_len = 0;
102 
103 	mutex_init(&lock, "socket");
104 
105 	// set defaults (may be overridden by the protocols)
106 	send.buffer_size = 65535;
107 	send.low_water_mark = 1;
108 	send.timeout = B_INFINITE_TIMEOUT;
109 	receive.buffer_size = 65535;
110 	receive.low_water_mark = 1;
111 	receive.timeout = B_INFINITE_TIMEOUT;
112 }
113 
114 
115 net_socket_private::~net_socket_private()
116 {
117 	TRACE("delete net_socket %p\n", this);
118 
119 	if (parent != NULL)
120 		panic("socket still has a parent!");
121 
122 	if (is_in_socket_list) {
123 		MutexLocker _(sSocketLock);
124 		sSocketList.Remove(this);
125 	}
126 
127 	mutex_lock(&lock);
128 
129 	// also delete all children of this socket
130 	while (net_socket_private* child = pending_children.RemoveHead()) {
131 		child->RemoveFromParent();
132 	}
133 	while (net_socket_private* child = connected_children.RemoveHead()) {
134 		child->RemoveFromParent();
135 	}
136 
137 	mutex_unlock(&lock);
138 
139 	put_domain_protocols(this);
140 
141 	mutex_destroy(&lock);
142 }
143 
144 
145 void
146 net_socket_private::RemoveFromParent()
147 {
148 	ASSERT(!is_in_socket_list && parent != NULL);
149 
150 	parent = NULL;
151 
152 	mutex_lock(&sSocketLock);
153 	sSocketList.Add(this);
154 	mutex_unlock(&sSocketLock);
155 
156 	is_in_socket_list = true;
157 
158 	ReleaseReference();
159 }
160 
161 
162 //	#pragma mark -
163 
164 
165 static size_t
166 compute_user_iovec_length(iovec* userVec, uint32 count)
167 {
168 	size_t length = 0;
169 
170 	for (uint32 i = 0; i < count; i++) {
171 		iovec vec;
172 		if (user_memcpy(&vec, userVec + i, sizeof(iovec)) < B_OK)
173 			return 0;
174 
175 		length += vec.iov_len;
176 	}
177 
178 	return length;
179 }
180 
181 
182 static status_t
183 create_socket(int family, int type, int protocol, net_socket_private** _socket)
184 {
185 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
186 	if (socket == NULL)
187 		return B_NO_MEMORY;
188 	status_t status = socket->InitCheck();
189 	if (status != B_OK) {
190 		delete socket;
191 		return status;
192 	}
193 
194 	socket->family = family;
195 	socket->type = type;
196 	socket->protocol = protocol;
197 
198 	status = get_domain_protocols(socket);
199 	if (status != B_OK) {
200 		delete socket;
201 		return status;
202 	}
203 
204 	TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
205 		socket->type, socket->protocol);
206 
207 #ifdef TRACE_SOCKET
208 	net_protocol* current = socket->first_protocol;
209 	for (int i = 0; current != NULL; current = current->next, i++)
210 		TRACE("  [%d] %p  %s\n", i, current, current->module->info.name);
211 #endif
212 
213 	*_socket = socket;
214 	return B_OK;
215 }
216 
217 
218 static status_t
219 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
220 	void* data, size_t dataLen)
221 {
222 	cmsghdr* header = (cmsghdr*)data;
223 
224 	if (dataLen == 0)
225 		return B_OK;
226 
227 	if (socket->first_info->add_ancillary_data == NULL)
228 		return B_NOT_SUPPORTED;
229 
230 	while (true) {
231 		if (header->cmsg_len < CMSG_LEN(0) || header->cmsg_len > dataLen)
232 			return B_BAD_VALUE;
233 
234 		status_t status = socket->first_info->add_ancillary_data(
235 			socket->first_protocol, container, header);
236 		if (status != B_OK)
237 			return status;
238 
239 		if (dataLen <= _ALIGN(header->cmsg_len))
240 			break;
241 		dataLen -= _ALIGN(header->cmsg_len);
242 		header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
243 	}
244 
245 	return B_OK;
246 }
247 
248 
249 static status_t
250 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
251 	msghdr* messageHeader)
252 {
253 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
254 	int dataBufferLen = messageHeader->msg_controllen;
255 
256 	if (container == NULL || dataBuffer == NULL) {
257 		messageHeader->msg_controllen = 0;
258 		return B_OK;
259 	}
260 
261 	ancillary_data_header header;
262 	void* data = NULL;
263 
264 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
265 		if (socket->first_info->process_ancillary_data == NULL)
266 			return B_NOT_SUPPORTED;
267 
268 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
269 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
270 		if (bytesWritten < 0)
271 			return bytesWritten;
272 
273 		dataBuffer += bytesWritten;
274 		dataBufferLen -= bytesWritten;
275 	}
276 
277 	messageHeader->msg_controllen -= dataBufferLen;
278 
279 	return B_OK;
280 }
281 
282 
283 static status_t
284 process_ancillary_data(net_socket* socket,
285 	net_buffer* buffer, msghdr* messageHeader)
286 {
287 	void *dataBuffer = messageHeader->msg_control;
288 	ssize_t bytesWritten;
289 
290 	if (dataBuffer == NULL) {
291 		messageHeader->msg_controllen = 0;
292 		return B_OK;
293 	}
294 
295 	if (socket->first_info->process_ancillary_data_no_container == NULL)
296 		return B_NOT_SUPPORTED;
297 
298 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
299 		socket->first_protocol, buffer, dataBuffer,
300 		messageHeader->msg_controllen);
301 	if (bytesWritten < 0)
302 		return bytesWritten;
303 	messageHeader->msg_controllen = bytesWritten;
304 
305 	return B_OK;
306 }
307 
308 
309 static ssize_t
310 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
311 	size_t length, int flags)
312 {
313 	iovec stackVec = { data, length };
314 	iovec* vecs = header ? header->msg_iov : &stackVec;
315 	int vecCount = header ? header->msg_iovlen : 1;
316 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
317 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
318 
319 	ancillary_data_container* ancillaryData = NULL;
320 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
321 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
322 		addressLen);
323 	if (bytesRead < 0)
324 		return bytesRead;
325 
326 	CObjectDeleter<
327 		ancillary_data_container, void, delete_ancillary_data_container>
328 		ancillaryDataDeleter(ancillaryData);
329 
330 	// process ancillary data
331 	if (header != NULL) {
332 		status_t status = process_ancillary_data(socket, ancillaryData, header);
333 		if (status != B_OK)
334 			return status;
335 
336 		header->msg_flags = 0;
337 	}
338 
339 	return bytesRead;
340 }
341 
342 
343 #if ENABLE_DEBUGGER_COMMANDS
344 
345 
346 static void
347 print_socket_line(net_socket_private* socket, const char* prefix)
348 {
349 	BReference<net_socket_private> parent;
350 	if (socket->parent.PrivatePointer() != NULL)
351 		parent = socket->parent.GetReference();
352 	kprintf("%s%p %2d.%2d.%2d %6" B_PRId32 " %p %p  %p%s\n", prefix, socket,
353 		socket->family, socket->type, socket->protocol, socket->owner,
354 		socket->first_protocol, socket->first_info, parent.Get(),
355 		parent.IsSet() ? socket->is_connected ? " (c)" : " (p)" : "");
356 }
357 
358 
359 static int
360 dump_socket(int argc, char** argv)
361 {
362 	if (argc < 2) {
363 		kprintf("usage: %s [address]\n", argv[0]);
364 		return 0;
365 	}
366 
367 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
368 
369 	kprintf("SOCKET %p\n", socket);
370 	kprintf("  family.type.protocol: %d.%d.%d\n",
371 		socket->family, socket->type, socket->protocol);
372 	BReference<net_socket_private> parent;
373 	if (socket->parent.PrivatePointer() != NULL)
374 		parent = socket->parent.GetReference();
375 	kprintf("  parent:               %p\n", parent.Get());
376 	kprintf("  first protocol:       %p\n", socket->first_protocol);
377 	kprintf("  first module_info:    %p\n", socket->first_info);
378 	kprintf("  options:              %x\n", socket->options);
379 	kprintf("  linger:               %d\n", socket->linger);
380 	kprintf("  bound to device:      %" B_PRIu32 "\n", socket->bound_to_device);
381 	kprintf("  owner:                %" B_PRId32 "\n", socket->owner);
382 	kprintf("  max backlog:          %" B_PRId32 "\n", socket->max_backlog);
383 	kprintf("  is connected:         %d\n", socket->is_connected);
384 	kprintf("  child_count:          %" B_PRIu32 "\n", socket->child_count);
385 
386 	if (socket->child_count == 0)
387 		return 0;
388 
389 	kprintf("    pending children:\n");
390 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
391 	while (net_socket_private* child = iterator.Next()) {
392 		print_socket_line(child, "      ");
393 	}
394 
395 	kprintf("    connected children:\n");
396 	iterator = socket->connected_children.GetIterator();
397 	while (net_socket_private* child = iterator.Next()) {
398 		print_socket_line(child, "      ");
399 	}
400 
401 	return 0;
402 }
403 
404 
405 static int
406 dump_sockets(int argc, char** argv)
407 {
408 	kprintf("address        kind  owner protocol   module_info parent\n");
409 
410 	SocketList::Iterator iterator = sSocketList.GetIterator();
411 	while (net_socket_private* socket = iterator.Next()) {
412 		print_socket_line(socket, "");
413 
414 		SocketList::Iterator childIterator
415 			= socket->pending_children.GetIterator();
416 		while (net_socket_private* child = childIterator.Next()) {
417 			print_socket_line(child, " ");
418 		}
419 
420 		childIterator = socket->connected_children.GetIterator();
421 		while (net_socket_private* child = childIterator.Next()) {
422 			print_socket_line(child, " ");
423 		}
424 	}
425 
426 	return 0;
427 }
428 
429 
430 #endif	// ENABLE_DEBUGGER_COMMANDS
431 
432 
433 //	#pragma mark -
434 
435 
436 status_t
437 socket_open(int family, int type, int protocol, net_socket** _socket)
438 {
439 	net_socket_private* socket;
440 	status_t status = create_socket(family, type, protocol, &socket);
441 	if (status != B_OK)
442 		return status;
443 
444 	status = socket->first_info->open(socket->first_protocol);
445 	if (status != B_OK) {
446 		delete socket;
447 		return status;
448 	}
449 
450 	socket->owner = team_get_current_team_id();
451 	socket->is_in_socket_list = true;
452 
453 	mutex_lock(&sSocketLock);
454 	sSocketList.Add(socket);
455 	mutex_unlock(&sSocketLock);
456 
457 	*_socket = socket;
458 	return B_OK;
459 }
460 
461 
462 status_t
463 socket_close(net_socket* _socket)
464 {
465 	net_socket_private* socket = (net_socket_private*)_socket;
466 	return socket->first_info->close(socket->first_protocol);
467 }
468 
469 
470 void
471 socket_free(net_socket* _socket)
472 {
473 	net_socket_private* socket = (net_socket_private*)_socket;
474 	socket->first_info->free(socket->first_protocol);
475 	socket->ReleaseReference();
476 }
477 
478 
/*!	Vectored read entry point. It is not implemented here: the body ignores
	all of its arguments and unconditionally returns -1.
*/
status_t
socket_readv(net_socket* socket, const iovec* vecs, size_t vecCount,
	size_t* _length)
{
	return -1;
}
485 
486 
487 status_t
488 socket_writev(net_socket* socket, const iovec* vecs, size_t vecCount,
489 	size_t* _length)
490 {
491 	if (socket->peer.ss_len == 0)
492 		return ECONNRESET;
493 
494 	if (socket->address.ss_len == 0) {
495 		// try to bind first
496 		status_t status = socket_bind(socket, NULL, 0);
497 		if (status != B_OK)
498 			return status;
499 	}
500 
501 	// TODO: useful, maybe even computed header space!
502 	net_buffer* buffer = gNetBufferModule.create(256);
503 	if (buffer == NULL)
504 		return ENOBUFS;
505 
506 	// copy data into buffer
507 
508 	for (uint32 i = 0; i < vecCount; i++) {
509 		if (gNetBufferModule.append(buffer, vecs[i].iov_base,
510 				vecs[i].iov_len) < B_OK) {
511 			gNetBufferModule.free(buffer);
512 			return ENOBUFS;
513 		}
514 	}
515 
516 	memcpy(buffer->source, &socket->address, socket->address.ss_len);
517 	memcpy(buffer->destination, &socket->peer, socket->peer.ss_len);
518 	size_t size = buffer->size;
519 
520 	ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
521 		buffer);
522 	if (bytesWritten < B_OK) {
523 		if (buffer->size != size) {
524 			// this appears to be a partial write
525 			*_length = size - buffer->size;
526 		}
527 		gNetBufferModule.free(buffer);
528 		return bytesWritten;
529 	}
530 
531 	*_length = bytesWritten;
532 	return B_OK;
533 }
534 
535 
536 status_t
537 socket_control(net_socket* socket, uint32 op, void* data, size_t length)
538 {
539 	switch (op) {
540 		case FIONBIO:
541 		{
542 			if (data == NULL)
543 				return B_BAD_VALUE;
544 
545 			int value;
546 			if (is_syscall()) {
547 				if (!IS_USER_ADDRESS(data)
548 					|| user_memcpy(&value, data, sizeof(int)) != B_OK) {
549 					return B_BAD_ADDRESS;
550 				}
551 			} else
552 				value = *(int*)data;
553 
554 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
555 				sizeof(int));
556 		}
557 
558 		case FIONREAD:
559 		{
560 			if (data == NULL)
561 				return B_BAD_VALUE;
562 
563 			int available = (int)socket_read_avail(socket);
564 			if (available < 0)
565 				return available;
566 
567 			if (is_syscall()) {
568 				if (!IS_USER_ADDRESS(data)
569 					|| user_memcpy(data, &available, sizeof(available))
570 						!= B_OK) {
571 					return B_BAD_ADDRESS;
572 				}
573 			} else
574 				*(int*)data = available;
575 
576 			return B_OK;
577 		}
578 
579 		case B_SET_BLOCKING_IO:
580 		case B_SET_NONBLOCKING_IO:
581 		{
582 			int value = op == B_SET_NONBLOCKING_IO;
583 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
584 				sizeof(int));
585 		}
586 	}
587 
588 	return socket->first_info->control(socket->first_protocol,
589 		LEVEL_DRIVER_IOCTL, op, data, &length);
590 }
591 
592 
593 ssize_t
594 socket_read_avail(net_socket* socket)
595 {
596 	return socket->first_info->read_avail(socket->first_protocol);
597 }
598 
599 
600 ssize_t
601 socket_send_avail(net_socket* socket)
602 {
603 	return socket->first_info->send_avail(socket->first_protocol);
604 }
605 
606 
607 status_t
608 socket_send_data(net_socket* socket, net_buffer* buffer)
609 {
610 	return socket->first_info->send_data(socket->first_protocol,
611 		buffer);
612 }
613 
614 
615 status_t
616 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
617 	net_buffer** _buffer)
618 {
619 	status_t status = socket->first_info->read_data(socket->first_protocol,
620 		length, flags, _buffer);
621 	if (status != B_OK)
622 		return status;
623 
624 	if (*_buffer && length < (*_buffer)->size) {
625 		// discard any data behind the amount requested
626 		gNetBufferModule.trim(*_buffer, length);
627 	}
628 
629 	return status;
630 }
631 
632 
633 status_t
634 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
635 {
636 	MutexLocker locker(sSocketLock);
637 
638 	net_socket_private* socket = NULL;
639 	SocketList::Iterator iterator = sSocketList.GetIterator();
640 	uint32 cookie = *_cookie;
641 	uint32 count = 0;
642 
643 	while (true) {
644 		socket = iterator.Next();
645 		if (socket == NULL)
646 			return B_ENTRY_NOT_FOUND;
647 
648 		// TODO: also traverse the pending connections
649 		if (count == cookie)
650 			break;
651 
652 		if (family == -1 || family == socket->family)
653 			count++;
654 	}
655 
656 	*_cookie = count + 1;
657 
658 	stat->family = socket->family;
659 	stat->type = socket->type;
660 	stat->protocol = socket->protocol;
661 	stat->owner = socket->owner;
662 	stat->state[0] = '\0';
663 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
664 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
665 	stat->receive_queue_size = 0;
666 	stat->send_queue_size = 0;
667 
668 	// fill in protocol specific data (if supported by the protocol)
669 	size_t length = sizeof(net_stat);
670 	socket->first_info->control(socket->first_protocol, socket->protocol,
671 		NET_STAT_SOCKET, stat, &length);
672 
673 	return B_OK;
674 }
675 
676 
677 //	#pragma mark - connections
678 
679 
680 bool
681 socket_acquire(net_socket* _socket)
682 {
683 	net_socket_private* socket = (net_socket_private*)_socket;
684 
685 	// During destruction, the socket might still be accessible over its
686 	// endpoint protocol. We need to make sure the endpoint cannot acquire the
687 	// socket anymore -- while not obvious, the endpoint protocol is responsible
688 	// for the proper locking here.
689 	if (socket->CountReferences() == 0)
690 		return false;
691 
692 	socket->AcquireReference();
693 	return true;
694 }
695 
696 
697 bool
698 socket_release(net_socket* _socket)
699 {
700 	net_socket_private* socket = (net_socket_private*)_socket;
701 	return socket->ReleaseReference();
702 }
703 
704 
705 status_t
706 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
707 {
708 	net_socket_private* parent = (net_socket_private*)_parent;
709 
710 	TRACE("%s(%p)\n", __FUNCTION__, parent);
711 
712 	MutexLocker locker(parent->lock);
713 
714 	// We actually accept more pending connections to compensate for those
715 	// that never complete, and also make sure at least a single connection
716 	// can always be accepted
717 	if (parent->child_count > 3 * parent->max_backlog / 2)
718 		return ENOBUFS;
719 
720 	net_socket_private* socket;
721 	status_t status = create_socket(parent->family, parent->type,
722 		parent->protocol, &socket);
723 	if (status != B_OK)
724 		return status;
725 
726 	// inherit parent's properties
727 	socket->send = parent->send;
728 	socket->receive = parent->receive;
729 	socket->options = parent->options & ~SO_ACCEPTCONN;
730 	socket->linger = parent->linger;
731 	socket->owner = parent->owner;
732 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
733 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
734 
735 	// add to the parent's list of pending connections
736 	parent->pending_children.Add(socket);
737 	socket->parent = parent;
738 	parent->child_count++;
739 
740 	*_socket = socket;
741 	return B_OK;
742 }
743 
744 
745 /*!	Dequeues a connected child from a parent socket.
746 	It also returns a reference with the child socket.
747 */
748 status_t
749 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
750 {
751 	net_socket_private* parent = (net_socket_private*)_parent;
752 
753 	mutex_lock(&parent->lock);
754 
755 	net_socket_private* socket = parent->connected_children.RemoveHead();
756 	if (socket != NULL) {
757 		socket->AcquireReference();
758 		socket->RemoveFromParent();
759 		parent->child_count--;
760 		*_socket = socket;
761 	}
762 
763 	mutex_unlock(&parent->lock);
764 
765 	if (socket == NULL)
766 		return B_ENTRY_NOT_FOUND;
767 
768 	return B_OK;
769 }
770 
771 
772 ssize_t
773 socket_count_connected(net_socket* _parent)
774 {
775 	net_socket_private* parent = (net_socket_private*)_parent;
776 
777 	MutexLocker _(parent->lock);
778 	return parent->connected_children.Count();
779 }
780 
781 
782 status_t
783 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
784 {
785 	net_socket_private* socket = (net_socket_private*)_socket;
786 
787 	// we enforce an upper limit of connections waiting to be accepted
788 	if (backlog > 256)
789 		backlog = 256;
790 
791 	MutexLocker _(socket->lock);
792 
793 	// first remove the pending connections, then the already connected
794 	// ones as needed
795 	net_socket_private* child;
796 	while (socket->child_count > backlog
797 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
798 		child->RemoveFromParent();
799 		socket->child_count--;
800 	}
801 	while (socket->child_count > backlog
802 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
803 		child->RemoveFromParent();
804 		socket->child_count--;
805 	}
806 
807 	socket->max_backlog = backlog;
808 	return B_OK;
809 }
810 
811 
812 /*!	Returns whether or not this socket has a parent. The parent might not be
813 	valid anymore, though.
814 */
815 bool
816 socket_has_parent(net_socket* _socket)
817 {
818 	net_socket_private* socket = (net_socket_private*)_socket;
819 	return socket->parent != NULL;
820 }
821 
822 
823 /*!	The socket has been connected. It will be moved to the connected queue
824 	of its parent socket.
825 */
826 status_t
827 socket_connected(net_socket* _socket)
828 {
829 	net_socket_private* socket = (net_socket_private*)_socket;
830 
831 	TRACE("socket_connected(%p)\n", socket);
832 
833 	if (socket->parent == NULL) {
834 		socket->is_connected = true;
835 		return B_OK;
836 	}
837 
838 	BReference<net_socket_private> parent = socket->parent.GetReference();
839 	if (!parent.IsSet())
840 		return B_BAD_VALUE;
841 
842 	MutexLocker _(parent->lock);
843 
844 	parent->pending_children.Remove(socket);
845 	parent->connected_children.Add(socket);
846 	socket->is_connected = true;
847 
848 	// notify parent
849 	if (parent->select_pool)
850 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
851 
852 	return B_OK;
853 }
854 
855 
856 /*!	The socket has been aborted. Steals the parent's reference, and releases
857 	it.
858 */
859 status_t
860 socket_aborted(net_socket* _socket)
861 {
862 	net_socket_private* socket = (net_socket_private*)_socket;
863 
864 	TRACE("socket_aborted(%p)\n", socket);
865 
866 	BReference<net_socket_private> parent = socket->parent.GetReference();
867 	if (!parent.IsSet())
868 		return B_BAD_VALUE;
869 
870 	MutexLocker _(parent->lock);
871 
872 	if (socket->is_connected)
873 		parent->connected_children.Remove(socket);
874 	else
875 		parent->pending_children.Remove(socket);
876 
877 	parent->child_count--;
878 	socket->RemoveFromParent();
879 
880 	return B_OK;
881 }
882 
883 
884 //	#pragma mark - notifications
885 
886 
887 status_t
888 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
889 {
890 	net_socket_private* socket = (net_socket_private*)_socket;
891 
892 	mutex_lock(&socket->lock);
893 
894 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
895 		event);
896 
897 	mutex_unlock(&socket->lock);
898 
899 	if (status != B_OK)
900 		return status;
901 
902 	// check if the event is already present
903 	// TODO: add support for poll() types
904 
905 	switch (event) {
906 		case B_SELECT_READ:
907 		{
908 			ssize_t available = socket_read_avail(socket);
909 			if ((ssize_t)socket->receive.low_water_mark <= available
910 				|| available < B_OK)
911 				notify_select_event(sync, event);
912 			break;
913 		}
914 		case B_SELECT_WRITE:
915 		{
916 			if ((socket->options & SO_ACCEPTCONN) != 0)
917 				break;
918 
919 			ssize_t available = socket_send_avail(socket);
920 			if ((ssize_t)socket->send.low_water_mark <= available
921 				|| available < B_OK)
922 				notify_select_event(sync, event);
923 			break;
924 		}
925 		case B_SELECT_ERROR:
926 			if (socket->error != B_OK)
927 				notify_select_event(sync, event);
928 			break;
929 	}
930 
931 	return B_OK;
932 }
933 
934 
935 status_t
936 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
937 {
938 	net_socket_private* socket = (net_socket_private*)_socket;
939 
940 	MutexLocker _(socket->lock);
941 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
942 }
943 
944 
945 status_t
946 socket_notify(net_socket* _socket, uint8 event, int32 value)
947 {
948 	net_socket_private* socket = (net_socket_private*)_socket;
949 	bool notify = true;
950 
951 	switch (event) {
952 		case B_SELECT_READ:
953 			if ((ssize_t)socket->receive.low_water_mark > value
954 				&& value >= B_OK)
955 				notify = false;
956 			break;
957 
958 		case B_SELECT_WRITE:
959 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
960 				notify = false;
961 			break;
962 
963 		case B_SELECT_ERROR:
964 			socket->error = value;
965 			break;
966 	}
967 
968 	MutexLocker _(socket->lock);
969 
970 	if (notify && socket->select_pool != NULL) {
971 		notify_select_event_pool(socket->select_pool, event);
972 
973 		if (event == B_SELECT_ERROR) {
974 			// always notify read/write on error
975 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
976 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
977 		}
978 	}
979 
980 	return B_OK;
981 }
982 
983 
984 //	#pragma mark - standard socket API
985 
986 
987 int
988 socket_accept(net_socket* socket, struct sockaddr* address,
989 	socklen_t* _addressLength, net_socket** _acceptedSocket)
990 {
991 	if ((socket->options & SO_ACCEPTCONN) == 0)
992 		return B_BAD_VALUE;
993 
994 	net_socket* accepted;
995 	status_t status = socket->first_info->accept(socket->first_protocol,
996 		&accepted);
997 	if (status != B_OK)
998 		return status;
999 
1000 	if (address && *_addressLength > 0) {
1001 		memcpy(address, &accepted->peer, min_c(*_addressLength,
1002 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
1003 		*_addressLength = accepted->peer.ss_len;
1004 	}
1005 
1006 	*_acceptedSocket = accepted;
1007 	return B_OK;
1008 }
1009 
1010 
1011 int
1012 socket_bind(net_socket* socket, const struct sockaddr* address,
1013 	socklen_t addressLength)
1014 {
1015 	sockaddr empty;
1016 	if (address == NULL) {
1017 		// special - try to bind to an empty address, like INADDR_ANY
1018 		memset(&empty, 0, sizeof(sockaddr));
1019 		empty.sa_len = sizeof(sockaddr);
1020 		empty.sa_family = socket->family;
1021 
1022 		address = &empty;
1023 		addressLength = sizeof(sockaddr);
1024 	}
1025 
1026 	if (socket->address.ss_len != 0) {
1027 		status_t status = socket->first_info->unbind(socket->first_protocol,
1028 			(sockaddr*)&socket->address);
1029 		if (status != B_OK)
1030 			return status;
1031 	}
1032 
1033 	memcpy(&socket->address, address, sizeof(sockaddr));
1034 	socket->address.ss_len = sizeof(sockaddr_storage);
1035 
1036 	status_t status = socket->first_info->bind(socket->first_protocol,
1037 		(sockaddr*)address);
1038 	if (status != B_OK) {
1039 		// clear address again, as binding failed
1040 		socket->address.ss_len = 0;
1041 	}
1042 
1043 	return status;
1044 }
1045 
1046 
1047 int
1048 socket_connect(net_socket* socket, const struct sockaddr* address,
1049 	socklen_t addressLength)
1050 {
1051 	if (address == NULL || addressLength == 0)
1052 		return ENETUNREACH;
1053 
1054 	if (socket->address.ss_len == 0) {
1055 		// try to bind first
1056 		status_t status = socket_bind(socket, NULL, 0);
1057 		if (status != B_OK)
1058 			return status;
1059 	}
1060 
1061 	return socket->first_info->connect(socket->first_protocol, address);
1062 }
1063 
1064 
1065 int
1066 socket_getpeername(net_socket* _socket, struct sockaddr* address,
1067 	socklen_t* _addressLength)
1068 {
1069 	net_socket_private* socket = (net_socket_private*)_socket;
1070 	if (!socket->is_connected || socket->peer.ss_len == 0)
1071 		return ENOTCONN;
1072 
1073 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
1074 	*_addressLength = socket->peer.ss_len;
1075 	return B_OK;
1076 }
1077 
1078 
1079 int
1080 socket_getsockname(net_socket* socket, struct sockaddr* address,
1081 	socklen_t* _addressLength)
1082 {
1083 	if (socket->address.ss_len == 0) {
1084 		struct sockaddr buffer;
1085 		memset(&buffer, 0, sizeof(buffer));
1086 		buffer.sa_family = socket->family;
1087 
1088 		memcpy(address, &buffer, min_c(*_addressLength, sizeof(buffer)));
1089 		*_addressLength = sizeof(buffer);
1090 		return B_OK;
1091 	}
1092 
1093 	memcpy(address, &socket->address, min_c(*_addressLength,
1094 		socket->address.ss_len));
1095 	*_addressLength = socket->address.ss_len;
1096 	return B_OK;
1097 }
1098 
1099 
1100 status_t
1101 socket_get_option(net_socket* socket, int level, int option, void* value,
1102 	int* _length)
1103 {
1104 	if (level != SOL_SOCKET)
1105 		return ENOPROTOOPT;
1106 
1107 	switch (option) {
1108 		case SO_SNDBUF:
1109 		{
1110 			uint32* size = (uint32*)value;
1111 			*size = socket->send.buffer_size;
1112 			*_length = sizeof(uint32);
1113 			return B_OK;
1114 		}
1115 
1116 		case SO_RCVBUF:
1117 		{
1118 			uint32* size = (uint32*)value;
1119 			*size = socket->receive.buffer_size;
1120 			*_length = sizeof(uint32);
1121 			return B_OK;
1122 		}
1123 
1124 		case SO_SNDLOWAT:
1125 		{
1126 			uint32* size = (uint32*)value;
1127 			*size = socket->send.low_water_mark;
1128 			*_length = sizeof(uint32);
1129 			return B_OK;
1130 		}
1131 
1132 		case SO_RCVLOWAT:
1133 		{
1134 			uint32* size = (uint32*)value;
1135 			*size = socket->receive.low_water_mark;
1136 			*_length = sizeof(uint32);
1137 			return B_OK;
1138 		}
1139 
1140 		case SO_RCVTIMEO:
1141 		case SO_SNDTIMEO:
1142 		{
1143 			if (*_length < (int)sizeof(struct timeval))
1144 				return B_BAD_VALUE;
1145 
1146 			bigtime_t timeout;
1147 			if (option == SO_SNDTIMEO)
1148 				timeout = socket->send.timeout;
1149 			else
1150 				timeout = socket->receive.timeout;
1151 			if (timeout == B_INFINITE_TIMEOUT)
1152 				timeout = 0;
1153 
1154 			struct timeval* timeval = (struct timeval*)value;
1155 			timeval->tv_sec = timeout / 1000000LL;
1156 			timeval->tv_usec = timeout % 1000000LL;
1157 
1158 			*_length = sizeof(struct timeval);
1159 			return B_OK;
1160 		}
1161 
1162 		case SO_NONBLOCK:
1163 		{
1164 			int32* _set = (int32*)value;
1165 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1166 			*_length = sizeof(int32);
1167 			return B_OK;
1168 		}
1169 
1170 		case SO_ACCEPTCONN:
1171 		case SO_BROADCAST:
1172 		case SO_DEBUG:
1173 		case SO_DONTROUTE:
1174 		case SO_KEEPALIVE:
1175 		case SO_OOBINLINE:
1176 		case SO_REUSEADDR:
1177 		case SO_REUSEPORT:
1178 		case SO_USELOOPBACK:
1179 		{
1180 			int32* _set = (int32*)value;
1181 			*_set = (socket->options & option) != 0;
1182 			*_length = sizeof(int32);
1183 			return B_OK;
1184 		}
1185 
1186 		case SO_TYPE:
1187 		{
1188 			int32* _set = (int32*)value;
1189 			*_set = socket->type;
1190 			*_length = sizeof(int32);
1191 			return B_OK;
1192 		}
1193 
1194 		case SO_ERROR:
1195 		{
1196 			int32* _set = (int32*)value;
1197 			*_set = socket->error;
1198 			*_length = sizeof(int32);
1199 
1200 			socket->error = B_OK;
1201 				// clear error upon retrieval
1202 			return B_OK;
1203 		}
1204 
1205 		default:
1206 			break;
1207 	}
1208 
1209 	dprintf("socket_getsockopt: unknown option %d\n", option);
1210 	return ENOPROTOOPT;
1211 }
1212 
1213 
1214 int
1215 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1216 	int* _length)
1217 {
1218 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1219 		level, option, value, _length);
1220 }
1221 
1222 
1223 int
1224 socket_listen(net_socket* socket, int backlog)
1225 {
1226 	status_t status = socket->first_info->listen(socket->first_protocol,
1227 		backlog);
1228 	if (status == B_OK)
1229 		socket->options |= SO_ACCEPTCONN;
1230 
1231 	return status;
1232 }
1233 
1234 
1235 ssize_t
1236 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1237 	int flags)
1238 {
1239 	// If the protocol sports read_data_no_buffer() we use it.
1240 	if (socket->first_info->read_data_no_buffer != NULL)
1241 		return socket_receive_no_buffer(socket, header, data, length, flags);
1242 
1243 	size_t totalLength = length;
1244 	net_buffer* buffer;
1245 	int i;
1246 
1247 	// the convention to this function is that have header been
1248 	// present, { data, length } would have been iovec[0] and is
1249 	// always considered like that
1250 
1251 	if (header) {
1252 		// calculate the length considering all of the extra buffers
1253 		for (i = 1; i < header->msg_iovlen; i++)
1254 			totalLength += header->msg_iov[i].iov_len;
1255 	}
1256 
1257 	status_t status = socket->first_info->read_data(
1258 		socket->first_protocol, totalLength, flags, &buffer);
1259 	if (status != B_OK)
1260 		return status;
1261 
1262 	// process ancillary data
1263 	if (header != NULL) {
1264 		if (buffer != NULL && header->msg_control != NULL) {
1265 			ancillary_data_container* container
1266 				= gNetBufferModule.get_ancillary_data(buffer);
1267 			if (container != NULL)
1268 				status = process_ancillary_data(socket, container, header);
1269 			else
1270 				status = process_ancillary_data(socket, buffer, header);
1271 			if (status != B_OK) {
1272 				gNetBufferModule.free(buffer);
1273 				return status;
1274 			}
1275 		} else
1276 			header->msg_controllen = 0;
1277 	}
1278 
1279 	// TODO: - returning a NULL buffer when received 0 bytes
1280 	//         may not make much sense as we still need the address
1281 	//       - gNetBufferModule.read() uses memcpy() instead of user_memcpy
1282 
1283 	size_t nameLen = 0;
1284 
1285 	if (header) {
1286 		// TODO: - consider the control buffer options
1287 		nameLen = header->msg_namelen;
1288 		header->msg_namelen = 0;
1289 		header->msg_flags = 0;
1290 	}
1291 
1292 	if (buffer == NULL)
1293 		return 0;
1294 
1295 	size_t bytesReceived = buffer->size, bytesCopied = 0;
1296 
1297 	length = min_c(bytesReceived, length);
1298 	if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
1299 		gNetBufferModule.free(buffer);
1300 		return ENOBUFS;
1301 	}
1302 
1303 	// if first copy was a success, proceed to following
1304 	// copies as required
1305 	bytesCopied += length;
1306 
1307 	if (header) {
1308 		// we only start considering at iovec[1]
1309 		// as { data, length } is iovec[0]
1310 		for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1311 			iovec& vec = header->msg_iov[i];
1312 			size_t toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1313 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1314 					toRead) < B_OK) {
1315 				break;
1316 			}
1317 
1318 			bytesCopied += toRead;
1319 		}
1320 
1321 		if (header->msg_name != NULL) {
1322 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1323 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1324 		}
1325 	}
1326 
1327 	gNetBufferModule.free(buffer);
1328 
1329 	if (bytesCopied < bytesReceived) {
1330 		if (header)
1331 			header->msg_flags = MSG_TRUNC;
1332 
1333 		if (flags & MSG_TRUNC)
1334 			return bytesReceived;
1335 	}
1336 
1337 	return bytesCopied;
1338 }
1339 
1340 
1341 ssize_t
1342 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1343 	int flags)
1344 {
1345 	const sockaddr* address = NULL;
1346 	socklen_t addressLength = 0;
1347 	size_t bytesLeft = length;
1348 
1349 	if (length > SSIZE_MAX)
1350 		return B_BAD_VALUE;
1351 
1352 	ancillary_data_container* ancillaryData = NULL;
1353 	CObjectDeleter<
1354 		ancillary_data_container, void, delete_ancillary_data_container>
1355 		ancillaryDataDeleter;
1356 
1357 	if (header != NULL) {
1358 		address = (const sockaddr*)header->msg_name;
1359 		addressLength = header->msg_namelen;
1360 
1361 		// get the ancillary data
1362 		if (header->msg_control != NULL) {
1363 			ancillaryData = create_ancillary_data_container();
1364 			if (ancillaryData == NULL)
1365 				return B_NO_MEMORY;
1366 			ancillaryDataDeleter.SetTo(ancillaryData);
1367 
1368 			status_t status = add_ancillary_data(socket, ancillaryData,
1369 				(cmsghdr*)header->msg_control, header->msg_controllen);
1370 			if (status != B_OK)
1371 				return status;
1372 		}
1373 	}
1374 
1375 	if (addressLength == 0)
1376 		address = NULL;
1377 	else if (address == NULL)
1378 		return B_BAD_VALUE;
1379 
1380 	if (socket->peer.ss_len != 0) {
1381 		if (address != NULL)
1382 			return EISCONN;
1383 
1384 		// socket is connected, we use that address
1385 		address = (struct sockaddr*)&socket->peer;
1386 		addressLength = socket->peer.ss_len;
1387 	}
1388 
1389 	if (address == NULL || addressLength == 0) {
1390 		// don't know where to send to:
1391 		return EDESTADDRREQ;
1392 	}
1393 
1394 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1395 		&& bytesLeft > socket->send.buffer_size)
1396 		return EMSGSIZE;
1397 
1398 	if (socket->address.ss_len == 0) {
1399 		// try to bind first
1400 		status_t status = socket_bind(socket, NULL, 0);
1401 		if (status != B_OK)
1402 			return status;
1403 	}
1404 
1405 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1406 	if (socket->first_info->send_data_no_buffer != NULL) {
1407 		iovec stackVec = { (void*)data, length };
1408 		iovec* vecs = header ? header->msg_iov : &stackVec;
1409 		int vecCount = header ? header->msg_iovlen : 1;
1410 
1411 		ssize_t written = socket->first_info->send_data_no_buffer(
1412 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1413 			addressLength);
1414 		if (written > 0)
1415 			ancillaryDataDeleter.Detach();
1416 		return written;
1417 	}
1418 
1419 	// By convention, if a header is given, the (data, length) equals the first
1420 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1421 	// the size of the remaining ones.
1422 	if (header != NULL) {
1423 		if (header->msg_iovlen <= 1)
1424 			header = NULL;
1425 		else {
1426 // TODO: The iovecs have already been copied to kernel space. Simplify!
1427 			bytesLeft += compute_user_iovec_length(header->msg_iov + 1,
1428 				header->msg_iovlen - 1);
1429 		}
1430 	}
1431 
1432 	ssize_t bytesSent = 0;
1433 	size_t vecOffset = 0;
1434 	uint32 vecIndex = 0;
1435 
1436 	while (bytesLeft > 0) {
1437 		// TODO: useful, maybe even computed header space!
1438 		net_buffer* buffer = gNetBufferModule.create(256);
1439 		if (buffer == NULL)
1440 			return ENOBUFS;
1441 
1442 		while (buffer->size < socket->send.buffer_size
1443 			&& buffer->size < bytesLeft) {
1444 			if (vecIndex > 0 && vecOffset == 0) {
1445 				// retrieve next iovec buffer from header
1446 				iovec vec;
1447 				if (user_memcpy(&vec, header->msg_iov + vecIndex, sizeof(iovec))
1448 						< B_OK) {
1449 					gNetBufferModule.free(buffer);
1450 					return B_BAD_ADDRESS;
1451 				}
1452 
1453 				data = vec.iov_base;
1454 				length = vec.iov_len;
1455 			}
1456 
1457 			size_t bytes = length;
1458 			if (buffer->size + bytes > socket->send.buffer_size)
1459 				bytes = socket->send.buffer_size - buffer->size;
1460 
1461 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1462 				gNetBufferModule.free(buffer);
1463 				return ENOBUFS;
1464 			}
1465 
1466 			if (bytes != length) {
1467 				// partial send
1468 				vecOffset = bytes;
1469 				length -= vecOffset;
1470 				data = (uint8*)data + vecOffset;
1471 			} else if (header != NULL) {
1472 				// proceed with next buffer, if any
1473 				vecOffset = 0;
1474 				vecIndex++;
1475 
1476 				if (vecIndex >= (uint32)header->msg_iovlen)
1477 					break;
1478 			}
1479 		}
1480 
1481 		// attach ancillary data to the first buffer
1482 		status_t status;
1483 		if (ancillaryData != NULL) {
1484 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1485 			ancillaryDataDeleter.Detach();
1486 			ancillaryData = NULL;
1487 		}
1488 
1489 		size_t bufferSize = buffer->size;
1490 		buffer->flags = flags;
1491 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1492 		memcpy(buffer->destination, address, addressLength);
1493 		buffer->destination->sa_len = addressLength;
1494 
1495 		status = socket->first_info->send_data(socket->first_protocol, buffer);
1496 		if (status != B_OK) {
1497 			size_t sizeAfterSend = buffer->size;
1498 			gNetBufferModule.free(buffer);
1499 
1500 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1501 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1502 				// this appears to be a partial write
1503 				return bytesSent + (bufferSize - sizeAfterSend);
1504 			}
1505 			return status;
1506 		}
1507 
1508 		bytesLeft -= bufferSize;
1509 		bytesSent += bufferSize;
1510 	}
1511 
1512 	return bytesSent;
1513 }
1514 
1515 
1516 status_t
1517 socket_set_option(net_socket* socket, int level, int option, const void* value,
1518 	int length)
1519 {
1520 	if (level != SOL_SOCKET)
1521 		return ENOPROTOOPT;
1522 
1523 	TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1524 
1525 	switch (option) {
1526 		// TODO: implement other options!
1527 		case SO_LINGER:
1528 		{
1529 			if (length < (int)sizeof(struct linger))
1530 				return B_BAD_VALUE;
1531 
1532 			struct linger* linger = (struct linger*)value;
1533 			if (linger->l_onoff) {
1534 				socket->options |= SO_LINGER;
1535 				socket->linger = linger->l_linger;
1536 			} else {
1537 				socket->options &= ~SO_LINGER;
1538 				socket->linger = 0;
1539 			}
1540 			return B_OK;
1541 		}
1542 
1543 		case SO_SNDBUF:
1544 			if (length != sizeof(uint32))
1545 				return B_BAD_VALUE;
1546 
1547 			socket->send.buffer_size = *(const uint32*)value;
1548 			return B_OK;
1549 
1550 		case SO_RCVBUF:
1551 			if (length != sizeof(uint32))
1552 				return B_BAD_VALUE;
1553 
1554 			socket->receive.buffer_size = *(const uint32*)value;
1555 			return B_OK;
1556 
1557 		case SO_SNDLOWAT:
1558 			if (length != sizeof(uint32))
1559 				return B_BAD_VALUE;
1560 
1561 			socket->send.low_water_mark = *(const uint32*)value;
1562 			return B_OK;
1563 
1564 		case SO_RCVLOWAT:
1565 			if (length != sizeof(uint32))
1566 				return B_BAD_VALUE;
1567 
1568 			socket->receive.low_water_mark = *(const uint32*)value;
1569 			return B_OK;
1570 
1571 		case SO_RCVTIMEO:
1572 		case SO_SNDTIMEO:
1573 		{
1574 			if (length != sizeof(struct timeval))
1575 				return B_BAD_VALUE;
1576 
1577 			const struct timeval* timeval = (const struct timeval*)value;
1578 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1579 			if (timeout == 0)
1580 				timeout = B_INFINITE_TIMEOUT;
1581 
1582 			if (option == SO_SNDTIMEO)
1583 				socket->send.timeout = timeout;
1584 			else
1585 				socket->receive.timeout = timeout;
1586 			return B_OK;
1587 		}
1588 
1589 		case SO_NONBLOCK:
1590 			if (length != sizeof(int32))
1591 				return B_BAD_VALUE;
1592 
1593 			if (*(const int32*)value) {
1594 				socket->send.timeout = 0;
1595 				socket->receive.timeout = 0;
1596 			} else {
1597 				socket->send.timeout = B_INFINITE_TIMEOUT;
1598 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1599 			}
1600 			return B_OK;
1601 
1602 		case SO_BROADCAST:
1603 		case SO_DEBUG:
1604 		case SO_DONTROUTE:
1605 		case SO_KEEPALIVE:
1606 		case SO_OOBINLINE:
1607 		case SO_REUSEADDR:
1608 		case SO_REUSEPORT:
1609 		case SO_USELOOPBACK:
1610 			if (length != sizeof(int32))
1611 				return B_BAD_VALUE;
1612 
1613 			if (*(const int32*)value)
1614 				socket->options |= option;
1615 			else
1616 				socket->options &= ~option;
1617 			return B_OK;
1618 
1619 		case SO_BINDTODEVICE:
1620 		{
1621 			if (length != sizeof(uint32))
1622 				return B_BAD_VALUE;
1623 
1624 			// TODO: we might want to check if the device exists at all
1625 			// (although it doesn't really harm when we don't)
1626 			socket->bound_to_device = *(const uint32*)value;
1627 			return B_OK;
1628 		}
1629 
1630 		default:
1631 			break;
1632 	}
1633 
1634 	dprintf("socket_setsockopt: unknown option %d\n", option);
1635 	return ENOPROTOOPT;
1636 }
1637 
1638 
1639 int
1640 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1641 	int length)
1642 {
1643 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1644 		level, option, value, length);
1645 }
1646 
1647 
1648 int
1649 socket_shutdown(net_socket* socket, int direction)
1650 {
1651 	return socket->first_info->shutdown(socket->first_protocol, direction);
1652 }
1653 
1654 
1655 status_t
1656 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1657 {
1658 	sockets[0] = NULL;
1659 	sockets[1] = NULL;
1660 
1661 	// create sockets
1662 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1663 	if (error != B_OK)
1664 		return error;
1665 
1666 	error = socket_open(family, type, protocol, &sockets[1]);
1667 
1668 	// bind one
1669 	if (error == B_OK)
1670 		error = socket_bind(sockets[0], NULL, 0);
1671 
1672 	// start listening
1673 	if (error == B_OK)
1674 		error = socket_listen(sockets[0], 1);
1675 
1676 	// connect them
1677 	if (error == B_OK) {
1678 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1679 			sockets[0]->address.ss_len);
1680 	}
1681 
1682 	// accept a socket
1683 	net_socket* acceptedSocket = NULL;
1684 	if (error == B_OK)
1685 		error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1686 
1687 	if (error == B_OK) {
1688 		// everything worked: close the listener socket
1689 		socket_close(sockets[0]);
1690 		socket_free(sockets[0]);
1691 		sockets[0] = acceptedSocket;
1692 	} else {
1693 		// close sockets on error
1694 		for (int i = 0; i < 2; i++) {
1695 			if (sockets[i] != NULL) {
1696 				socket_close(sockets[i]);
1697 				socket_free(sockets[i]);
1698 				sockets[i] = NULL;
1699 			}
1700 		}
1701 	}
1702 
1703 	return error;
1704 }
1705 
1706 
1707 //	#pragma mark -
1708 
1709 
1710 static status_t
1711 socket_std_ops(int32 op, ...)
1712 {
1713 	switch (op) {
1714 		case B_MODULE_INIT:
1715 		{
1716 			new (&sSocketList) SocketList;
1717 			mutex_init(&sSocketLock, "socket list");
1718 
1719 #if ENABLE_DEBUGGER_COMMANDS
1720 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1721 			add_debugger_command("socket", dump_socket, "dumps a socket");
1722 #endif
1723 			return B_OK;
1724 		}
1725 		case B_MODULE_UNINIT:
1726 			ASSERT(sSocketList.IsEmpty());
1727 			mutex_destroy(&sSocketLock);
1728 
1729 #if ENABLE_DEBUGGER_COMMANDS
1730 			remove_debugger_command("socket", dump_socket);
1731 			remove_debugger_command("sockets", dump_sockets);
1732 #endif
1733 			return B_OK;
1734 
1735 		default:
1736 			return B_ERROR;
1737 	}
1738 }
1739 
1740 
1741 net_socket_module_info gNetSocketModule = {
1742 	{
1743 		NET_SOCKET_MODULE_NAME,
1744 		0,
1745 		socket_std_ops
1746 	},
1747 	socket_open,
1748 	socket_close,
1749 	socket_free,
1750 
1751 	socket_readv,
1752 	socket_writev,
1753 	socket_control,
1754 
1755 	socket_read_avail,
1756 	socket_send_avail,
1757 
1758 	socket_send_data,
1759 	socket_receive_data,
1760 
1761 	socket_get_option,
1762 	socket_set_option,
1763 
1764 	socket_get_next_stat,
1765 
1766 	// connections
1767 	socket_acquire,
1768 	socket_release,
1769 	socket_spawn_pending,
1770 	socket_dequeue_connected,
1771 	socket_count_connected,
1772 	socket_set_max_backlog,
1773 	socket_has_parent,
1774 	socket_connected,
1775 	socket_aborted,
1776 
1777 	// notifications
1778 	socket_request_notification,
1779 	socket_cancel_notification,
1780 	socket_notify,
1781 
1782 	// standard socket API
1783 	socket_accept,
1784 	socket_bind,
1785 	socket_connect,
1786 	socket_getpeername,
1787 	socket_getsockname,
1788 	socket_getsockopt,
1789 	socket_listen,
1790 	socket_receive,
1791 	socket_send,
1792 	socket_setsockopt,
1793 	socket_shutdown,
1794 	socket_socketpair
1795 };
1796 
1797