xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 02354704729d38c3b078c696adc1bbbd33cbcf72)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
40 //#define TRACE_SOCKET
41 #ifdef TRACE_SOCKET
42 #	define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
43 #else
44 #	define TRACE(x...) ;
45 #endif
46 
47 
48 struct net_socket_private;
49 typedef DoublyLinkedList<net_socket_private> SocketList;
50 
51 struct net_socket_private : net_socket,
52 		DoublyLinkedListLinkImpl<net_socket_private>,
53 		BWeakReferenceable {
54 	net_socket_private();
55 	~net_socket_private();
56 
57 	void RemoveFromParent();
58 
59 	BWeakReference<net_socket_private> parent;
60 	team_id						owner;
61 	uint32						max_backlog;
62 	uint32						child_count;
63 	SocketList					pending_children;
64 	SocketList					connected_children;
65 
66 	struct select_sync_pool*	select_pool;
67 	mutex						lock;
68 
69 	bool						is_connected;
70 	bool						is_in_socket_list;
71 };
72 
73 
74 int socket_bind(net_socket* socket, const struct sockaddr* address,
75 	socklen_t addressLength);
76 int socket_setsockopt(net_socket* socket, int level, int option,
77 	const void* value, int length);
78 ssize_t socket_read_avail(net_socket* socket);
79 
80 static SocketList sSocketList;
81 static mutex sSocketLock;
82 
83 
84 net_socket_private::net_socket_private()
85 	:
86 	owner(-1),
87 	max_backlog(0),
88 	child_count(0),
89 	select_pool(NULL),
90 	is_connected(false),
91 	is_in_socket_list(false)
92 {
93 	first_protocol = NULL;
94 	first_info = NULL;
95 	options = 0;
96 	linger = 0;
97 	bound_to_device = 0;
98 	error = 0;
99 
100 	address.ss_len = 0;
101 	peer.ss_len = 0;
102 
103 	mutex_init(&lock, "socket");
104 
105 	// set defaults (may be overridden by the protocols)
106 	send.buffer_size = 65535;
107 	send.low_water_mark = 1;
108 	send.timeout = B_INFINITE_TIMEOUT;
109 	receive.buffer_size = 65535;
110 	receive.low_water_mark = 1;
111 	receive.timeout = B_INFINITE_TIMEOUT;
112 }
113 
114 
115 net_socket_private::~net_socket_private()
116 {
117 	TRACE("delete net_socket %p\n", this);
118 
119 	if (parent != NULL)
120 		panic("socket still has a parent!");
121 
122 	if (is_in_socket_list) {
123 		MutexLocker _(sSocketLock);
124 		sSocketList.Remove(this);
125 	}
126 
127 	mutex_lock(&lock);
128 
129 	// also delete all children of this socket
130 	while (net_socket_private* child = pending_children.RemoveHead()) {
131 		child->RemoveFromParent();
132 	}
133 	while (net_socket_private* child = connected_children.RemoveHead()) {
134 		child->RemoveFromParent();
135 	}
136 
137 	mutex_unlock(&lock);
138 
139 	put_domain_protocols(this);
140 
141 	mutex_destroy(&lock);
142 }
143 
144 
145 void
146 net_socket_private::RemoveFromParent()
147 {
148 	ASSERT(!is_in_socket_list && parent != NULL);
149 
150 	parent = NULL;
151 
152 	mutex_lock(&sSocketLock);
153 	sSocketList.Add(this);
154 	mutex_unlock(&sSocketLock);
155 
156 	is_in_socket_list = true;
157 
158 	ReleaseReference();
159 }
160 
161 
162 //	#pragma mark -
163 
164 
165 static size_t
166 compute_user_iovec_length(iovec* userVec, uint32 count)
167 {
168 	size_t length = 0;
169 
170 	for (uint32 i = 0; i < count; i++) {
171 		iovec vec;
172 		if (user_memcpy(&vec, userVec + i, sizeof(iovec)) < B_OK)
173 			return 0;
174 
175 		length += vec.iov_len;
176 	}
177 
178 	return length;
179 }
180 
181 
182 static status_t
183 create_socket(int family, int type, int protocol, net_socket_private** _socket)
184 {
185 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
186 	if (socket == NULL)
187 		return B_NO_MEMORY;
188 	status_t status = socket->InitCheck();
189 	if (status != B_OK) {
190 		delete socket;
191 		return status;
192 	}
193 
194 	socket->family = family;
195 	socket->type = type;
196 	socket->protocol = protocol;
197 
198 	status = get_domain_protocols(socket);
199 	if (status != B_OK) {
200 		delete socket;
201 		return status;
202 	}
203 
204 	TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
205 		socket->type, socket->protocol);
206 
207 #ifdef TRACE_SOCKET
208 	net_protocol* current = socket->first_protocol;
209 	for (int i = 0; current != NULL; current = current->next, i++)
210 		TRACE("  [%d] %p  %s\n", i, current, current->module->info.name);
211 #endif
212 
213 	*_socket = socket;
214 	return B_OK;
215 }
216 
217 
218 static status_t
219 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
220 	void* data, size_t dataLen)
221 {
222 	cmsghdr* header = (cmsghdr*)data;
223 
224 	if (dataLen == 0)
225 		return B_OK;
226 
227 	if (socket->first_info->add_ancillary_data == NULL)
228 		return B_NOT_SUPPORTED;
229 
230 	while (true) {
231 		if (header->cmsg_len < CMSG_LEN(0) || header->cmsg_len > dataLen)
232 			return B_BAD_VALUE;
233 
234 		status_t status = socket->first_info->add_ancillary_data(
235 			socket->first_protocol, container, header);
236 		if (status != B_OK)
237 			return status;
238 
239 		if (dataLen <= _ALIGN(header->cmsg_len))
240 			break;
241 		dataLen -= _ALIGN(header->cmsg_len);
242 		header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
243 	}
244 
245 	return B_OK;
246 }
247 
248 
249 static status_t
250 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
251 	msghdr* messageHeader)
252 {
253 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
254 	int dataBufferLen = messageHeader->msg_controllen;
255 
256 	if (container == NULL || dataBuffer == NULL) {
257 		messageHeader->msg_controllen = 0;
258 		return B_OK;
259 	}
260 
261 	ancillary_data_header header;
262 	void* data = NULL;
263 
264 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
265 		if (socket->first_info->process_ancillary_data == NULL)
266 			return B_NOT_SUPPORTED;
267 
268 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
269 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
270 		if (bytesWritten < 0)
271 			return bytesWritten;
272 
273 		dataBuffer += bytesWritten;
274 		dataBufferLen -= bytesWritten;
275 	}
276 
277 	messageHeader->msg_controllen -= dataBufferLen;
278 
279 	return B_OK;
280 }
281 
282 
283 static status_t
284 process_ancillary_data(net_socket* socket,
285 	net_buffer* buffer, msghdr* messageHeader)
286 {
287 	void *dataBuffer = messageHeader->msg_control;
288 	ssize_t bytesWritten;
289 
290 	if (dataBuffer == NULL) {
291 		messageHeader->msg_controllen = 0;
292 		return B_OK;
293 	}
294 
295 	if (socket->first_info->process_ancillary_data_no_container == NULL)
296 		return B_NOT_SUPPORTED;
297 
298 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
299 		socket->first_protocol, buffer, dataBuffer,
300 		messageHeader->msg_controllen);
301 	if (bytesWritten < 0)
302 		return bytesWritten;
303 	messageHeader->msg_controllen = bytesWritten;
304 
305 	return B_OK;
306 }
307 
308 
309 static ssize_t
310 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
311 	size_t length, int flags)
312 {
313 	iovec stackVec = { data, length };
314 	iovec* vecs = header ? header->msg_iov : &stackVec;
315 	int vecCount = header ? header->msg_iovlen : 1;
316 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
317 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
318 
319 	ancillary_data_container* ancillaryData = NULL;
320 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
321 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
322 		addressLen);
323 	if (bytesRead < 0)
324 		return bytesRead;
325 
326 	CObjectDeleter<
327 		ancillary_data_container, void, delete_ancillary_data_container>
328 		ancillaryDataDeleter(ancillaryData);
329 
330 	// process ancillary data
331 	if (header != NULL) {
332 		status_t status = process_ancillary_data(socket, ancillaryData, header);
333 		if (status != B_OK)
334 			return status;
335 
336 		header->msg_flags = 0;
337 	}
338 
339 	return bytesRead;
340 }
341 
342 
343 #if ENABLE_DEBUGGER_COMMANDS
344 
345 
346 static void
347 print_socket_line(net_socket_private* socket, const char* prefix)
348 {
349 	BReference<net_socket_private> parent;
350 	if (socket->parent.PrivatePointer() != NULL)
351 		parent = socket->parent.GetReference();
352 	kprintf("%s%p %2d.%2d.%2d %6" B_PRId32 " %p %p  %p%s\n", prefix, socket,
353 		socket->family, socket->type, socket->protocol, socket->owner,
354 		socket->first_protocol, socket->first_info, parent.Get(),
355 		parent.IsSet() ? socket->is_connected ? " (c)" : " (p)" : "");
356 }
357 
358 
359 static int
360 dump_socket(int argc, char** argv)
361 {
362 	if (argc < 2) {
363 		kprintf("usage: %s [address]\n", argv[0]);
364 		return 0;
365 	}
366 
367 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
368 
369 	kprintf("SOCKET %p\n", socket);
370 	kprintf("  family.type.protocol: %d.%d.%d\n",
371 		socket->family, socket->type, socket->protocol);
372 	BReference<net_socket_private> parent;
373 	if (socket->parent.PrivatePointer() != NULL)
374 		parent = socket->parent.GetReference();
375 	kprintf("  parent:               %p\n", parent.Get());
376 	kprintf("  first protocol:       %p\n", socket->first_protocol);
377 	kprintf("  first module_info:    %p\n", socket->first_info);
378 	kprintf("  options:              %x\n", socket->options);
379 	kprintf("  linger:               %d\n", socket->linger);
380 	kprintf("  bound to device:      %" B_PRIu32 "\n", socket->bound_to_device);
381 	kprintf("  owner:                %" B_PRId32 "\n", socket->owner);
382 	kprintf("  max backlog:          %" B_PRId32 "\n", socket->max_backlog);
383 	kprintf("  is connected:         %d\n", socket->is_connected);
384 	kprintf("  child_count:          %" B_PRIu32 "\n", socket->child_count);
385 
386 	if (socket->child_count == 0)
387 		return 0;
388 
389 	kprintf("    pending children:\n");
390 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
391 	while (net_socket_private* child = iterator.Next()) {
392 		print_socket_line(child, "      ");
393 	}
394 
395 	kprintf("    connected children:\n");
396 	iterator = socket->connected_children.GetIterator();
397 	while (net_socket_private* child = iterator.Next()) {
398 		print_socket_line(child, "      ");
399 	}
400 
401 	return 0;
402 }
403 
404 
405 static int
406 dump_sockets(int argc, char** argv)
407 {
408 	kprintf("address        kind  owner protocol   module_info parent\n");
409 
410 	SocketList::Iterator iterator = sSocketList.GetIterator();
411 	while (net_socket_private* socket = iterator.Next()) {
412 		print_socket_line(socket, "");
413 
414 		SocketList::Iterator childIterator
415 			= socket->pending_children.GetIterator();
416 		while (net_socket_private* child = childIterator.Next()) {
417 			print_socket_line(child, " ");
418 		}
419 
420 		childIterator = socket->connected_children.GetIterator();
421 		while (net_socket_private* child = childIterator.Next()) {
422 			print_socket_line(child, " ");
423 		}
424 	}
425 
426 	return 0;
427 }
428 
429 
430 #endif	// ENABLE_DEBUGGER_COMMANDS
431 
432 
433 //	#pragma mark -
434 
435 
436 status_t
437 socket_open(int family, int type, int protocol, net_socket** _socket)
438 {
439 	net_socket_private* socket;
440 	status_t status = create_socket(family, type, protocol, &socket);
441 	if (status != B_OK)
442 		return status;
443 
444 	status = socket->first_info->open(socket->first_protocol);
445 	if (status != B_OK) {
446 		delete socket;
447 		return status;
448 	}
449 
450 	socket->owner = team_get_current_team_id();
451 	socket->is_in_socket_list = true;
452 
453 	mutex_lock(&sSocketLock);
454 	sSocketList.Add(socket);
455 	mutex_unlock(&sSocketLock);
456 
457 	*_socket = socket;
458 	return B_OK;
459 }
460 
461 
462 status_t
463 socket_close(net_socket* _socket)
464 {
465 	net_socket_private* socket = (net_socket_private*)_socket;
466 	return socket->first_info->close(socket->first_protocol);
467 }
468 
469 
470 void
471 socket_free(net_socket* _socket)
472 {
473 	net_socket_private* socket = (net_socket_private*)_socket;
474 	socket->first_info->free(socket->first_protocol);
475 	socket->ReleaseReference();
476 }
477 
478 
479 status_t
480 socket_readv(net_socket* socket, const iovec* vecs, size_t vecCount,
481 	size_t* _length)
482 {
483 	return -1;
484 }
485 
486 
487 status_t
488 socket_writev(net_socket* socket, const iovec* vecs, size_t vecCount,
489 	size_t* _length)
490 {
491 	if (socket->peer.ss_len == 0)
492 		return ECONNRESET;
493 
494 	if (socket->address.ss_len == 0) {
495 		// try to bind first
496 		status_t status = socket_bind(socket, NULL, 0);
497 		if (status != B_OK)
498 			return status;
499 	}
500 
501 	// TODO: useful, maybe even computed header space!
502 	net_buffer* buffer = gNetBufferModule.create(256);
503 	if (buffer == NULL)
504 		return ENOBUFS;
505 
506 	// copy data into buffer
507 
508 	for (uint32 i = 0; i < vecCount; i++) {
509 		if (gNetBufferModule.append(buffer, vecs[i].iov_base,
510 				vecs[i].iov_len) < B_OK) {
511 			gNetBufferModule.free(buffer);
512 			return ENOBUFS;
513 		}
514 	}
515 
516 	memcpy(buffer->source, &socket->address, socket->address.ss_len);
517 	memcpy(buffer->destination, &socket->peer, socket->peer.ss_len);
518 	size_t size = buffer->size;
519 
520 	ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
521 		buffer);
522 	if (bytesWritten < B_OK) {
523 		if (buffer->size != size) {
524 			// this appears to be a partial write
525 			*_length = size - buffer->size;
526 		}
527 		gNetBufferModule.free(buffer);
528 		return bytesWritten;
529 	}
530 
531 	*_length = bytesWritten;
532 	return B_OK;
533 }
534 
535 
536 status_t
537 socket_control(net_socket* socket, uint32 op, void* data, size_t length)
538 {
539 	switch (op) {
540 		case FIONBIO:
541 		{
542 			if (data == NULL)
543 				return B_BAD_VALUE;
544 
545 			int value;
546 			if (is_syscall()) {
547 				if (!IS_USER_ADDRESS(data)
548 					|| user_memcpy(&value, data, sizeof(int)) != B_OK) {
549 					return B_BAD_ADDRESS;
550 				}
551 			} else
552 				value = *(int*)data;
553 
554 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
555 				sizeof(int));
556 		}
557 
558 		case FIONREAD:
559 		{
560 			if (data == NULL)
561 				return B_BAD_VALUE;
562 
563 			int available = (int)socket_read_avail(socket);
564 			if (available < 0)
565 				return available;
566 
567 			if (is_syscall()) {
568 				if (!IS_USER_ADDRESS(data)
569 					|| user_memcpy(data, &available, sizeof(available))
570 						!= B_OK) {
571 					return B_BAD_ADDRESS;
572 				}
573 			} else
574 				*(int*)data = available;
575 
576 			return B_OK;
577 		}
578 
579 		case B_SET_BLOCKING_IO:
580 		case B_SET_NONBLOCKING_IO:
581 		{
582 			int value = op == B_SET_NONBLOCKING_IO;
583 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
584 				sizeof(int));
585 		}
586 	}
587 
588 	return socket->first_info->control(socket->first_protocol,
589 		LEVEL_DRIVER_IOCTL, op, data, &length);
590 }
591 
592 
593 ssize_t
594 socket_read_avail(net_socket* socket)
595 {
596 	return socket->first_info->read_avail(socket->first_protocol);
597 }
598 
599 
600 ssize_t
601 socket_send_avail(net_socket* socket)
602 {
603 	return socket->first_info->send_avail(socket->first_protocol);
604 }
605 
606 
607 status_t
608 socket_send_data(net_socket* socket, net_buffer* buffer)
609 {
610 	return socket->first_info->send_data(socket->first_protocol,
611 		buffer);
612 }
613 
614 
615 status_t
616 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
617 	net_buffer** _buffer)
618 {
619 	status_t status = socket->first_info->read_data(socket->first_protocol,
620 		length, flags, _buffer);
621 	if (status != B_OK)
622 		return status;
623 
624 	if (*_buffer && length < (*_buffer)->size) {
625 		// discard any data behind the amount requested
626 		gNetBufferModule.trim(*_buffer, length);
627 	}
628 
629 	return status;
630 }
631 
632 
633 status_t
634 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
635 {
636 	MutexLocker locker(sSocketLock);
637 
638 	net_socket_private* socket = NULL;
639 	SocketList::Iterator iterator = sSocketList.GetIterator();
640 	uint32 cookie = *_cookie;
641 	uint32 count = 0;
642 
643 	while (true) {
644 		socket = iterator.Next();
645 		if (socket == NULL)
646 			return B_ENTRY_NOT_FOUND;
647 
648 		// TODO: also traverse the pending connections
649 		if (count == cookie)
650 			break;
651 
652 		if (family == -1 || family == socket->family)
653 			count++;
654 	}
655 
656 	*_cookie = count + 1;
657 
658 	stat->family = socket->family;
659 	stat->type = socket->type;
660 	stat->protocol = socket->protocol;
661 	stat->owner = socket->owner;
662 	stat->state[0] = '\0';
663 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
664 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
665 	stat->receive_queue_size = 0;
666 	stat->send_queue_size = 0;
667 
668 	// fill in protocol specific data (if supported by the protocol)
669 	size_t length = sizeof(net_stat);
670 	socket->first_info->control(socket->first_protocol, socket->protocol,
671 		NET_STAT_SOCKET, stat, &length);
672 
673 	return B_OK;
674 }
675 
676 
677 //	#pragma mark - connections
678 
679 
680 bool
681 socket_acquire(net_socket* _socket)
682 {
683 	net_socket_private* socket = (net_socket_private*)_socket;
684 
685 	// During destruction, the socket might still be accessible over its
686 	// endpoint protocol. We need to make sure the endpoint cannot acquire the
687 	// socket anymore -- while not obvious, the endpoint protocol is responsible
688 	// for the proper locking here.
689 	if (socket->CountReferences() == 0)
690 		return false;
691 
692 	socket->AcquireReference();
693 	return true;
694 }
695 
696 
697 bool
698 socket_release(net_socket* _socket)
699 {
700 	net_socket_private* socket = (net_socket_private*)_socket;
701 	return socket->ReleaseReference();
702 }
703 
704 
705 status_t
706 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
707 {
708 	net_socket_private* parent = (net_socket_private*)_parent;
709 
710 	TRACE("%s(%p)\n", __FUNCTION__, parent);
711 
712 	MutexLocker locker(parent->lock);
713 
714 	// We actually accept more pending connections to compensate for those
715 	// that never complete, and also make sure at least a single connection
716 	// can always be accepted
717 	if (parent->child_count > 3 * parent->max_backlog / 2)
718 		return ENOBUFS;
719 
720 	net_socket_private* socket;
721 	status_t status = create_socket(parent->family, parent->type,
722 		parent->protocol, &socket);
723 	if (status != B_OK)
724 		return status;
725 
726 	// inherit parent's properties
727 	socket->send = parent->send;
728 	socket->receive = parent->receive;
729 	socket->options = parent->options & ~SO_ACCEPTCONN;
730 	socket->linger = parent->linger;
731 	socket->owner = parent->owner;
732 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
733 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
734 
735 	// add to the parent's list of pending connections
736 	parent->pending_children.Add(socket);
737 	socket->parent = parent;
738 	parent->child_count++;
739 
740 	*_socket = socket;
741 	return B_OK;
742 }
743 
744 
745 /*!	Dequeues a connected child from a parent socket.
746 	It also returns a reference with the child socket.
747 */
748 status_t
749 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
750 {
751 	net_socket_private* parent = (net_socket_private*)_parent;
752 
753 	mutex_lock(&parent->lock);
754 
755 	net_socket_private* socket = parent->connected_children.RemoveHead();
756 	if (socket != NULL) {
757 		socket->AcquireReference();
758 		socket->RemoveFromParent();
759 		parent->child_count--;
760 		*_socket = socket;
761 	}
762 
763 	mutex_unlock(&parent->lock);
764 
765 	if (socket == NULL)
766 		return B_ENTRY_NOT_FOUND;
767 
768 	return B_OK;
769 }
770 
771 
772 ssize_t
773 socket_count_connected(net_socket* _parent)
774 {
775 	net_socket_private* parent = (net_socket_private*)_parent;
776 
777 	MutexLocker _(parent->lock);
778 	return parent->connected_children.Count();
779 }
780 
781 
782 status_t
783 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
784 {
785 	net_socket_private* socket = (net_socket_private*)_socket;
786 
787 	// we enforce an upper limit of connections waiting to be accepted
788 	if (backlog > 256)
789 		backlog = 256;
790 
791 	MutexLocker _(socket->lock);
792 
793 	// first remove the pending connections, then the already connected
794 	// ones as needed
795 	net_socket_private* child;
796 	while (socket->child_count > backlog
797 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
798 		child->RemoveFromParent();
799 		socket->child_count--;
800 	}
801 	while (socket->child_count > backlog
802 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
803 		child->RemoveFromParent();
804 		socket->child_count--;
805 	}
806 
807 	socket->max_backlog = backlog;
808 	return B_OK;
809 }
810 
811 
812 /*!	Returns whether or not this socket has a parent. The parent might not be
813 	valid anymore, though.
814 */
815 bool
816 socket_has_parent(net_socket* _socket)
817 {
818 	net_socket_private* socket = (net_socket_private*)_socket;
819 	return socket->parent != NULL;
820 }
821 
822 
823 /*!	The socket has been connected. It will be moved to the connected queue
824 	of its parent socket.
825 */
826 status_t
827 socket_connected(net_socket* _socket)
828 {
829 	net_socket_private* socket = (net_socket_private*)_socket;
830 
831 	TRACE("socket_connected(%p)\n", socket);
832 
833 	if (socket->parent == NULL) {
834 		socket->is_connected = true;
835 		return B_OK;
836 	}
837 
838 	BReference<net_socket_private> parent = socket->parent.GetReference();
839 	if (!parent.IsSet())
840 		return B_BAD_VALUE;
841 
842 	MutexLocker _(parent->lock);
843 
844 	parent->pending_children.Remove(socket);
845 	parent->connected_children.Add(socket);
846 	socket->is_connected = true;
847 
848 	// notify parent
849 	if (parent->select_pool)
850 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
851 
852 	return B_OK;
853 }
854 
855 
856 /*!	The socket has been aborted. Steals the parent's reference, and releases
857 	it.
858 */
859 status_t
860 socket_aborted(net_socket* _socket)
861 {
862 	net_socket_private* socket = (net_socket_private*)_socket;
863 
864 	TRACE("socket_aborted(%p)\n", socket);
865 
866 	BReference<net_socket_private> parent = socket->parent.GetReference();
867 	if (!parent.IsSet())
868 		return B_BAD_VALUE;
869 
870 	MutexLocker _(parent->lock);
871 
872 	if (socket->is_connected)
873 		parent->connected_children.Remove(socket);
874 	else
875 		parent->pending_children.Remove(socket);
876 
877 	parent->child_count--;
878 	socket->RemoveFromParent();
879 
880 	return B_OK;
881 }
882 
883 
884 //	#pragma mark - notifications
885 
886 
887 status_t
888 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
889 {
890 	net_socket_private* socket = (net_socket_private*)_socket;
891 
892 	mutex_lock(&socket->lock);
893 
894 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
895 		event);
896 
897 	mutex_unlock(&socket->lock);
898 
899 	if (status != B_OK)
900 		return status;
901 
902 	// check if the event is already present
903 	// TODO: add support for poll() types
904 
905 	switch (event) {
906 		case B_SELECT_READ:
907 		{
908 			ssize_t available = socket_read_avail(socket);
909 			if ((ssize_t)socket->receive.low_water_mark <= available
910 				|| available < B_OK)
911 				notify_select_event(sync, event);
912 			break;
913 		}
914 		case B_SELECT_WRITE:
915 		{
916 			ssize_t available = socket_send_avail(socket);
917 			if ((ssize_t)socket->send.low_water_mark <= available
918 				|| available < B_OK)
919 				notify_select_event(sync, event);
920 			break;
921 		}
922 		case B_SELECT_ERROR:
923 			if (socket->error != B_OK)
924 				notify_select_event(sync, event);
925 			break;
926 	}
927 
928 	return B_OK;
929 }
930 
931 
932 status_t
933 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
934 {
935 	net_socket_private* socket = (net_socket_private*)_socket;
936 
937 	MutexLocker _(socket->lock);
938 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
939 }
940 
941 
942 status_t
943 socket_notify(net_socket* _socket, uint8 event, int32 value)
944 {
945 	net_socket_private* socket = (net_socket_private*)_socket;
946 	bool notify = true;
947 
948 	switch (event) {
949 		case B_SELECT_READ:
950 			if ((ssize_t)socket->receive.low_water_mark > value
951 				&& value >= B_OK)
952 				notify = false;
953 			break;
954 
955 		case B_SELECT_WRITE:
956 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
957 				notify = false;
958 			break;
959 
960 		case B_SELECT_ERROR:
961 			socket->error = value;
962 			break;
963 	}
964 
965 	MutexLocker _(socket->lock);
966 
967 	if (notify && socket->select_pool != NULL) {
968 		notify_select_event_pool(socket->select_pool, event);
969 
970 		if (event == B_SELECT_ERROR) {
971 			// always notify read/write on error
972 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
973 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
974 		}
975 	}
976 
977 	return B_OK;
978 }
979 
980 
981 //	#pragma mark - standard socket API
982 
983 
984 int
985 socket_accept(net_socket* socket, struct sockaddr* address,
986 	socklen_t* _addressLength, net_socket** _acceptedSocket)
987 {
988 	if ((socket->options & SO_ACCEPTCONN) == 0)
989 		return B_BAD_VALUE;
990 
991 	net_socket* accepted;
992 	status_t status = socket->first_info->accept(socket->first_protocol,
993 		&accepted);
994 	if (status != B_OK)
995 		return status;
996 
997 	if (address && *_addressLength > 0) {
998 		memcpy(address, &accepted->peer, min_c(*_addressLength,
999 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
1000 		*_addressLength = accepted->peer.ss_len;
1001 	}
1002 
1003 	*_acceptedSocket = accepted;
1004 	return B_OK;
1005 }
1006 
1007 
1008 int
1009 socket_bind(net_socket* socket, const struct sockaddr* address,
1010 	socklen_t addressLength)
1011 {
1012 	sockaddr empty;
1013 	if (address == NULL) {
1014 		// special - try to bind to an empty address, like INADDR_ANY
1015 		memset(&empty, 0, sizeof(sockaddr));
1016 		empty.sa_len = sizeof(sockaddr);
1017 		empty.sa_family = socket->family;
1018 
1019 		address = &empty;
1020 		addressLength = sizeof(sockaddr);
1021 	}
1022 
1023 	if (socket->address.ss_len != 0) {
1024 		status_t status = socket->first_info->unbind(socket->first_protocol,
1025 			(sockaddr*)&socket->address);
1026 		if (status != B_OK)
1027 			return status;
1028 	}
1029 
1030 	memcpy(&socket->address, address, sizeof(sockaddr));
1031 	socket->address.ss_len = sizeof(sockaddr_storage);
1032 
1033 	status_t status = socket->first_info->bind(socket->first_protocol,
1034 		(sockaddr*)address);
1035 	if (status != B_OK) {
1036 		// clear address again, as binding failed
1037 		socket->address.ss_len = 0;
1038 	}
1039 
1040 	return status;
1041 }
1042 
1043 
1044 int
1045 socket_connect(net_socket* socket, const struct sockaddr* address,
1046 	socklen_t addressLength)
1047 {
1048 	if (address == NULL || addressLength == 0)
1049 		return ENETUNREACH;
1050 
1051 	if (socket->address.ss_len == 0) {
1052 		// try to bind first
1053 		status_t status = socket_bind(socket, NULL, 0);
1054 		if (status != B_OK)
1055 			return status;
1056 	}
1057 
1058 	return socket->first_info->connect(socket->first_protocol, address);
1059 }
1060 
1061 
1062 int
1063 socket_getpeername(net_socket* _socket, struct sockaddr* address,
1064 	socklen_t* _addressLength)
1065 {
1066 	net_socket_private* socket = (net_socket_private*)_socket;
1067 	if (!socket->is_connected || socket->peer.ss_len == 0)
1068 		return ENOTCONN;
1069 
1070 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
1071 	*_addressLength = socket->peer.ss_len;
1072 	return B_OK;
1073 }
1074 
1075 
1076 int
1077 socket_getsockname(net_socket* socket, struct sockaddr* address,
1078 	socklen_t* _addressLength)
1079 {
1080 	if (socket->address.ss_len == 0) {
1081 		struct sockaddr buffer;
1082 		memset(&buffer, 0, sizeof(buffer));
1083 		buffer.sa_family = socket->family;
1084 
1085 		memcpy(address, &buffer, min_c(*_addressLength, sizeof(buffer)));
1086 		*_addressLength = sizeof(buffer);
1087 		return B_OK;
1088 	}
1089 
1090 	memcpy(address, &socket->address, min_c(*_addressLength,
1091 		socket->address.ss_len));
1092 	*_addressLength = socket->address.ss_len;
1093 	return B_OK;
1094 }
1095 
1096 
1097 status_t
1098 socket_get_option(net_socket* socket, int level, int option, void* value,
1099 	int* _length)
1100 {
1101 	if (level != SOL_SOCKET)
1102 		return ENOPROTOOPT;
1103 
1104 	switch (option) {
1105 		case SO_SNDBUF:
1106 		{
1107 			uint32* size = (uint32*)value;
1108 			*size = socket->send.buffer_size;
1109 			*_length = sizeof(uint32);
1110 			return B_OK;
1111 		}
1112 
1113 		case SO_RCVBUF:
1114 		{
1115 			uint32* size = (uint32*)value;
1116 			*size = socket->receive.buffer_size;
1117 			*_length = sizeof(uint32);
1118 			return B_OK;
1119 		}
1120 
1121 		case SO_SNDLOWAT:
1122 		{
1123 			uint32* size = (uint32*)value;
1124 			*size = socket->send.low_water_mark;
1125 			*_length = sizeof(uint32);
1126 			return B_OK;
1127 		}
1128 
1129 		case SO_RCVLOWAT:
1130 		{
1131 			uint32* size = (uint32*)value;
1132 			*size = socket->receive.low_water_mark;
1133 			*_length = sizeof(uint32);
1134 			return B_OK;
1135 		}
1136 
1137 		case SO_RCVTIMEO:
1138 		case SO_SNDTIMEO:
1139 		{
1140 			if (*_length < (int)sizeof(struct timeval))
1141 				return B_BAD_VALUE;
1142 
1143 			bigtime_t timeout;
1144 			if (option == SO_SNDTIMEO)
1145 				timeout = socket->send.timeout;
1146 			else
1147 				timeout = socket->receive.timeout;
1148 			if (timeout == B_INFINITE_TIMEOUT)
1149 				timeout = 0;
1150 
1151 			struct timeval* timeval = (struct timeval*)value;
1152 			timeval->tv_sec = timeout / 1000000LL;
1153 			timeval->tv_usec = timeout % 1000000LL;
1154 
1155 			*_length = sizeof(struct timeval);
1156 			return B_OK;
1157 		}
1158 
1159 		case SO_NONBLOCK:
1160 		{
1161 			int32* _set = (int32*)value;
1162 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1163 			*_length = sizeof(int32);
1164 			return B_OK;
1165 		}
1166 
1167 		case SO_ACCEPTCONN:
1168 		case SO_BROADCAST:
1169 		case SO_DEBUG:
1170 		case SO_DONTROUTE:
1171 		case SO_KEEPALIVE:
1172 		case SO_OOBINLINE:
1173 		case SO_REUSEADDR:
1174 		case SO_REUSEPORT:
1175 		case SO_USELOOPBACK:
1176 		{
1177 			int32* _set = (int32*)value;
1178 			*_set = (socket->options & option) != 0;
1179 			*_length = sizeof(int32);
1180 			return B_OK;
1181 		}
1182 
1183 		case SO_TYPE:
1184 		{
1185 			int32* _set = (int32*)value;
1186 			*_set = socket->type;
1187 			*_length = sizeof(int32);
1188 			return B_OK;
1189 		}
1190 
1191 		case SO_ERROR:
1192 		{
1193 			int32* _set = (int32*)value;
1194 			*_set = socket->error;
1195 			*_length = sizeof(int32);
1196 
1197 			socket->error = B_OK;
1198 				// clear error upon retrieval
1199 			return B_OK;
1200 		}
1201 
1202 		default:
1203 			break;
1204 	}
1205 
1206 	dprintf("socket_getsockopt: unknown option %d\n", option);
1207 	return ENOPROTOOPT;
1208 }
1209 
1210 
1211 int
1212 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1213 	int* _length)
1214 {
1215 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1216 		level, option, value, _length);
1217 }
1218 
1219 
1220 int
1221 socket_listen(net_socket* socket, int backlog)
1222 {
1223 	status_t status = socket->first_info->listen(socket->first_protocol,
1224 		backlog);
1225 	if (status == B_OK)
1226 		socket->options |= SO_ACCEPTCONN;
1227 
1228 	return status;
1229 }
1230 
1231 
1232 ssize_t
1233 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1234 	int flags)
1235 {
1236 	// If the protocol sports read_data_no_buffer() we use it.
1237 	if (socket->first_info->read_data_no_buffer != NULL)
1238 		return socket_receive_no_buffer(socket, header, data, length, flags);
1239 
1240 	size_t totalLength = length;
1241 	net_buffer* buffer;
1242 	int i;
1243 
1244 	// the convention to this function is that have header been
1245 	// present, { data, length } would have been iovec[0] and is
1246 	// always considered like that
1247 
1248 	if (header) {
1249 		// calculate the length considering all of the extra buffers
1250 		for (i = 1; i < header->msg_iovlen; i++)
1251 			totalLength += header->msg_iov[i].iov_len;
1252 	}
1253 
1254 	status_t status = socket->first_info->read_data(
1255 		socket->first_protocol, totalLength, flags, &buffer);
1256 	if (status != B_OK)
1257 		return status;
1258 
1259 	// process ancillary data
1260 	if (header != NULL) {
1261 		if (buffer != NULL && header->msg_control != NULL) {
1262 			ancillary_data_container* container
1263 				= gNetBufferModule.get_ancillary_data(buffer);
1264 			if (container != NULL)
1265 				status = process_ancillary_data(socket, container, header);
1266 			else
1267 				status = process_ancillary_data(socket, buffer, header);
1268 			if (status != B_OK) {
1269 				gNetBufferModule.free(buffer);
1270 				return status;
1271 			}
1272 		} else
1273 			header->msg_controllen = 0;
1274 	}
1275 
1276 	// TODO: - returning a NULL buffer when received 0 bytes
1277 	//         may not make much sense as we still need the address
1278 	//       - gNetBufferModule.read() uses memcpy() instead of user_memcpy
1279 
1280 	size_t nameLen = 0;
1281 
1282 	if (header) {
1283 		// TODO: - consider the control buffer options
1284 		nameLen = header->msg_namelen;
1285 		header->msg_namelen = 0;
1286 		header->msg_flags = 0;
1287 	}
1288 
1289 	if (buffer == NULL)
1290 		return 0;
1291 
1292 	size_t bytesReceived = buffer->size, bytesCopied = 0;
1293 
1294 	length = min_c(bytesReceived, length);
1295 	if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
1296 		gNetBufferModule.free(buffer);
1297 		return ENOBUFS;
1298 	}
1299 
1300 	// if first copy was a success, proceed to following
1301 	// copies as required
1302 	bytesCopied += length;
1303 
1304 	if (header) {
1305 		// we only start considering at iovec[1]
1306 		// as { data, length } is iovec[0]
1307 		for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1308 			iovec& vec = header->msg_iov[i];
1309 			size_t toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1310 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1311 					toRead) < B_OK) {
1312 				break;
1313 			}
1314 
1315 			bytesCopied += toRead;
1316 		}
1317 
1318 		if (header->msg_name != NULL) {
1319 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1320 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1321 		}
1322 	}
1323 
1324 	gNetBufferModule.free(buffer);
1325 
1326 	if (bytesCopied < bytesReceived) {
1327 		if (header)
1328 			header->msg_flags = MSG_TRUNC;
1329 
1330 		if (flags & MSG_TRUNC)
1331 			return bytesReceived;
1332 	}
1333 
1334 	return bytesCopied;
1335 }
1336 
1337 
1338 ssize_t
1339 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1340 	int flags)
1341 {
1342 	const sockaddr* address = NULL;
1343 	socklen_t addressLength = 0;
1344 	size_t bytesLeft = length;
1345 
1346 	if (length > SSIZE_MAX)
1347 		return B_BAD_VALUE;
1348 
1349 	ancillary_data_container* ancillaryData = NULL;
1350 	CObjectDeleter<
1351 		ancillary_data_container, void, delete_ancillary_data_container>
1352 		ancillaryDataDeleter;
1353 
1354 	if (header != NULL) {
1355 		address = (const sockaddr*)header->msg_name;
1356 		addressLength = header->msg_namelen;
1357 
1358 		// get the ancillary data
1359 		if (header->msg_control != NULL) {
1360 			ancillaryData = create_ancillary_data_container();
1361 			if (ancillaryData == NULL)
1362 				return B_NO_MEMORY;
1363 			ancillaryDataDeleter.SetTo(ancillaryData);
1364 
1365 			status_t status = add_ancillary_data(socket, ancillaryData,
1366 				(cmsghdr*)header->msg_control, header->msg_controllen);
1367 			if (status != B_OK)
1368 				return status;
1369 		}
1370 	}
1371 
1372 	if (addressLength == 0)
1373 		address = NULL;
1374 	else if (address == NULL)
1375 		return B_BAD_VALUE;
1376 
1377 	if (socket->peer.ss_len != 0) {
1378 		if (address != NULL)
1379 			return EISCONN;
1380 
1381 		// socket is connected, we use that address
1382 		address = (struct sockaddr*)&socket->peer;
1383 		addressLength = socket->peer.ss_len;
1384 	}
1385 
1386 	if (address == NULL || addressLength == 0) {
1387 		// don't know where to send to:
1388 		return EDESTADDRREQ;
1389 	}
1390 
1391 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1392 		&& bytesLeft > socket->send.buffer_size)
1393 		return EMSGSIZE;
1394 
1395 	if (socket->address.ss_len == 0) {
1396 		// try to bind first
1397 		status_t status = socket_bind(socket, NULL, 0);
1398 		if (status != B_OK)
1399 			return status;
1400 	}
1401 
1402 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1403 	if (socket->first_info->send_data_no_buffer != NULL) {
1404 		iovec stackVec = { (void*)data, length };
1405 		iovec* vecs = header ? header->msg_iov : &stackVec;
1406 		int vecCount = header ? header->msg_iovlen : 1;
1407 
1408 		ssize_t written = socket->first_info->send_data_no_buffer(
1409 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1410 			addressLength);
1411 		if (written > 0)
1412 			ancillaryDataDeleter.Detach();
1413 		return written;
1414 	}
1415 
1416 	// By convention, if a header is given, the (data, length) equals the first
1417 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1418 	// the size of the remaining ones.
1419 	if (header != NULL) {
1420 		if (header->msg_iovlen <= 1)
1421 			header = NULL;
1422 		else {
1423 // TODO: The iovecs have already been copied to kernel space. Simplify!
1424 			bytesLeft += compute_user_iovec_length(header->msg_iov + 1,
1425 				header->msg_iovlen - 1);
1426 		}
1427 	}
1428 
1429 	ssize_t bytesSent = 0;
1430 	size_t vecOffset = 0;
1431 	uint32 vecIndex = 0;
1432 
1433 	while (bytesLeft > 0) {
1434 		// TODO: useful, maybe even computed header space!
1435 		net_buffer* buffer = gNetBufferModule.create(256);
1436 		if (buffer == NULL)
1437 			return ENOBUFS;
1438 
1439 		while (buffer->size < socket->send.buffer_size
1440 			&& buffer->size < bytesLeft) {
1441 			if (vecIndex > 0 && vecOffset == 0) {
1442 				// retrieve next iovec buffer from header
1443 				iovec vec;
1444 				if (user_memcpy(&vec, header->msg_iov + vecIndex, sizeof(iovec))
1445 						< B_OK) {
1446 					gNetBufferModule.free(buffer);
1447 					return B_BAD_ADDRESS;
1448 				}
1449 
1450 				data = vec.iov_base;
1451 				length = vec.iov_len;
1452 			}
1453 
1454 			size_t bytes = length;
1455 			if (buffer->size + bytes > socket->send.buffer_size)
1456 				bytes = socket->send.buffer_size - buffer->size;
1457 
1458 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1459 				gNetBufferModule.free(buffer);
1460 				return ENOBUFS;
1461 			}
1462 
1463 			if (bytes != length) {
1464 				// partial send
1465 				vecOffset = bytes;
1466 				length -= vecOffset;
1467 				data = (uint8*)data + vecOffset;
1468 			} else if (header != NULL) {
1469 				// proceed with next buffer, if any
1470 				vecOffset = 0;
1471 				vecIndex++;
1472 
1473 				if (vecIndex >= (uint32)header->msg_iovlen)
1474 					break;
1475 			}
1476 		}
1477 
1478 		// attach ancillary data to the first buffer
1479 		status_t status;
1480 		if (ancillaryData != NULL) {
1481 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1482 			ancillaryDataDeleter.Detach();
1483 			ancillaryData = NULL;
1484 		}
1485 
1486 		size_t bufferSize = buffer->size;
1487 		buffer->flags = flags;
1488 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1489 		memcpy(buffer->destination, address, addressLength);
1490 		buffer->destination->sa_len = addressLength;
1491 
1492 		status = socket->first_info->send_data(socket->first_protocol, buffer);
1493 		if (status != B_OK) {
1494 			size_t sizeAfterSend = buffer->size;
1495 			gNetBufferModule.free(buffer);
1496 
1497 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1498 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1499 				// this appears to be a partial write
1500 				return bytesSent + (bufferSize - sizeAfterSend);
1501 			}
1502 			return status;
1503 		}
1504 
1505 		bytesLeft -= bufferSize;
1506 		bytesSent += bufferSize;
1507 	}
1508 
1509 	return bytesSent;
1510 }
1511 
1512 
1513 status_t
1514 socket_set_option(net_socket* socket, int level, int option, const void* value,
1515 	int length)
1516 {
1517 	if (level != SOL_SOCKET)
1518 		return ENOPROTOOPT;
1519 
1520 	TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1521 
1522 	switch (option) {
1523 		// TODO: implement other options!
1524 		case SO_LINGER:
1525 		{
1526 			if (length < (int)sizeof(struct linger))
1527 				return B_BAD_VALUE;
1528 
1529 			struct linger* linger = (struct linger*)value;
1530 			if (linger->l_onoff) {
1531 				socket->options |= SO_LINGER;
1532 				socket->linger = linger->l_linger;
1533 			} else {
1534 				socket->options &= ~SO_LINGER;
1535 				socket->linger = 0;
1536 			}
1537 			return B_OK;
1538 		}
1539 
1540 		case SO_SNDBUF:
1541 			if (length != sizeof(uint32))
1542 				return B_BAD_VALUE;
1543 
1544 			socket->send.buffer_size = *(const uint32*)value;
1545 			return B_OK;
1546 
1547 		case SO_RCVBUF:
1548 			if (length != sizeof(uint32))
1549 				return B_BAD_VALUE;
1550 
1551 			socket->receive.buffer_size = *(const uint32*)value;
1552 			return B_OK;
1553 
1554 		case SO_SNDLOWAT:
1555 			if (length != sizeof(uint32))
1556 				return B_BAD_VALUE;
1557 
1558 			socket->send.low_water_mark = *(const uint32*)value;
1559 			return B_OK;
1560 
1561 		case SO_RCVLOWAT:
1562 			if (length != sizeof(uint32))
1563 				return B_BAD_VALUE;
1564 
1565 			socket->receive.low_water_mark = *(const uint32*)value;
1566 			return B_OK;
1567 
1568 		case SO_RCVTIMEO:
1569 		case SO_SNDTIMEO:
1570 		{
1571 			if (length != sizeof(struct timeval))
1572 				return B_BAD_VALUE;
1573 
1574 			const struct timeval* timeval = (const struct timeval*)value;
1575 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1576 			if (timeout == 0)
1577 				timeout = B_INFINITE_TIMEOUT;
1578 
1579 			if (option == SO_SNDTIMEO)
1580 				socket->send.timeout = timeout;
1581 			else
1582 				socket->receive.timeout = timeout;
1583 			return B_OK;
1584 		}
1585 
1586 		case SO_NONBLOCK:
1587 			if (length != sizeof(int32))
1588 				return B_BAD_VALUE;
1589 
1590 			if (*(const int32*)value) {
1591 				socket->send.timeout = 0;
1592 				socket->receive.timeout = 0;
1593 			} else {
1594 				socket->send.timeout = B_INFINITE_TIMEOUT;
1595 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1596 			}
1597 			return B_OK;
1598 
1599 		case SO_BROADCAST:
1600 		case SO_DEBUG:
1601 		case SO_DONTROUTE:
1602 		case SO_KEEPALIVE:
1603 		case SO_OOBINLINE:
1604 		case SO_REUSEADDR:
1605 		case SO_REUSEPORT:
1606 		case SO_USELOOPBACK:
1607 			if (length != sizeof(int32))
1608 				return B_BAD_VALUE;
1609 
1610 			if (*(const int32*)value)
1611 				socket->options |= option;
1612 			else
1613 				socket->options &= ~option;
1614 			return B_OK;
1615 
1616 		case SO_BINDTODEVICE:
1617 		{
1618 			if (length != sizeof(uint32))
1619 				return B_BAD_VALUE;
1620 
1621 			// TODO: we might want to check if the device exists at all
1622 			// (although it doesn't really harm when we don't)
1623 			socket->bound_to_device = *(const uint32*)value;
1624 			return B_OK;
1625 		}
1626 
1627 		default:
1628 			break;
1629 	}
1630 
1631 	dprintf("socket_setsockopt: unknown option %d\n", option);
1632 	return ENOPROTOOPT;
1633 }
1634 
1635 
1636 int
1637 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1638 	int length)
1639 {
1640 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1641 		level, option, value, length);
1642 }
1643 
1644 
1645 int
1646 socket_shutdown(net_socket* socket, int direction)
1647 {
1648 	return socket->first_info->shutdown(socket->first_protocol, direction);
1649 }
1650 
1651 
1652 status_t
1653 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1654 {
1655 	sockets[0] = NULL;
1656 	sockets[1] = NULL;
1657 
1658 	// create sockets
1659 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1660 	if (error != B_OK)
1661 		return error;
1662 
1663 	error = socket_open(family, type, protocol, &sockets[1]);
1664 
1665 	// bind one
1666 	if (error == B_OK)
1667 		error = socket_bind(sockets[0], NULL, 0);
1668 
1669 	// start listening
1670 	if (error == B_OK)
1671 		error = socket_listen(sockets[0], 1);
1672 
1673 	// connect them
1674 	if (error == B_OK) {
1675 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1676 			sockets[0]->address.ss_len);
1677 	}
1678 
1679 	// accept a socket
1680 	net_socket* acceptedSocket = NULL;
1681 	if (error == B_OK)
1682 		error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1683 
1684 	if (error == B_OK) {
1685 		// everything worked: close the listener socket
1686 		socket_close(sockets[0]);
1687 		socket_free(sockets[0]);
1688 		sockets[0] = acceptedSocket;
1689 	} else {
1690 		// close sockets on error
1691 		for (int i = 0; i < 2; i++) {
1692 			if (sockets[i] != NULL) {
1693 				socket_close(sockets[i]);
1694 				socket_free(sockets[i]);
1695 				sockets[i] = NULL;
1696 			}
1697 		}
1698 	}
1699 
1700 	return error;
1701 }
1702 
1703 
1704 //	#pragma mark -
1705 
1706 
1707 static status_t
1708 socket_std_ops(int32 op, ...)
1709 {
1710 	switch (op) {
1711 		case B_MODULE_INIT:
1712 		{
1713 			new (&sSocketList) SocketList;
1714 			mutex_init(&sSocketLock, "socket list");
1715 
1716 #if ENABLE_DEBUGGER_COMMANDS
1717 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1718 			add_debugger_command("socket", dump_socket, "dumps a socket");
1719 #endif
1720 			return B_OK;
1721 		}
1722 		case B_MODULE_UNINIT:
1723 			ASSERT(sSocketList.IsEmpty());
1724 			mutex_destroy(&sSocketLock);
1725 
1726 #if ENABLE_DEBUGGER_COMMANDS
1727 			remove_debugger_command("socket", dump_socket);
1728 			remove_debugger_command("sockets", dump_sockets);
1729 #endif
1730 			return B_OK;
1731 
1732 		default:
1733 			return B_ERROR;
1734 	}
1735 }
1736 
1737 
1738 net_socket_module_info gNetSocketModule = {
1739 	{
1740 		NET_SOCKET_MODULE_NAME,
1741 		0,
1742 		socket_std_ops
1743 	},
1744 	socket_open,
1745 	socket_close,
1746 	socket_free,
1747 
1748 	socket_readv,
1749 	socket_writev,
1750 	socket_control,
1751 
1752 	socket_read_avail,
1753 	socket_send_avail,
1754 
1755 	socket_send_data,
1756 	socket_receive_data,
1757 
1758 	socket_get_option,
1759 	socket_set_option,
1760 
1761 	socket_get_next_stat,
1762 
1763 	// connections
1764 	socket_acquire,
1765 	socket_release,
1766 	socket_spawn_pending,
1767 	socket_dequeue_connected,
1768 	socket_count_connected,
1769 	socket_set_max_backlog,
1770 	socket_has_parent,
1771 	socket_connected,
1772 	socket_aborted,
1773 
1774 	// notifications
1775 	socket_request_notification,
1776 	socket_cancel_notification,
1777 	socket_notify,
1778 
1779 	// standard socket API
1780 	socket_accept,
1781 	socket_bind,
1782 	socket_connect,
1783 	socket_getpeername,
1784 	socket_getsockname,
1785 	socket_getsockopt,
1786 	socket_listen,
1787 	socket_receive,
1788 	socket_send,
1789 	socket_setsockopt,
1790 	socket_shutdown,
1791 	socket_socketpair
1792 };
1793 
1794