xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 151343ebc86cf0ce61a6c7789f853dff35c57e9c)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
// Uncomment the following define to turn the TRACE() calls in this file into
// dprintf() output prefixed with STACK_DEBUG_PREFIX; otherwise TRACE() expands
// to an empty statement.
//#define TRACE_SOCKET
#ifdef TRACE_SOCKET
#	define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
#else
#	define TRACE(x...) ;
#endif
46 
47 
48 struct net_socket_private;
49 typedef DoublyLinkedList<net_socket_private> SocketList;
50 
/*!	Stack-private extension of net_socket. Besides the public socket fields
	it carries the parent/child bookkeeping used for incoming connections,
	the select pool, and the per-socket lock. It can be linked into a
	SocketList and is weakly referenceable.
*/
struct net_socket_private : net_socket,
		DoublyLinkedListLinkImpl<net_socket_private>,
		BWeakReferenceable {
	net_socket_private();
	~net_socket_private();

	void RemoveFromParent();

	BWeakReference<net_socket_private> parent;
		// set by socket_spawn_pending() to the socket the child was spawned
		// from; cleared by RemoveFromParent()
	team_id						owner;
		// -1 until socket_open() stores the current team, or
		// socket_spawn_pending() copies the parent's owner
	uint32						max_backlog;
		// set by socket_set_max_backlog()
	uint32						child_count;
		// number of sockets in pending_children and connected_children
	SocketList					pending_children;
		// children added by socket_spawn_pending()
	SocketList					connected_children;
		// children moved here by socket_connected()

	struct select_sync_pool*	select_pool;
	mutex						lock;
		// held while the child lists, child_count, and select_pool are
		// accessed by the functions in this file

	bool						is_connected;
		// set by socket_connected()
	bool						is_in_socket_list;
		// whether the socket has been added to sSocketList
};
72 
73 
// Forward declarations, needed because socket_writev() and socket_control()
// call these functions.
int socket_bind(net_socket* socket, const struct sockaddr* address,
	socklen_t addressLength);
int socket_setsockopt(net_socket* socket, int level, int option,
	const void* value, int length);
ssize_t socket_read_avail(net_socket* socket);

// Sockets are added to sSocketList by socket_open() and
// net_socket_private::RemoveFromParent(). Apart from the debugger commands,
// the list is only accessed with sSocketLock held.
static SocketList sSocketList;
static mutex sSocketLock;
82 
83 
84 net_socket_private::net_socket_private()
85 	:
86 	owner(-1),
87 	max_backlog(0),
88 	child_count(0),
89 	select_pool(NULL),
90 	is_connected(false),
91 	is_in_socket_list(false)
92 {
93 	first_protocol = NULL;
94 	first_info = NULL;
95 	options = 0;
96 	linger = 0;
97 	bound_to_device = 0;
98 	error = 0;
99 
100 	address.ss_len = 0;
101 	peer.ss_len = 0;
102 
103 	mutex_init(&lock, "socket");
104 
105 	// set defaults (may be overridden by the protocols)
106 	send.buffer_size = 65535;
107 	send.low_water_mark = 1;
108 	send.timeout = B_INFINITE_TIMEOUT;
109 	receive.buffer_size = 65535;
110 	receive.low_water_mark = 1;
111 	receive.timeout = B_INFINITE_TIMEOUT;
112 }
113 
114 
115 net_socket_private::~net_socket_private()
116 {
117 	TRACE("delete net_socket %p\n", this);
118 
119 	if (parent != NULL)
120 		panic("socket still has a parent!");
121 
122 	if (is_in_socket_list) {
123 		MutexLocker _(sSocketLock);
124 		sSocketList.Remove(this);
125 	}
126 
127 	mutex_lock(&lock);
128 
129 	// also delete all children of this socket
130 	while (net_socket_private* child = pending_children.RemoveHead()) {
131 		child->RemoveFromParent();
132 	}
133 	while (net_socket_private* child = connected_children.RemoveHead()) {
134 		child->RemoveFromParent();
135 	}
136 
137 	mutex_unlock(&lock);
138 
139 	put_domain_protocols(this);
140 
141 	mutex_destroy(&lock);
142 }
143 
144 
145 void
146 net_socket_private::RemoveFromParent()
147 {
148 	ASSERT(!is_in_socket_list && parent != NULL);
149 
150 	parent = NULL;
151 
152 	mutex_lock(&sSocketLock);
153 	sSocketList.Add(this);
154 	mutex_unlock(&sSocketLock);
155 
156 	is_in_socket_list = true;
157 
158 	ReleaseReference();
159 }
160 
161 
162 //	#pragma mark -
163 
164 
165 static size_t
166 compute_user_iovec_length(iovec* userVec, uint32 count)
167 {
168 	size_t length = 0;
169 
170 	for (uint32 i = 0; i < count; i++) {
171 		iovec vec;
172 		if (user_memcpy(&vec, userVec + i, sizeof(iovec)) < B_OK)
173 			return 0;
174 
175 		length += vec.iov_len;
176 	}
177 
178 	return length;
179 }
180 
181 
182 static status_t
183 create_socket(int family, int type, int protocol, net_socket_private** _socket)
184 {
185 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
186 	if (socket == NULL)
187 		return B_NO_MEMORY;
188 	status_t status = socket->InitCheck();
189 	if (status != B_OK) {
190 		delete socket;
191 		return status;
192 	}
193 
194 	socket->family = family;
195 	socket->type = type;
196 	socket->protocol = protocol;
197 
198 	status = get_domain_protocols(socket);
199 	if (status != B_OK) {
200 		delete socket;
201 		return status;
202 	}
203 
204 	TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
205 		socket->type, socket->protocol);
206 
207 #ifdef TRACE_SOCKET
208 	net_protocol* current = socket->first_protocol;
209 	for (int i = 0; current != NULL; current = current->next, i++)
210 		TRACE("  [%d] %p  %s\n", i, current, current->module->info.name);
211 #endif
212 
213 	*_socket = socket;
214 	return B_OK;
215 }
216 
217 
218 static status_t
219 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
220 	void* data, size_t dataLen)
221 {
222 	cmsghdr* header = (cmsghdr*)data;
223 
224 	if (dataLen == 0)
225 		return B_OK;
226 
227 	if (socket->first_info->add_ancillary_data == NULL)
228 		return B_NOT_SUPPORTED;
229 
230 	while (true) {
231 		if (header->cmsg_len < CMSG_LEN(0) || header->cmsg_len > dataLen)
232 			return B_BAD_VALUE;
233 
234 		status_t status = socket->first_info->add_ancillary_data(
235 			socket->first_protocol, container, header);
236 		if (status != B_OK)
237 			return status;
238 
239 		if (dataLen <= _ALIGN(header->cmsg_len))
240 			break;
241 		dataLen -= _ALIGN(header->cmsg_len);
242 		header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
243 	}
244 
245 	return B_OK;
246 }
247 
248 
249 static status_t
250 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
251 	msghdr* messageHeader)
252 {
253 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
254 	int dataBufferLen = messageHeader->msg_controllen;
255 
256 	if (container == NULL || dataBuffer == NULL) {
257 		messageHeader->msg_controllen = 0;
258 		return B_OK;
259 	}
260 
261 	ancillary_data_header header;
262 	void* data = NULL;
263 
264 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
265 		if (socket->first_info->process_ancillary_data == NULL)
266 			return B_NOT_SUPPORTED;
267 
268 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
269 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
270 		if (bytesWritten < 0)
271 			return bytesWritten;
272 
273 		dataBuffer += bytesWritten;
274 		dataBufferLen -= bytesWritten;
275 	}
276 
277 	messageHeader->msg_controllen -= dataBufferLen;
278 
279 	return B_OK;
280 }
281 
282 
283 static status_t
284 process_ancillary_data(net_socket* socket,
285 	net_buffer* buffer, msghdr* messageHeader)
286 {
287 	void *dataBuffer = messageHeader->msg_control;
288 	ssize_t bytesWritten;
289 
290 	if (dataBuffer == NULL) {
291 		messageHeader->msg_controllen = 0;
292 		return B_OK;
293 	}
294 
295 	if (socket->first_info->process_ancillary_data_no_container == NULL)
296 		return B_NOT_SUPPORTED;
297 
298 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
299 		socket->first_protocol, buffer, dataBuffer,
300 		messageHeader->msg_controllen);
301 	if (bytesWritten < 0)
302 		return bytesWritten;
303 	messageHeader->msg_controllen = bytesWritten;
304 
305 	return B_OK;
306 }
307 
308 
309 static ssize_t
310 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
311 	size_t length, int flags)
312 {
313 	iovec stackVec = { data, length };
314 	iovec* vecs = header ? header->msg_iov : &stackVec;
315 	int vecCount = header ? header->msg_iovlen : 1;
316 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
317 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
318 
319 	ancillary_data_container* ancillaryData = NULL;
320 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
321 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
322 		addressLen);
323 	if (bytesRead < 0)
324 		return bytesRead;
325 
326 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(ancillaryData,
327 		&delete_ancillary_data_container);
328 
329 	// process ancillary data
330 	if (header != NULL) {
331 		status_t status = process_ancillary_data(socket, ancillaryData, header);
332 		if (status != B_OK)
333 			return status;
334 
335 		header->msg_flags = 0;
336 	}
337 
338 	return bytesRead;
339 }
340 
341 
342 #if ENABLE_DEBUGGER_COMMANDS
343 
344 
345 static void
346 print_socket_line(net_socket_private* socket, const char* prefix)
347 {
348 	BReference<net_socket_private> parent = socket->parent.GetReference();
349 	kprintf("%s%p %2d.%2d.%2d %6" B_PRId32 " %p %p  %p%s\n", prefix, socket,
350 		socket->family, socket->type, socket->protocol, socket->owner,
351 		socket->first_protocol, socket->first_info, parent.Get(),
352 		parent.Get() != NULL ? socket->is_connected ? " (c)" : " (p)" : "");
353 }
354 
355 
356 static int
357 dump_socket(int argc, char** argv)
358 {
359 	if (argc < 2) {
360 		kprintf("usage: %s [address]\n", argv[0]);
361 		return 0;
362 	}
363 
364 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
365 
366 	kprintf("SOCKET %p\n", socket);
367 	kprintf("  family.type.protocol: %d.%d.%d\n",
368 		socket->family, socket->type, socket->protocol);
369 	BReference<net_socket_private> parent = socket->parent.GetReference();
370 	kprintf("  parent:               %p\n", parent.Get());
371 	kprintf("  first protocol:       %p\n", socket->first_protocol);
372 	kprintf("  first module_info:    %p\n", socket->first_info);
373 	kprintf("  options:              %x\n", socket->options);
374 	kprintf("  linger:               %d\n", socket->linger);
375 	kprintf("  bound to device:      %" B_PRIu32 "\n", socket->bound_to_device);
376 	kprintf("  owner:                %" B_PRId32 "\n", socket->owner);
377 	kprintf("  max backlog:          %" B_PRId32 "\n", socket->max_backlog);
378 	kprintf("  is connected:         %d\n", socket->is_connected);
379 	kprintf("  child_count:          %" B_PRIu32 "\n", socket->child_count);
380 
381 	if (socket->child_count == 0)
382 		return 0;
383 
384 	kprintf("    pending children:\n");
385 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
386 	while (net_socket_private* child = iterator.Next()) {
387 		print_socket_line(child, "      ");
388 	}
389 
390 	kprintf("    connected children:\n");
391 	iterator = socket->connected_children.GetIterator();
392 	while (net_socket_private* child = iterator.Next()) {
393 		print_socket_line(child, "      ");
394 	}
395 
396 	return 0;
397 }
398 
399 
400 static int
401 dump_sockets(int argc, char** argv)
402 {
403 	kprintf("address        kind  owner protocol   module_info parent\n");
404 
405 	SocketList::Iterator iterator = sSocketList.GetIterator();
406 	while (net_socket_private* socket = iterator.Next()) {
407 		print_socket_line(socket, "");
408 
409 		SocketList::Iterator childIterator
410 			= socket->pending_children.GetIterator();
411 		while (net_socket_private* child = childIterator.Next()) {
412 			print_socket_line(child, " ");
413 		}
414 
415 		childIterator = socket->connected_children.GetIterator();
416 		while (net_socket_private* child = childIterator.Next()) {
417 			print_socket_line(child, " ");
418 		}
419 	}
420 
421 	return 0;
422 }
423 
424 
425 #endif	// ENABLE_DEBUGGER_COMMANDS
426 
427 
428 //	#pragma mark -
429 
430 
431 status_t
432 socket_open(int family, int type, int protocol, net_socket** _socket)
433 {
434 	net_socket_private* socket;
435 	status_t status = create_socket(family, type, protocol, &socket);
436 	if (status != B_OK)
437 		return status;
438 
439 	status = socket->first_info->open(socket->first_protocol);
440 	if (status != B_OK) {
441 		delete socket;
442 		return status;
443 	}
444 
445 	socket->owner = team_get_current_team_id();
446 	socket->is_in_socket_list = true;
447 
448 	mutex_lock(&sSocketLock);
449 	sSocketList.Add(socket);
450 	mutex_unlock(&sSocketLock);
451 
452 	*_socket = socket;
453 	return B_OK;
454 }
455 
456 
457 status_t
458 socket_close(net_socket* _socket)
459 {
460 	net_socket_private* socket = (net_socket_private*)_socket;
461 	return socket->first_info->close(socket->first_protocol);
462 }
463 
464 
465 void
466 socket_free(net_socket* _socket)
467 {
468 	net_socket_private* socket = (net_socket_private*)_socket;
469 	socket->first_info->free(socket->first_protocol);
470 	socket->ReleaseReference();
471 }
472 
473 
/*!	Not implemented: ignores all of its arguments and always returns -1. */
status_t
socket_readv(net_socket* socket, const iovec* vecs, size_t vecCount,
	size_t* _length)
{
	return -1;
}
480 
481 
482 status_t
483 socket_writev(net_socket* socket, const iovec* vecs, size_t vecCount,
484 	size_t* _length)
485 {
486 	if (socket->peer.ss_len == 0)
487 		return ECONNRESET;
488 
489 	if (socket->address.ss_len == 0) {
490 		// try to bind first
491 		status_t status = socket_bind(socket, NULL, 0);
492 		if (status != B_OK)
493 			return status;
494 	}
495 
496 	// TODO: useful, maybe even computed header space!
497 	net_buffer* buffer = gNetBufferModule.create(256);
498 	if (buffer == NULL)
499 		return ENOBUFS;
500 
501 	// copy data into buffer
502 
503 	for (uint32 i = 0; i < vecCount; i++) {
504 		if (gNetBufferModule.append(buffer, vecs[i].iov_base,
505 				vecs[i].iov_len) < B_OK) {
506 			gNetBufferModule.free(buffer);
507 			return ENOBUFS;
508 		}
509 	}
510 
511 	memcpy(buffer->source, &socket->address, socket->address.ss_len);
512 	memcpy(buffer->destination, &socket->peer, socket->peer.ss_len);
513 	size_t size = buffer->size;
514 
515 	ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
516 		buffer);
517 	if (bytesWritten < B_OK) {
518 		if (buffer->size != size) {
519 			// this appears to be a partial write
520 			*_length = size - buffer->size;
521 		}
522 		gNetBufferModule.free(buffer);
523 		return bytesWritten;
524 	}
525 
526 	*_length = bytesWritten;
527 	return B_OK;
528 }
529 
530 
531 status_t
532 socket_control(net_socket* socket, uint32 op, void* data, size_t length)
533 {
534 	switch (op) {
535 		case FIONBIO:
536 		{
537 			if (data == NULL)
538 				return B_BAD_VALUE;
539 
540 			int value;
541 			if (is_syscall()) {
542 				if (!IS_USER_ADDRESS(data)
543 					|| user_memcpy(&value, data, sizeof(int)) != B_OK) {
544 					return B_BAD_ADDRESS;
545 				}
546 			} else
547 				value = *(int*)data;
548 
549 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
550 				sizeof(int));
551 		}
552 
553 		case FIONREAD:
554 		{
555 			if (data == NULL)
556 				return B_BAD_VALUE;
557 
558 			int available = (int)socket_read_avail(socket);
559 			if (available < 0)
560 				return available;
561 
562 			if (is_syscall()) {
563 				if (!IS_USER_ADDRESS(data)
564 					|| user_memcpy(data, &available, sizeof(available))
565 						!= B_OK) {
566 					return B_BAD_ADDRESS;
567 				}
568 			} else
569 				*(int*)data = available;
570 
571 			return B_OK;
572 		}
573 
574 		case B_SET_BLOCKING_IO:
575 		case B_SET_NONBLOCKING_IO:
576 		{
577 			int value = op == B_SET_NONBLOCKING_IO;
578 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
579 				sizeof(int));
580 		}
581 	}
582 
583 	return socket->first_info->control(socket->first_protocol,
584 		LEVEL_DRIVER_IOCTL, op, data, &length);
585 }
586 
587 
588 ssize_t
589 socket_read_avail(net_socket* socket)
590 {
591 	return socket->first_info->read_avail(socket->first_protocol);
592 }
593 
594 
595 ssize_t
596 socket_send_avail(net_socket* socket)
597 {
598 	return socket->first_info->send_avail(socket->first_protocol);
599 }
600 
601 
602 status_t
603 socket_send_data(net_socket* socket, net_buffer* buffer)
604 {
605 	return socket->first_info->send_data(socket->first_protocol,
606 		buffer);
607 }
608 
609 
610 status_t
611 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
612 	net_buffer** _buffer)
613 {
614 	status_t status = socket->first_info->read_data(socket->first_protocol,
615 		length, flags, _buffer);
616 	if (status != B_OK)
617 		return status;
618 
619 	if (*_buffer && length < (*_buffer)->size) {
620 		// discard any data behind the amount requested
621 		gNetBufferModule.trim(*_buffer, length);
622 	}
623 
624 	return status;
625 }
626 
627 
628 status_t
629 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
630 {
631 	MutexLocker locker(sSocketLock);
632 
633 	net_socket_private* socket = NULL;
634 	SocketList::Iterator iterator = sSocketList.GetIterator();
635 	uint32 cookie = *_cookie;
636 	uint32 count = 0;
637 
638 	while (true) {
639 		socket = iterator.Next();
640 		if (socket == NULL)
641 			return B_ENTRY_NOT_FOUND;
642 
643 		// TODO: also traverse the pending connections
644 		if (count == cookie)
645 			break;
646 
647 		if (family == -1 || family == socket->family)
648 			count++;
649 	}
650 
651 	*_cookie = count + 1;
652 
653 	stat->family = socket->family;
654 	stat->type = socket->type;
655 	stat->protocol = socket->protocol;
656 	stat->owner = socket->owner;
657 	stat->state[0] = '\0';
658 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
659 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
660 	stat->receive_queue_size = 0;
661 	stat->send_queue_size = 0;
662 
663 	// fill in protocol specific data (if supported by the protocol)
664 	size_t length = sizeof(net_stat);
665 	socket->first_info->control(socket->first_protocol, socket->protocol,
666 		NET_STAT_SOCKET, stat, &length);
667 
668 	return B_OK;
669 }
670 
671 
672 //	#pragma mark - connections
673 
674 
675 bool
676 socket_acquire(net_socket* _socket)
677 {
678 	net_socket_private* socket = (net_socket_private*)_socket;
679 
680 	// During destruction, the socket might still be accessible over its
681 	// endpoint protocol. We need to make sure the endpoint cannot acquire the
682 	// socket anymore -- while not obvious, the endpoint protocol is responsible
683 	// for the proper locking here.
684 	if (socket->CountReferences() == 0)
685 		return false;
686 
687 	socket->AcquireReference();
688 	return true;
689 }
690 
691 
692 bool
693 socket_release(net_socket* _socket)
694 {
695 	net_socket_private* socket = (net_socket_private*)_socket;
696 	return socket->ReleaseReference();
697 }
698 
699 
700 status_t
701 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
702 {
703 	net_socket_private* parent = (net_socket_private*)_parent;
704 
705 	TRACE("%s(%p)\n", __FUNCTION__, parent);
706 
707 	MutexLocker locker(parent->lock);
708 
709 	// We actually accept more pending connections to compensate for those
710 	// that never complete, and also make sure at least a single connection
711 	// can always be accepted
712 	if (parent->child_count > 3 * parent->max_backlog / 2)
713 		return ENOBUFS;
714 
715 	net_socket_private* socket;
716 	status_t status = create_socket(parent->family, parent->type,
717 		parent->protocol, &socket);
718 	if (status != B_OK)
719 		return status;
720 
721 	// inherit parent's properties
722 	socket->send = parent->send;
723 	socket->receive = parent->receive;
724 	socket->options = parent->options & ~SO_ACCEPTCONN;
725 	socket->linger = parent->linger;
726 	socket->owner = parent->owner;
727 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
728 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
729 
730 	// add to the parent's list of pending connections
731 	parent->pending_children.Add(socket);
732 	socket->parent = parent;
733 	parent->child_count++;
734 
735 	*_socket = socket;
736 	return B_OK;
737 }
738 
739 
740 /*!	Dequeues a connected child from a parent socket.
741 	It also returns a reference with the child socket.
742 */
743 status_t
744 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
745 {
746 	net_socket_private* parent = (net_socket_private*)_parent;
747 
748 	mutex_lock(&parent->lock);
749 
750 	net_socket_private* socket = parent->connected_children.RemoveHead();
751 	if (socket != NULL) {
752 		socket->AcquireReference();
753 		socket->RemoveFromParent();
754 		parent->child_count--;
755 		*_socket = socket;
756 	}
757 
758 	mutex_unlock(&parent->lock);
759 
760 	if (socket == NULL)
761 		return B_ENTRY_NOT_FOUND;
762 
763 	return B_OK;
764 }
765 
766 
767 ssize_t
768 socket_count_connected(net_socket* _parent)
769 {
770 	net_socket_private* parent = (net_socket_private*)_parent;
771 
772 	MutexLocker _(parent->lock);
773 	return parent->connected_children.Count();
774 }
775 
776 
777 status_t
778 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
779 {
780 	net_socket_private* socket = (net_socket_private*)_socket;
781 
782 	// we enforce an upper limit of connections waiting to be accepted
783 	if (backlog > 256)
784 		backlog = 256;
785 
786 	MutexLocker _(socket->lock);
787 
788 	// first remove the pending connections, then the already connected
789 	// ones as needed
790 	net_socket_private* child;
791 	while (socket->child_count > backlog
792 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
793 		child->RemoveFromParent();
794 		socket->child_count--;
795 	}
796 	while (socket->child_count > backlog
797 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
798 		child->RemoveFromParent();
799 		socket->child_count--;
800 	}
801 
802 	socket->max_backlog = backlog;
803 	return B_OK;
804 }
805 
806 
807 /*!	Returns whether or not this socket has a parent. The parent might not be
808 	valid anymore, though.
809 */
810 bool
811 socket_has_parent(net_socket* _socket)
812 {
813 	net_socket_private* socket = (net_socket_private*)_socket;
814 	return socket->parent != NULL;
815 }
816 
817 
818 /*!	The socket has been connected. It will be moved to the connected queue
819 	of its parent socket.
820 */
821 status_t
822 socket_connected(net_socket* _socket)
823 {
824 	net_socket_private* socket = (net_socket_private*)_socket;
825 
826 	TRACE("socket_connected(%p)\n", socket);
827 
828 	BReference<net_socket_private> parent = socket->parent.GetReference();
829 	if (parent.Get() == NULL)
830 		return B_BAD_VALUE;
831 
832 	MutexLocker _(parent->lock);
833 
834 	parent->pending_children.Remove(socket);
835 	parent->connected_children.Add(socket);
836 	socket->is_connected = true;
837 
838 	// notify parent
839 	if (parent->select_pool)
840 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
841 
842 	return B_OK;
843 }
844 
845 
846 /*!	The socket has been aborted. Steals the parent's reference, and releases
847 	it.
848 */
849 status_t
850 socket_aborted(net_socket* _socket)
851 {
852 	net_socket_private* socket = (net_socket_private*)_socket;
853 
854 	TRACE("socket_aborted(%p)\n", socket);
855 
856 	BReference<net_socket_private> parent = socket->parent.GetReference();
857 	if (parent.Get() == NULL)
858 		return B_BAD_VALUE;
859 
860 	MutexLocker _(parent->lock);
861 
862 	if (socket->is_connected)
863 		parent->connected_children.Remove(socket);
864 	else
865 		parent->pending_children.Remove(socket);
866 
867 	parent->child_count--;
868 	socket->RemoveFromParent();
869 
870 	return B_OK;
871 }
872 
873 
874 //	#pragma mark - notifications
875 
876 
877 status_t
878 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
879 {
880 	net_socket_private* socket = (net_socket_private*)_socket;
881 
882 	mutex_lock(&socket->lock);
883 
884 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
885 		event);
886 
887 	mutex_unlock(&socket->lock);
888 
889 	if (status != B_OK)
890 		return status;
891 
892 	// check if the event is already present
893 	// TODO: add support for poll() types
894 
895 	switch (event) {
896 		case B_SELECT_READ:
897 		{
898 			ssize_t available = socket_read_avail(socket);
899 			if ((ssize_t)socket->receive.low_water_mark <= available
900 				|| available < B_OK)
901 				notify_select_event(sync, event);
902 			break;
903 		}
904 		case B_SELECT_WRITE:
905 		{
906 			ssize_t available = socket_send_avail(socket);
907 			if ((ssize_t)socket->send.low_water_mark <= available
908 				|| available < B_OK)
909 				notify_select_event(sync, event);
910 			break;
911 		}
912 		case B_SELECT_ERROR:
913 			if (socket->error != B_OK)
914 				notify_select_event(sync, event);
915 			break;
916 	}
917 
918 	return B_OK;
919 }
920 
921 
922 status_t
923 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
924 {
925 	net_socket_private* socket = (net_socket_private*)_socket;
926 
927 	MutexLocker _(socket->lock);
928 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
929 }
930 
931 
932 status_t
933 socket_notify(net_socket* _socket, uint8 event, int32 value)
934 {
935 	net_socket_private* socket = (net_socket_private*)_socket;
936 	bool notify = true;
937 
938 	switch (event) {
939 		case B_SELECT_READ:
940 			if ((ssize_t)socket->receive.low_water_mark > value
941 				&& value >= B_OK)
942 				notify = false;
943 			break;
944 
945 		case B_SELECT_WRITE:
946 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
947 				notify = false;
948 			break;
949 
950 		case B_SELECT_ERROR:
951 			socket->error = value;
952 			break;
953 	}
954 
955 	MutexLocker _(socket->lock);
956 
957 	if (notify && socket->select_pool != NULL) {
958 		notify_select_event_pool(socket->select_pool, event);
959 
960 		if (event == B_SELECT_ERROR) {
961 			// always notify read/write on error
962 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
963 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
964 		}
965 	}
966 
967 	return B_OK;
968 }
969 
970 
971 //	#pragma mark - standard socket API
972 
973 
974 int
975 socket_accept(net_socket* socket, struct sockaddr* address,
976 	socklen_t* _addressLength, net_socket** _acceptedSocket)
977 {
978 	if ((socket->options & SO_ACCEPTCONN) == 0)
979 		return B_BAD_VALUE;
980 
981 	net_socket* accepted;
982 	status_t status = socket->first_info->accept(socket->first_protocol,
983 		&accepted);
984 	if (status != B_OK)
985 		return status;
986 
987 	if (address && *_addressLength > 0) {
988 		memcpy(address, &accepted->peer, min_c(*_addressLength,
989 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
990 		*_addressLength = accepted->peer.ss_len;
991 	}
992 
993 	*_acceptedSocket = accepted;
994 	return B_OK;
995 }
996 
997 
998 int
999 socket_bind(net_socket* socket, const struct sockaddr* address,
1000 	socklen_t addressLength)
1001 {
1002 	sockaddr empty;
1003 	if (address == NULL) {
1004 		// special - try to bind to an empty address, like INADDR_ANY
1005 		memset(&empty, 0, sizeof(sockaddr));
1006 		empty.sa_len = sizeof(sockaddr);
1007 		empty.sa_family = socket->family;
1008 
1009 		address = &empty;
1010 		addressLength = sizeof(sockaddr);
1011 	}
1012 
1013 	if (socket->address.ss_len != 0) {
1014 		status_t status = socket->first_info->unbind(socket->first_protocol,
1015 			(sockaddr*)&socket->address);
1016 		if (status != B_OK)
1017 			return status;
1018 	}
1019 
1020 	memcpy(&socket->address, address, sizeof(sockaddr));
1021 	socket->address.ss_len = sizeof(sockaddr_storage);
1022 
1023 	status_t status = socket->first_info->bind(socket->first_protocol,
1024 		(sockaddr*)address);
1025 	if (status != B_OK) {
1026 		// clear address again, as binding failed
1027 		socket->address.ss_len = 0;
1028 	}
1029 
1030 	return status;
1031 }
1032 
1033 
1034 int
1035 socket_connect(net_socket* socket, const struct sockaddr* address,
1036 	socklen_t addressLength)
1037 {
1038 	if (address == NULL || addressLength == 0)
1039 		return ENETUNREACH;
1040 
1041 	if (socket->address.ss_len == 0) {
1042 		// try to bind first
1043 		status_t status = socket_bind(socket, NULL, 0);
1044 		if (status != B_OK)
1045 			return status;
1046 	}
1047 
1048 	return socket->first_info->connect(socket->first_protocol, address);
1049 }
1050 
1051 
1052 int
1053 socket_getpeername(net_socket* socket, struct sockaddr* address,
1054 	socklen_t* _addressLength)
1055 {
1056 	if (socket->peer.ss_len == 0)
1057 		return ENOTCONN;
1058 
1059 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
1060 	*_addressLength = socket->peer.ss_len;
1061 	return B_OK;
1062 }
1063 
1064 
1065 int
1066 socket_getsockname(net_socket* socket, struct sockaddr* address,
1067 	socklen_t* _addressLength)
1068 {
1069 	if (socket->address.ss_len == 0) {
1070 		struct sockaddr buffer;
1071 		memset(&buffer, 0, sizeof(buffer));
1072 		buffer.sa_family = socket->family;
1073 
1074 		memcpy(address, &buffer, min_c(*_addressLength, sizeof(buffer)));
1075 		*_addressLength = sizeof(buffer);
1076 		return B_OK;
1077 	}
1078 
1079 	memcpy(address, &socket->address, min_c(*_addressLength,
1080 		socket->address.ss_len));
1081 	*_addressLength = socket->address.ss_len;
1082 	return B_OK;
1083 }
1084 
1085 
1086 status_t
1087 socket_get_option(net_socket* socket, int level, int option, void* value,
1088 	int* _length)
1089 {
1090 	if (level != SOL_SOCKET)
1091 		return ENOPROTOOPT;
1092 
1093 	switch (option) {
1094 		case SO_SNDBUF:
1095 		{
1096 			uint32* size = (uint32*)value;
1097 			*size = socket->send.buffer_size;
1098 			*_length = sizeof(uint32);
1099 			return B_OK;
1100 		}
1101 
1102 		case SO_RCVBUF:
1103 		{
1104 			uint32* size = (uint32*)value;
1105 			*size = socket->receive.buffer_size;
1106 			*_length = sizeof(uint32);
1107 			return B_OK;
1108 		}
1109 
1110 		case SO_SNDLOWAT:
1111 		{
1112 			uint32* size = (uint32*)value;
1113 			*size = socket->send.low_water_mark;
1114 			*_length = sizeof(uint32);
1115 			return B_OK;
1116 		}
1117 
1118 		case SO_RCVLOWAT:
1119 		{
1120 			uint32* size = (uint32*)value;
1121 			*size = socket->receive.low_water_mark;
1122 			*_length = sizeof(uint32);
1123 			return B_OK;
1124 		}
1125 
1126 		case SO_RCVTIMEO:
1127 		case SO_SNDTIMEO:
1128 		{
1129 			if (*_length < (int)sizeof(struct timeval))
1130 				return B_BAD_VALUE;
1131 
1132 			bigtime_t timeout;
1133 			if (option == SO_SNDTIMEO)
1134 				timeout = socket->send.timeout;
1135 			else
1136 				timeout = socket->receive.timeout;
1137 			if (timeout == B_INFINITE_TIMEOUT)
1138 				timeout = 0;
1139 
1140 			struct timeval* timeval = (struct timeval*)value;
1141 			timeval->tv_sec = timeout / 1000000LL;
1142 			timeval->tv_usec = timeout % 1000000LL;
1143 
1144 			*_length = sizeof(struct timeval);
1145 			return B_OK;
1146 		}
1147 
1148 		case SO_NONBLOCK:
1149 		{
1150 			int32* _set = (int32*)value;
1151 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1152 			*_length = sizeof(int32);
1153 			return B_OK;
1154 		}
1155 
1156 		case SO_ACCEPTCONN:
1157 		case SO_BROADCAST:
1158 		case SO_DEBUG:
1159 		case SO_DONTROUTE:
1160 		case SO_KEEPALIVE:
1161 		case SO_OOBINLINE:
1162 		case SO_REUSEADDR:
1163 		case SO_REUSEPORT:
1164 		case SO_USELOOPBACK:
1165 		{
1166 			int32* _set = (int32*)value;
1167 			*_set = (socket->options & option) != 0;
1168 			*_length = sizeof(int32);
1169 			return B_OK;
1170 		}
1171 
1172 		case SO_TYPE:
1173 		{
1174 			int32* _set = (int32*)value;
1175 			*_set = socket->type;
1176 			*_length = sizeof(int32);
1177 			return B_OK;
1178 		}
1179 
1180 		case SO_ERROR:
1181 		{
1182 			int32* _set = (int32*)value;
1183 			*_set = socket->error;
1184 			*_length = sizeof(int32);
1185 
1186 			socket->error = B_OK;
1187 				// clear error upon retrieval
1188 			return B_OK;
1189 		}
1190 
1191 		default:
1192 			break;
1193 	}
1194 
1195 	dprintf("socket_getsockopt: unknown option %d\n", option);
1196 	return ENOPROTOOPT;
1197 }
1198 
1199 
1200 int
1201 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1202 	int* _length)
1203 {
1204 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1205 		level, option, value, _length);
1206 }
1207 
1208 
1209 int
1210 socket_listen(net_socket* socket, int backlog)
1211 {
1212 	status_t status = socket->first_info->listen(socket->first_protocol,
1213 		backlog);
1214 	if (status == B_OK)
1215 		socket->options |= SO_ACCEPTCONN;
1216 
1217 	return status;
1218 }
1219 
1220 
1221 ssize_t
1222 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1223 	int flags)
1224 {
1225 	// If the protocol sports read_data_no_buffer() we use it.
1226 	if (socket->first_info->read_data_no_buffer != NULL)
1227 		return socket_receive_no_buffer(socket, header, data, length, flags);
1228 
1229 	size_t totalLength = length;
1230 	net_buffer* buffer;
1231 	int i;
1232 
1233 	// the convention to this function is that have header been
1234 	// present, { data, length } would have been iovec[0] and is
1235 	// always considered like that
1236 
1237 	if (header) {
1238 		// calculate the length considering all of the extra buffers
1239 		for (i = 1; i < header->msg_iovlen; i++)
1240 			totalLength += header->msg_iov[i].iov_len;
1241 	}
1242 
1243 	status_t status = socket->first_info->read_data(
1244 		socket->first_protocol, totalLength, flags, &buffer);
1245 	if (status != B_OK)
1246 		return status;
1247 
1248 	// process ancillary data
1249 	if (header != NULL) {
1250 		if (buffer != NULL && header->msg_control != NULL) {
1251 			ancillary_data_container* container
1252 				= gNetBufferModule.get_ancillary_data(buffer);
1253 			if (container != NULL)
1254 				status = process_ancillary_data(socket, container, header);
1255 			else
1256 				status = process_ancillary_data(socket, buffer, header);
1257 			if (status != B_OK) {
1258 				gNetBufferModule.free(buffer);
1259 				return status;
1260 			}
1261 		} else
1262 			header->msg_controllen = 0;
1263 	}
1264 
1265 	// TODO: - returning a NULL buffer when received 0 bytes
1266 	//         may not make much sense as we still need the address
1267 	//       - gNetBufferModule.read() uses memcpy() instead of user_memcpy
1268 
1269 	size_t nameLen = 0;
1270 
1271 	if (header) {
1272 		// TODO: - consider the control buffer options
1273 		nameLen = header->msg_namelen;
1274 		header->msg_namelen = 0;
1275 		header->msg_flags = 0;
1276 	}
1277 
1278 	if (buffer == NULL)
1279 		return 0;
1280 
1281 	size_t bytesReceived = buffer->size, bytesCopied = 0;
1282 
1283 	length = min_c(bytesReceived, length);
1284 	if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
1285 		gNetBufferModule.free(buffer);
1286 		return ENOBUFS;
1287 	}
1288 
1289 	// if first copy was a success, proceed to following
1290 	// copies as required
1291 	bytesCopied += length;
1292 
1293 	if (header) {
1294 		// we only start considering at iovec[1]
1295 		// as { data, length } is iovec[0]
1296 		for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1297 			iovec& vec = header->msg_iov[i];
1298 			size_t toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1299 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1300 					toRead) < B_OK) {
1301 				break;
1302 			}
1303 
1304 			bytesCopied += toRead;
1305 		}
1306 
1307 		if (header->msg_name != NULL) {
1308 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1309 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1310 		}
1311 	}
1312 
1313 	gNetBufferModule.free(buffer);
1314 
1315 	if (bytesCopied < bytesReceived) {
1316 		if (header)
1317 			header->msg_flags = MSG_TRUNC;
1318 
1319 		if (flags & MSG_TRUNC)
1320 			return bytesReceived;
1321 	}
1322 
1323 	return bytesCopied;
1324 }
1325 
1326 
1327 ssize_t
1328 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1329 	int flags)
1330 {
1331 	const sockaddr* address = NULL;
1332 	socklen_t addressLength = 0;
1333 	size_t bytesLeft = length;
1334 
1335 	if (length > SSIZE_MAX)
1336 		return B_BAD_VALUE;
1337 
1338 	ancillary_data_container* ancillaryData = NULL;
1339 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(NULL,
1340 		&delete_ancillary_data_container);
1341 
1342 	if (header != NULL) {
1343 		address = (const sockaddr*)header->msg_name;
1344 		addressLength = header->msg_namelen;
1345 
1346 		// get the ancillary data
1347 		if (header->msg_control != NULL) {
1348 			ancillaryData = create_ancillary_data_container();
1349 			if (ancillaryData == NULL)
1350 				return B_NO_MEMORY;
1351 			ancillaryDataDeleter.SetTo(ancillaryData);
1352 
1353 			status_t status = add_ancillary_data(socket, ancillaryData,
1354 				(cmsghdr*)header->msg_control, header->msg_controllen);
1355 			if (status != B_OK)
1356 				return status;
1357 		}
1358 	}
1359 
1360 	if (addressLength == 0)
1361 		address = NULL;
1362 	else if (address == NULL)
1363 		return B_BAD_VALUE;
1364 
1365 	if (socket->peer.ss_len != 0) {
1366 		if (address != NULL)
1367 			return EISCONN;
1368 
1369 		// socket is connected, we use that address
1370 		address = (struct sockaddr*)&socket->peer;
1371 		addressLength = socket->peer.ss_len;
1372 	}
1373 
1374 	if (address == NULL || addressLength == 0) {
1375 		// don't know where to send to:
1376 		return EDESTADDRREQ;
1377 	}
1378 
1379 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1380 		&& bytesLeft > socket->send.buffer_size)
1381 		return EMSGSIZE;
1382 
1383 	if (socket->address.ss_len == 0) {
1384 		// try to bind first
1385 		status_t status = socket_bind(socket, NULL, 0);
1386 		if (status != B_OK)
1387 			return status;
1388 	}
1389 
1390 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1391 	if (socket->first_info->send_data_no_buffer != NULL) {
1392 		iovec stackVec = { (void*)data, length };
1393 		iovec* vecs = header ? header->msg_iov : &stackVec;
1394 		int vecCount = header ? header->msg_iovlen : 1;
1395 
1396 		ssize_t written = socket->first_info->send_data_no_buffer(
1397 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1398 			addressLength);
1399 		if (written > 0)
1400 			ancillaryDataDeleter.Detach();
1401 		return written;
1402 	}
1403 
1404 	// By convention, if a header is given, the (data, length) equals the first
1405 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1406 	// the size of the remaining ones.
1407 	if (header != NULL) {
1408 		if (header->msg_iovlen <= 1)
1409 			header = NULL;
1410 		else {
1411 // TODO: The iovecs have already been copied to kernel space. Simplify!
1412 			bytesLeft += compute_user_iovec_length(header->msg_iov + 1,
1413 				header->msg_iovlen - 1);
1414 		}
1415 	}
1416 
1417 	ssize_t bytesSent = 0;
1418 	size_t vecOffset = 0;
1419 	uint32 vecIndex = 0;
1420 
1421 	while (bytesLeft > 0) {
1422 		// TODO: useful, maybe even computed header space!
1423 		net_buffer* buffer = gNetBufferModule.create(256);
1424 		if (buffer == NULL)
1425 			return ENOBUFS;
1426 
1427 		while (buffer->size < socket->send.buffer_size
1428 			&& buffer->size < bytesLeft) {
1429 			if (vecIndex > 0 && vecOffset == 0) {
1430 				// retrieve next iovec buffer from header
1431 				iovec vec;
1432 				if (user_memcpy(&vec, header->msg_iov + vecIndex, sizeof(iovec))
1433 						< B_OK) {
1434 					gNetBufferModule.free(buffer);
1435 					return B_BAD_ADDRESS;
1436 				}
1437 
1438 				data = vec.iov_base;
1439 				length = vec.iov_len;
1440 			}
1441 
1442 			size_t bytes = length;
1443 			if (buffer->size + bytes > socket->send.buffer_size)
1444 				bytes = socket->send.buffer_size - buffer->size;
1445 
1446 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1447 				gNetBufferModule.free(buffer);
1448 				return ENOBUFS;
1449 			}
1450 
1451 			if (bytes != length) {
1452 				// partial send
1453 				vecOffset = bytes;
1454 				length -= vecOffset;
1455 				data = (uint8*)data + vecOffset;
1456 			} else if (header != NULL) {
1457 				// proceed with next buffer, if any
1458 				vecOffset = 0;
1459 				vecIndex++;
1460 
1461 				if (vecIndex >= (uint32)header->msg_iovlen)
1462 					break;
1463 			}
1464 		}
1465 
1466 		// attach ancillary data to the first buffer
1467 		status_t status = B_OK;
1468 		if (ancillaryData != NULL) {
1469 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1470 			ancillaryDataDeleter.Detach();
1471 			ancillaryData = NULL;
1472 		}
1473 
1474 		size_t bufferSize = buffer->size;
1475 		buffer->flags = flags;
1476 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1477 		memcpy(buffer->destination, address, addressLength);
1478 		buffer->destination->sa_len = addressLength;
1479 
1480 		if (status == B_OK) {
1481 			status = socket->first_info->send_data(socket->first_protocol,
1482 				buffer);
1483 		}
1484 		if (status != B_OK) {
1485 			size_t sizeAfterSend = buffer->size;
1486 			gNetBufferModule.free(buffer);
1487 
1488 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1489 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1490 				// this appears to be a partial write
1491 				return bytesSent + (bufferSize - sizeAfterSend);
1492 			}
1493 			return status;
1494 		}
1495 
1496 		bytesLeft -= bufferSize;
1497 		bytesSent += bufferSize;
1498 	}
1499 
1500 	return bytesSent;
1501 }
1502 
1503 
1504 status_t
1505 socket_set_option(net_socket* socket, int level, int option, const void* value,
1506 	int length)
1507 {
1508 	if (level != SOL_SOCKET)
1509 		return ENOPROTOOPT;
1510 
1511 	TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1512 
1513 	switch (option) {
1514 		// TODO: implement other options!
1515 		case SO_LINGER:
1516 		{
1517 			if (length < (int)sizeof(struct linger))
1518 				return B_BAD_VALUE;
1519 
1520 			struct linger* linger = (struct linger*)value;
1521 			if (linger->l_onoff) {
1522 				socket->options |= SO_LINGER;
1523 				socket->linger = linger->l_linger;
1524 			} else {
1525 				socket->options &= ~SO_LINGER;
1526 				socket->linger = 0;
1527 			}
1528 			return B_OK;
1529 		}
1530 
1531 		case SO_SNDBUF:
1532 			if (length != sizeof(uint32))
1533 				return B_BAD_VALUE;
1534 
1535 			socket->send.buffer_size = *(const uint32*)value;
1536 			return B_OK;
1537 
1538 		case SO_RCVBUF:
1539 			if (length != sizeof(uint32))
1540 				return B_BAD_VALUE;
1541 
1542 			socket->receive.buffer_size = *(const uint32*)value;
1543 			return B_OK;
1544 
1545 		case SO_SNDLOWAT:
1546 			if (length != sizeof(uint32))
1547 				return B_BAD_VALUE;
1548 
1549 			socket->send.low_water_mark = *(const uint32*)value;
1550 			return B_OK;
1551 
1552 		case SO_RCVLOWAT:
1553 			if (length != sizeof(uint32))
1554 				return B_BAD_VALUE;
1555 
1556 			socket->receive.low_water_mark = *(const uint32*)value;
1557 			return B_OK;
1558 
1559 		case SO_RCVTIMEO:
1560 		case SO_SNDTIMEO:
1561 		{
1562 			if (length != sizeof(struct timeval))
1563 				return B_BAD_VALUE;
1564 
1565 			const struct timeval* timeval = (const struct timeval*)value;
1566 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1567 			if (timeout == 0)
1568 				timeout = B_INFINITE_TIMEOUT;
1569 
1570 			if (option == SO_SNDTIMEO)
1571 				socket->send.timeout = timeout;
1572 			else
1573 				socket->receive.timeout = timeout;
1574 			return B_OK;
1575 		}
1576 
1577 		case SO_NONBLOCK:
1578 			if (length != sizeof(int32))
1579 				return B_BAD_VALUE;
1580 
1581 			if (*(const int32*)value) {
1582 				socket->send.timeout = 0;
1583 				socket->receive.timeout = 0;
1584 			} else {
1585 				socket->send.timeout = B_INFINITE_TIMEOUT;
1586 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1587 			}
1588 			return B_OK;
1589 
1590 		case SO_BROADCAST:
1591 		case SO_DEBUG:
1592 		case SO_DONTROUTE:
1593 		case SO_KEEPALIVE:
1594 		case SO_OOBINLINE:
1595 		case SO_REUSEADDR:
1596 		case SO_REUSEPORT:
1597 		case SO_USELOOPBACK:
1598 			if (length != sizeof(int32))
1599 				return B_BAD_VALUE;
1600 
1601 			if (*(const int32*)value)
1602 				socket->options |= option;
1603 			else
1604 				socket->options &= ~option;
1605 			return B_OK;
1606 
1607 		case SO_BINDTODEVICE:
1608 		{
1609 			if (length != sizeof(uint32))
1610 				return B_BAD_VALUE;
1611 
1612 			// TODO: we might want to check if the device exists at all
1613 			// (although it doesn't really harm when we don't)
1614 			socket->bound_to_device = *(const uint32*)value;
1615 			return B_OK;
1616 		}
1617 
1618 		default:
1619 			break;
1620 	}
1621 
1622 	dprintf("socket_setsockopt: unknown option %d\n", option);
1623 	return ENOPROTOOPT;
1624 }
1625 
1626 
1627 int
1628 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1629 	int length)
1630 {
1631 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1632 		level, option, value, length);
1633 }
1634 
1635 
1636 int
1637 socket_shutdown(net_socket* socket, int direction)
1638 {
1639 	return socket->first_info->shutdown(socket->first_protocol, direction);
1640 }
1641 
1642 
1643 status_t
1644 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1645 {
1646 	sockets[0] = NULL;
1647 	sockets[1] = NULL;
1648 
1649 	// create sockets
1650 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1651 	if (error != B_OK)
1652 		return error;
1653 
1654 	if (error == B_OK)
1655 		error = socket_open(family, type, protocol, &sockets[1]);
1656 
1657 	// bind one
1658 	if (error == B_OK)
1659 		error = socket_bind(sockets[0], NULL, 0);
1660 
1661 	// start listening
1662 	if (error == B_OK)
1663 		error = socket_listen(sockets[0], 1);
1664 
1665 	// connect them
1666 	if (error == B_OK) {
1667 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1668 			sockets[0]->address.ss_len);
1669 	}
1670 
1671 	// accept a socket
1672 	net_socket* acceptedSocket = NULL;
1673 	if (error == B_OK)
1674 		error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1675 
1676 	if (error == B_OK) {
1677 		// everything worked: close the listener socket
1678 		socket_close(sockets[0]);
1679 		socket_free(sockets[0]);
1680 		sockets[0] = acceptedSocket;
1681 	} else {
1682 		// close sockets on error
1683 		for (int i = 0; i < 2; i++) {
1684 			if (sockets[i] != NULL) {
1685 				socket_close(sockets[i]);
1686 				socket_free(sockets[i]);
1687 				sockets[i] = NULL;
1688 			}
1689 		}
1690 	}
1691 
1692 	return error;
1693 }
1694 
1695 
1696 //	#pragma mark -
1697 
1698 
1699 static status_t
1700 socket_std_ops(int32 op, ...)
1701 {
1702 	switch (op) {
1703 		case B_MODULE_INIT:
1704 		{
1705 			new (&sSocketList) SocketList;
1706 			mutex_init(&sSocketLock, "socket list");
1707 
1708 #if ENABLE_DEBUGGER_COMMANDS
1709 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1710 			add_debugger_command("socket", dump_socket, "dumps a socket");
1711 #endif
1712 			return B_OK;
1713 		}
1714 		case B_MODULE_UNINIT:
1715 			ASSERT(sSocketList.IsEmpty());
1716 			mutex_destroy(&sSocketLock);
1717 
1718 #if ENABLE_DEBUGGER_COMMANDS
1719 			remove_debugger_command("socket", dump_socket);
1720 			remove_debugger_command("sockets", dump_sockets);
1721 #endif
1722 			return B_OK;
1723 
1724 		default:
1725 			return B_ERROR;
1726 	}
1727 }
1728 
1729 
// The socket module's function table. This is a positional aggregate
// initializer: the order of the entries has to match the member order of
// net_socket_module_info, so entries must not be reordered here.
net_socket_module_info gNetSocketModule = {
	// module header: the module name, 0, and the handler for the standard
	// module operations (init/uninit)
	{
		NET_SOCKET_MODULE_NAME,
		0,
		socket_std_ops
	},
	// socket life cycle
	socket_open,
	socket_close,
	socket_free,

	// vector I/O and control
	socket_readv,
	socket_writev,
	socket_control,

	// available byte counts
	socket_read_avail,
	socket_send_avail,

	// buffer based data transfer
	socket_send_data,
	socket_receive_data,

	// socket level options
	socket_get_option,
	socket_set_option,

	// statistics
	socket_get_next_stat,

	// connections
	socket_acquire,
	socket_release,
	socket_spawn_pending,
	socket_dequeue_connected,
	socket_count_connected,
	socket_set_max_backlog,
	socket_has_parent,
	socket_connected,
	socket_aborted,

	// notifications
	socket_request_notification,
	socket_cancel_notification,
	socket_notify,

	// standard socket API
	socket_accept,
	socket_bind,
	socket_connect,
	socket_getpeername,
	socket_getsockname,
	socket_getsockopt,
	socket_listen,
	socket_receive,
	socket_send,
	socket_setsockopt,
	socket_shutdown,
	socket_socketpair
};
1785 
1786