xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 4a55cc230cf7566cadcbb23b1928eefff8aea9a2)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
// Uncomment the following define to have TRACE() forward its arguments to
// dprintf() (prefixed with STACK_DEBUG_PREFIX). Without it, TRACE() expands
// to an empty statement.
//#define TRACE_SOCKET
#ifndef TRACE_SOCKET
#	define TRACE(x...) ;
#else
#	define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
#endif
46 
47 
48 struct net_socket_private;
49 typedef DoublyLinkedList<net_socket_private> SocketList;
50 
51 struct net_socket_private : net_socket,
52 		DoublyLinkedListLinkImpl<net_socket_private>,
53 		BWeakReferenceable {
54 	net_socket_private();
55 	~net_socket_private();
56 
57 	void RemoveFromParent();
58 
59 	BWeakReference<net_socket_private> parent;
60 	team_id						owner;
61 	uint32						max_backlog;
62 	uint32						child_count;
63 	SocketList					pending_children;
64 	SocketList					connected_children;
65 
66 	struct select_sync_pool*	select_pool;
67 	mutex						lock;
68 
69 	bool						is_connected;
70 	bool						is_in_socket_list;
71 };
72 
73 
74 int socket_bind(net_socket* socket, const struct sockaddr* address,
75 	socklen_t addressLength);
76 int socket_setsockopt(net_socket* socket, int level, int option,
77 	const void* value, int length);
78 ssize_t socket_read_avail(net_socket* socket);
79 
80 static SocketList sSocketList;
81 static mutex sSocketLock;
82 
83 
84 net_socket_private::net_socket_private()
85 	:
86 	owner(-1),
87 	max_backlog(0),
88 	child_count(0),
89 	select_pool(NULL),
90 	is_connected(false),
91 	is_in_socket_list(false)
92 {
93 	first_protocol = NULL;
94 	first_info = NULL;
95 	options = 0;
96 	linger = 0;
97 	bound_to_device = 0;
98 	error = 0;
99 
100 	address.ss_len = 0;
101 	peer.ss_len = 0;
102 
103 	mutex_init(&lock, "socket");
104 
105 	// set defaults (may be overridden by the protocols)
106 	send.buffer_size = 65535;
107 	send.low_water_mark = 1;
108 	send.timeout = B_INFINITE_TIMEOUT;
109 	receive.buffer_size = 65535;
110 	receive.low_water_mark = 1;
111 	receive.timeout = B_INFINITE_TIMEOUT;
112 }
113 
114 
115 net_socket_private::~net_socket_private()
116 {
117 	TRACE("delete net_socket %p\n", this);
118 
119 	if (parent != NULL)
120 		panic("socket still has a parent!");
121 
122 	if (is_in_socket_list) {
123 		MutexLocker _(sSocketLock);
124 		sSocketList.Remove(this);
125 	}
126 
127 	mutex_lock(&lock);
128 
129 	// also delete all children of this socket
130 	while (net_socket_private* child = pending_children.RemoveHead()) {
131 		child->RemoveFromParent();
132 	}
133 	while (net_socket_private* child = connected_children.RemoveHead()) {
134 		child->RemoveFromParent();
135 	}
136 
137 	mutex_unlock(&lock);
138 
139 	put_domain_protocols(this);
140 
141 	mutex_destroy(&lock);
142 }
143 
144 
145 void
146 net_socket_private::RemoveFromParent()
147 {
148 	ASSERT(!is_in_socket_list && parent != NULL);
149 
150 	parent = NULL;
151 
152 	mutex_lock(&sSocketLock);
153 	sSocketList.Add(this);
154 	mutex_unlock(&sSocketLock);
155 
156 	is_in_socket_list = true;
157 
158 	ReleaseReference();
159 }
160 
161 
162 //	#pragma mark -
163 
164 
165 static status_t
166 create_socket(int family, int type, int protocol, net_socket_private** _socket)
167 {
168 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
169 	if (socket == NULL)
170 		return B_NO_MEMORY;
171 	status_t status = socket->InitCheck();
172 	if (status != B_OK) {
173 		delete socket;
174 		return status;
175 	}
176 
177 	socket->family = family;
178 	socket->type = type;
179 	socket->protocol = protocol;
180 
181 	status = get_domain_protocols(socket);
182 	if (status != B_OK) {
183 		delete socket;
184 		return status;
185 	}
186 
187 	TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
188 		socket->type, socket->protocol);
189 
190 #ifdef TRACE_SOCKET
191 	net_protocol* current = socket->first_protocol;
192 	for (int i = 0; current != NULL; current = current->next, i++)
193 		TRACE("  [%d] %p  %s\n", i, current, current->module->info.name);
194 #endif
195 
196 	*_socket = socket;
197 	return B_OK;
198 }
199 
200 
201 static status_t
202 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
203 	void* data, size_t dataLen)
204 {
205 	cmsghdr* header = (cmsghdr*)data;
206 
207 	if (dataLen == 0)
208 		return B_OK;
209 
210 	if (socket->first_info->add_ancillary_data == NULL)
211 		return B_NOT_SUPPORTED;
212 
213 	while (true) {
214 		if (header->cmsg_len < CMSG_LEN(0) || header->cmsg_len > dataLen)
215 			return B_BAD_VALUE;
216 
217 		status_t status = socket->first_info->add_ancillary_data(
218 			socket->first_protocol, container, header);
219 		if (status != B_OK)
220 			return status;
221 
222 		if (dataLen <= _ALIGN(header->cmsg_len))
223 			break;
224 		dataLen -= _ALIGN(header->cmsg_len);
225 		header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
226 	}
227 
228 	return B_OK;
229 }
230 
231 
232 static status_t
233 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
234 	msghdr* messageHeader)
235 {
236 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
237 	int dataBufferLen = messageHeader->msg_controllen;
238 
239 	if (container == NULL || dataBuffer == NULL) {
240 		messageHeader->msg_controllen = 0;
241 		return B_OK;
242 	}
243 
244 	ancillary_data_header header;
245 	void* data = NULL;
246 
247 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
248 		if (socket->first_info->process_ancillary_data == NULL)
249 			return B_NOT_SUPPORTED;
250 
251 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
252 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
253 		if (bytesWritten < 0)
254 			return bytesWritten;
255 
256 		dataBuffer += bytesWritten;
257 		dataBufferLen -= bytesWritten;
258 	}
259 
260 	messageHeader->msg_controllen -= dataBufferLen;
261 
262 	return B_OK;
263 }
264 
265 
266 static status_t
267 process_ancillary_data(net_socket* socket,
268 	net_buffer* buffer, msghdr* messageHeader)
269 {
270 	void *dataBuffer = messageHeader->msg_control;
271 	ssize_t bytesWritten;
272 
273 	if (dataBuffer == NULL) {
274 		messageHeader->msg_controllen = 0;
275 		return B_OK;
276 	}
277 
278 	if (socket->first_info->process_ancillary_data_no_container == NULL)
279 		return B_NOT_SUPPORTED;
280 
281 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
282 		socket->first_protocol, buffer, dataBuffer,
283 		messageHeader->msg_controllen);
284 	if (bytesWritten < 0)
285 		return bytesWritten;
286 	messageHeader->msg_controllen = bytesWritten;
287 
288 	return B_OK;
289 }
290 
291 
292 static ssize_t
293 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
294 	size_t length, int flags)
295 {
296 	iovec stackVec = { data, length };
297 	iovec* vecs = header ? header->msg_iov : &stackVec;
298 	int vecCount = header ? header->msg_iovlen : 1;
299 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
300 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
301 
302 	ancillary_data_container* ancillaryData = NULL;
303 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
304 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
305 		addressLen);
306 	if (bytesRead < 0)
307 		return bytesRead;
308 
309 	CObjectDeleter<
310 		ancillary_data_container, void, delete_ancillary_data_container>
311 		ancillaryDataDeleter(ancillaryData);
312 
313 	// process ancillary data
314 	if (header != NULL) {
315 		status_t status = process_ancillary_data(socket, ancillaryData, header);
316 		if (status != B_OK)
317 			return status;
318 
319 		header->msg_flags = 0;
320 	}
321 
322 	return bytesRead;
323 }
324 
325 
326 #if ENABLE_DEBUGGER_COMMANDS
327 
328 
329 static void
330 print_socket_line(net_socket_private* socket, const char* prefix)
331 {
332 	BReference<net_socket_private> parent;
333 	if (socket->parent.PrivatePointer() != NULL)
334 		parent = socket->parent.GetReference();
335 	kprintf("%s%p %2d.%2d.%2d %6" B_PRId32 " %p %p  %p%s\n", prefix, socket,
336 		socket->family, socket->type, socket->protocol, socket->owner,
337 		socket->first_protocol, socket->first_info, parent.Get(),
338 		parent.IsSet() ? socket->is_connected ? " (c)" : " (p)" : "");
339 }
340 
341 
342 static int
343 dump_socket(int argc, char** argv)
344 {
345 	if (argc < 2) {
346 		kprintf("usage: %s [address]\n", argv[0]);
347 		return 0;
348 	}
349 
350 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
351 
352 	kprintf("SOCKET %p\n", socket);
353 	kprintf("  family.type.protocol: %d.%d.%d\n",
354 		socket->family, socket->type, socket->protocol);
355 	BReference<net_socket_private> parent;
356 	if (socket->parent.PrivatePointer() != NULL)
357 		parent = socket->parent.GetReference();
358 	kprintf("  parent:               %p\n", parent.Get());
359 	kprintf("  first protocol:       %p\n", socket->first_protocol);
360 	kprintf("  first module_info:    %p\n", socket->first_info);
361 	kprintf("  options:              %x\n", socket->options);
362 	kprintf("  linger:               %d\n", socket->linger);
363 	kprintf("  bound to device:      %" B_PRIu32 "\n", socket->bound_to_device);
364 	kprintf("  owner:                %" B_PRId32 "\n", socket->owner);
365 	kprintf("  max backlog:          %" B_PRId32 "\n", socket->max_backlog);
366 	kprintf("  is connected:         %d\n", socket->is_connected);
367 	kprintf("  child_count:          %" B_PRIu32 "\n", socket->child_count);
368 
369 	if (socket->child_count == 0)
370 		return 0;
371 
372 	kprintf("    pending children:\n");
373 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
374 	while (net_socket_private* child = iterator.Next()) {
375 		print_socket_line(child, "      ");
376 	}
377 
378 	kprintf("    connected children:\n");
379 	iterator = socket->connected_children.GetIterator();
380 	while (net_socket_private* child = iterator.Next()) {
381 		print_socket_line(child, "      ");
382 	}
383 
384 	return 0;
385 }
386 
387 
388 static int
389 dump_sockets(int argc, char** argv)
390 {
391 	kprintf("address        kind  owner protocol   module_info parent\n");
392 
393 	SocketList::Iterator iterator = sSocketList.GetIterator();
394 	while (net_socket_private* socket = iterator.Next()) {
395 		print_socket_line(socket, "");
396 
397 		SocketList::Iterator childIterator
398 			= socket->pending_children.GetIterator();
399 		while (net_socket_private* child = childIterator.Next()) {
400 			print_socket_line(child, " ");
401 		}
402 
403 		childIterator = socket->connected_children.GetIterator();
404 		while (net_socket_private* child = childIterator.Next()) {
405 			print_socket_line(child, " ");
406 		}
407 	}
408 
409 	return 0;
410 }
411 
412 
413 #endif	// ENABLE_DEBUGGER_COMMANDS
414 
415 
416 //	#pragma mark -
417 
418 
419 status_t
420 socket_open(int family, int type, int protocol, net_socket** _socket)
421 {
422 	net_socket_private* socket;
423 	status_t status = create_socket(family, type, protocol, &socket);
424 	if (status != B_OK)
425 		return status;
426 
427 	status = socket->first_info->open(socket->first_protocol);
428 	if (status != B_OK) {
429 		delete socket;
430 		return status;
431 	}
432 
433 	socket->owner = team_get_current_team_id();
434 	socket->is_in_socket_list = true;
435 
436 	mutex_lock(&sSocketLock);
437 	sSocketList.Add(socket);
438 	mutex_unlock(&sSocketLock);
439 
440 	*_socket = socket;
441 	return B_OK;
442 }
443 
444 
445 status_t
446 socket_close(net_socket* _socket)
447 {
448 	net_socket_private* socket = (net_socket_private*)_socket;
449 	return socket->first_info->close(socket->first_protocol);
450 }
451 
452 
453 void
454 socket_free(net_socket* _socket)
455 {
456 	net_socket_private* socket = (net_socket_private*)_socket;
457 	socket->first_info->free(socket->first_protocol);
458 	socket->ReleaseReference();
459 }
460 
461 
462 status_t
463 socket_readv(net_socket* socket, const iovec* vecs, size_t vecCount,
464 	size_t* _length)
465 {
466 	return -1;
467 }
468 
469 
470 status_t
471 socket_writev(net_socket* socket, const iovec* vecs, size_t vecCount,
472 	size_t* _length)
473 {
474 	if (socket->peer.ss_len == 0)
475 		return ECONNRESET;
476 
477 	if (socket->address.ss_len == 0) {
478 		// try to bind first
479 		status_t status = socket_bind(socket, NULL, 0);
480 		if (status != B_OK)
481 			return status;
482 	}
483 
484 	// TODO: useful, maybe even computed header space!
485 	net_buffer* buffer = gNetBufferModule.create(256);
486 	if (buffer == NULL)
487 		return ENOBUFS;
488 
489 	// copy data into buffer
490 
491 	for (uint32 i = 0; i < vecCount; i++) {
492 		if (gNetBufferModule.append(buffer, vecs[i].iov_base,
493 				vecs[i].iov_len) < B_OK) {
494 			gNetBufferModule.free(buffer);
495 			return ENOBUFS;
496 		}
497 	}
498 
499 	memcpy(buffer->source, &socket->address, socket->address.ss_len);
500 	memcpy(buffer->destination, &socket->peer, socket->peer.ss_len);
501 	size_t size = buffer->size;
502 
503 	ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
504 		buffer);
505 	if (bytesWritten < B_OK) {
506 		if (buffer->size != size) {
507 			// this appears to be a partial write
508 			*_length = size - buffer->size;
509 		}
510 		gNetBufferModule.free(buffer);
511 		return bytesWritten;
512 	}
513 
514 	*_length = bytesWritten;
515 	return B_OK;
516 }
517 
518 
519 status_t
520 socket_control(net_socket* socket, uint32 op, void* data, size_t length)
521 {
522 	switch (op) {
523 		case FIONBIO:
524 		{
525 			if (data == NULL)
526 				return B_BAD_VALUE;
527 
528 			int value;
529 			if (is_syscall()) {
530 				if (!IS_USER_ADDRESS(data)
531 					|| user_memcpy(&value, data, sizeof(int)) != B_OK) {
532 					return B_BAD_ADDRESS;
533 				}
534 			} else
535 				value = *(int*)data;
536 
537 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
538 				sizeof(int));
539 		}
540 
541 		case FIONREAD:
542 		{
543 			if (data == NULL)
544 				return B_BAD_VALUE;
545 
546 			int available = (int)socket_read_avail(socket);
547 			if (available < 0)
548 				return available;
549 
550 			if (is_syscall()) {
551 				if (!IS_USER_ADDRESS(data)
552 					|| user_memcpy(data, &available, sizeof(available))
553 						!= B_OK) {
554 					return B_BAD_ADDRESS;
555 				}
556 			} else
557 				*(int*)data = available;
558 
559 			return B_OK;
560 		}
561 
562 		case B_SET_BLOCKING_IO:
563 		case B_SET_NONBLOCKING_IO:
564 		{
565 			int value = op == B_SET_NONBLOCKING_IO;
566 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
567 				sizeof(int));
568 		}
569 	}
570 
571 	return socket->first_info->control(socket->first_protocol,
572 		LEVEL_DRIVER_IOCTL, op, data, &length);
573 }
574 
575 
576 ssize_t
577 socket_read_avail(net_socket* socket)
578 {
579 	return socket->first_info->read_avail(socket->first_protocol);
580 }
581 
582 
583 ssize_t
584 socket_send_avail(net_socket* socket)
585 {
586 	return socket->first_info->send_avail(socket->first_protocol);
587 }
588 
589 
590 status_t
591 socket_send_data(net_socket* socket, net_buffer* buffer)
592 {
593 	return socket->first_info->send_data(socket->first_protocol,
594 		buffer);
595 }
596 
597 
598 status_t
599 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
600 	net_buffer** _buffer)
601 {
602 	status_t status = socket->first_info->read_data(socket->first_protocol,
603 		length, flags, _buffer);
604 	if (status != B_OK)
605 		return status;
606 
607 	if (*_buffer && length < (*_buffer)->size) {
608 		// discard any data behind the amount requested
609 		gNetBufferModule.trim(*_buffer, length);
610 	}
611 
612 	return status;
613 }
614 
615 
616 status_t
617 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
618 {
619 	MutexLocker locker(sSocketLock);
620 
621 	net_socket_private* socket = NULL;
622 	SocketList::Iterator iterator = sSocketList.GetIterator();
623 	uint32 cookie = *_cookie;
624 	uint32 count = 0;
625 
626 	while (true) {
627 		socket = iterator.Next();
628 		if (socket == NULL)
629 			return B_ENTRY_NOT_FOUND;
630 
631 		// TODO: also traverse the pending connections
632 		if (count == cookie)
633 			break;
634 
635 		if (family == -1 || family == socket->family)
636 			count++;
637 	}
638 
639 	*_cookie = count + 1;
640 
641 	stat->family = socket->family;
642 	stat->type = socket->type;
643 	stat->protocol = socket->protocol;
644 	stat->owner = socket->owner;
645 	stat->state[0] = '\0';
646 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
647 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
648 	stat->receive_queue_size = 0;
649 	stat->send_queue_size = 0;
650 
651 	// fill in protocol specific data (if supported by the protocol)
652 	size_t length = sizeof(net_stat);
653 	socket->first_info->control(socket->first_protocol, socket->protocol,
654 		NET_STAT_SOCKET, stat, &length);
655 
656 	return B_OK;
657 }
658 
659 
660 //	#pragma mark - connections
661 
662 
663 bool
664 socket_acquire(net_socket* _socket)
665 {
666 	net_socket_private* socket = (net_socket_private*)_socket;
667 
668 	// During destruction, the socket might still be accessible over its
669 	// endpoint protocol. We need to make sure the endpoint cannot acquire the
670 	// socket anymore -- while not obvious, the endpoint protocol is responsible
671 	// for the proper locking here.
672 	if (socket->CountReferences() == 0)
673 		return false;
674 
675 	socket->AcquireReference();
676 	return true;
677 }
678 
679 
680 bool
681 socket_release(net_socket* _socket)
682 {
683 	net_socket_private* socket = (net_socket_private*)_socket;
684 	return socket->ReleaseReference();
685 }
686 
687 
688 status_t
689 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
690 {
691 	net_socket_private* parent = (net_socket_private*)_parent;
692 
693 	TRACE("%s(%p)\n", __FUNCTION__, parent);
694 
695 	MutexLocker locker(parent->lock);
696 
697 	// We actually accept more pending connections to compensate for those
698 	// that never complete, and also make sure at least a single connection
699 	// can always be accepted
700 	if (parent->child_count > 3 * parent->max_backlog / 2)
701 		return ENOBUFS;
702 
703 	net_socket_private* socket;
704 	status_t status = create_socket(parent->family, parent->type,
705 		parent->protocol, &socket);
706 	if (status != B_OK)
707 		return status;
708 
709 	// inherit parent's properties
710 	socket->send = parent->send;
711 	socket->receive = parent->receive;
712 	socket->options = parent->options & ~SO_ACCEPTCONN;
713 	socket->linger = parent->linger;
714 	socket->owner = parent->owner;
715 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
716 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
717 
718 	// add to the parent's list of pending connections
719 	parent->pending_children.Add(socket);
720 	socket->parent = parent;
721 	parent->child_count++;
722 
723 	*_socket = socket;
724 	return B_OK;
725 }
726 
727 
728 /*!	Dequeues a connected child from a parent socket.
729 	It also returns a reference with the child socket.
730 */
731 status_t
732 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
733 {
734 	net_socket_private* parent = (net_socket_private*)_parent;
735 
736 	mutex_lock(&parent->lock);
737 
738 	net_socket_private* socket = parent->connected_children.RemoveHead();
739 	if (socket != NULL) {
740 		socket->AcquireReference();
741 		socket->RemoveFromParent();
742 		parent->child_count--;
743 		*_socket = socket;
744 	}
745 
746 	mutex_unlock(&parent->lock);
747 
748 	if (socket == NULL)
749 		return B_ENTRY_NOT_FOUND;
750 
751 	return B_OK;
752 }
753 
754 
755 ssize_t
756 socket_count_connected(net_socket* _parent)
757 {
758 	net_socket_private* parent = (net_socket_private*)_parent;
759 
760 	MutexLocker _(parent->lock);
761 	return parent->connected_children.Count();
762 }
763 
764 
765 status_t
766 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
767 {
768 	net_socket_private* socket = (net_socket_private*)_socket;
769 
770 	// we enforce an upper limit of connections waiting to be accepted
771 	if (backlog > 256)
772 		backlog = 256;
773 
774 	MutexLocker _(socket->lock);
775 
776 	// first remove the pending connections, then the already connected
777 	// ones as needed
778 	net_socket_private* child;
779 	while (socket->child_count > backlog
780 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
781 		child->RemoveFromParent();
782 		socket->child_count--;
783 	}
784 	while (socket->child_count > backlog
785 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
786 		child->RemoveFromParent();
787 		socket->child_count--;
788 	}
789 
790 	socket->max_backlog = backlog;
791 	return B_OK;
792 }
793 
794 
795 /*!	Returns whether or not this socket has a parent. The parent might not be
796 	valid anymore, though.
797 */
798 bool
799 socket_has_parent(net_socket* _socket)
800 {
801 	net_socket_private* socket = (net_socket_private*)_socket;
802 	return socket->parent != NULL;
803 }
804 
805 
806 /*!	The socket has been connected. It will be moved to the connected queue
807 	of its parent socket.
808 */
809 status_t
810 socket_connected(net_socket* _socket)
811 {
812 	net_socket_private* socket = (net_socket_private*)_socket;
813 
814 	TRACE("socket_connected(%p)\n", socket);
815 
816 	if (socket->parent == NULL) {
817 		socket->is_connected = true;
818 		return B_OK;
819 	}
820 
821 	BReference<net_socket_private> parent = socket->parent.GetReference();
822 	if (!parent.IsSet())
823 		return B_BAD_VALUE;
824 
825 	MutexLocker _(parent->lock);
826 
827 	parent->pending_children.Remove(socket);
828 	parent->connected_children.Add(socket);
829 	socket->is_connected = true;
830 
831 	// notify parent
832 	if (parent->select_pool)
833 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
834 
835 	return B_OK;
836 }
837 
838 
839 /*!	The socket has been aborted. Steals the parent's reference, and releases
840 	it.
841 */
842 status_t
843 socket_aborted(net_socket* _socket)
844 {
845 	net_socket_private* socket = (net_socket_private*)_socket;
846 
847 	TRACE("socket_aborted(%p)\n", socket);
848 
849 	BReference<net_socket_private> parent = socket->parent.GetReference();
850 	if (!parent.IsSet())
851 		return B_BAD_VALUE;
852 
853 	MutexLocker _(parent->lock);
854 
855 	if (socket->is_connected)
856 		parent->connected_children.Remove(socket);
857 	else
858 		parent->pending_children.Remove(socket);
859 
860 	parent->child_count--;
861 	socket->RemoveFromParent();
862 
863 	return B_OK;
864 }
865 
866 
867 //	#pragma mark - notifications
868 
869 
870 status_t
871 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
872 {
873 	net_socket_private* socket = (net_socket_private*)_socket;
874 
875 	mutex_lock(&socket->lock);
876 
877 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
878 		event);
879 
880 	mutex_unlock(&socket->lock);
881 
882 	if (status != B_OK)
883 		return status;
884 
885 	// check if the event is already present
886 	// TODO: add support for poll() types
887 
888 	switch (event) {
889 		case B_SELECT_READ:
890 		{
891 			ssize_t available = socket_read_avail(socket);
892 			if ((ssize_t)socket->receive.low_water_mark <= available
893 				|| available < B_OK)
894 				notify_select_event(sync, event);
895 			break;
896 		}
897 		case B_SELECT_WRITE:
898 		{
899 			if ((socket->options & SO_ACCEPTCONN) != 0)
900 				break;
901 
902 			ssize_t available = socket_send_avail(socket);
903 			if ((ssize_t)socket->send.low_water_mark <= available
904 				|| available < B_OK)
905 				notify_select_event(sync, event);
906 			break;
907 		}
908 		case B_SELECT_ERROR:
909 			if (socket->error != B_OK)
910 				notify_select_event(sync, event);
911 			break;
912 	}
913 
914 	return B_OK;
915 }
916 
917 
918 status_t
919 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
920 {
921 	net_socket_private* socket = (net_socket_private*)_socket;
922 
923 	MutexLocker _(socket->lock);
924 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
925 }
926 
927 
928 status_t
929 socket_notify(net_socket* _socket, uint8 event, int32 value)
930 {
931 	net_socket_private* socket = (net_socket_private*)_socket;
932 	bool notify = true;
933 
934 	switch (event) {
935 		case B_SELECT_READ:
936 			if ((ssize_t)socket->receive.low_water_mark > value
937 				&& value >= B_OK)
938 				notify = false;
939 			break;
940 
941 		case B_SELECT_WRITE:
942 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
943 				notify = false;
944 			break;
945 
946 		case B_SELECT_ERROR:
947 			socket->error = value;
948 			break;
949 	}
950 
951 	MutexLocker _(socket->lock);
952 
953 	if (notify && socket->select_pool != NULL) {
954 		notify_select_event_pool(socket->select_pool, event);
955 
956 		if (event == B_SELECT_ERROR) {
957 			// always notify read/write on error
958 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
959 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
960 		}
961 	}
962 
963 	return B_OK;
964 }
965 
966 
967 //	#pragma mark - standard socket API
968 
969 
970 int
971 socket_accept(net_socket* socket, struct sockaddr* address,
972 	socklen_t* _addressLength, net_socket** _acceptedSocket)
973 {
974 	if ((socket->options & SO_ACCEPTCONN) == 0)
975 		return B_BAD_VALUE;
976 
977 	net_socket* accepted;
978 	status_t status = socket->first_info->accept(socket->first_protocol,
979 		&accepted);
980 	if (status != B_OK)
981 		return status;
982 
983 	if (address && *_addressLength > 0) {
984 		memcpy(address, &accepted->peer, min_c(*_addressLength,
985 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
986 		*_addressLength = accepted->peer.ss_len;
987 	}
988 
989 	*_acceptedSocket = accepted;
990 	return B_OK;
991 }
992 
993 
994 int
995 socket_bind(net_socket* socket, const struct sockaddr* address,
996 	socklen_t addressLength)
997 {
998 	sockaddr empty;
999 	if (address == NULL) {
1000 		// special - try to bind to an empty address, like INADDR_ANY
1001 		memset(&empty, 0, sizeof(sockaddr));
1002 		empty.sa_len = sizeof(sockaddr);
1003 		empty.sa_family = socket->family;
1004 
1005 		address = &empty;
1006 		addressLength = sizeof(sockaddr);
1007 	}
1008 
1009 	if (socket->address.ss_len != 0) {
1010 		status_t status = socket->first_info->unbind(socket->first_protocol,
1011 			(sockaddr*)&socket->address);
1012 		if (status != B_OK)
1013 			return status;
1014 	}
1015 
1016 	memcpy(&socket->address, address, sizeof(sockaddr));
1017 	socket->address.ss_len = sizeof(sockaddr_storage);
1018 
1019 	status_t status = socket->first_info->bind(socket->first_protocol,
1020 		(sockaddr*)address);
1021 	if (status != B_OK) {
1022 		// clear address again, as binding failed
1023 		socket->address.ss_len = 0;
1024 	}
1025 
1026 	return status;
1027 }
1028 
1029 
1030 int
1031 socket_connect(net_socket* socket, const struct sockaddr* address,
1032 	socklen_t addressLength)
1033 {
1034 	if (address == NULL || addressLength == 0)
1035 		return ENETUNREACH;
1036 
1037 	if (socket->address.ss_len == 0) {
1038 		// try to bind first
1039 		status_t status = socket_bind(socket, NULL, 0);
1040 		if (status != B_OK)
1041 			return status;
1042 	}
1043 
1044 	return socket->first_info->connect(socket->first_protocol, address);
1045 }
1046 
1047 
1048 int
1049 socket_getpeername(net_socket* _socket, struct sockaddr* address,
1050 	socklen_t* _addressLength)
1051 {
1052 	net_socket_private* socket = (net_socket_private*)_socket;
1053 	if (!socket->is_connected || socket->peer.ss_len == 0)
1054 		return ENOTCONN;
1055 
1056 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
1057 	*_addressLength = socket->peer.ss_len;
1058 	return B_OK;
1059 }
1060 
1061 
1062 int
1063 socket_getsockname(net_socket* socket, struct sockaddr* address,
1064 	socklen_t* _addressLength)
1065 {
1066 	if (socket->address.ss_len == 0) {
1067 		struct sockaddr buffer;
1068 		memset(&buffer, 0, sizeof(buffer));
1069 		buffer.sa_family = socket->family;
1070 
1071 		memcpy(address, &buffer, min_c(*_addressLength, sizeof(buffer)));
1072 		*_addressLength = sizeof(buffer);
1073 		return B_OK;
1074 	}
1075 
1076 	memcpy(address, &socket->address, min_c(*_addressLength,
1077 		socket->address.ss_len));
1078 	*_addressLength = socket->address.ss_len;
1079 	return B_OK;
1080 }
1081 
1082 
1083 status_t
1084 socket_get_option(net_socket* socket, int level, int option, void* value,
1085 	int* _length)
1086 {
1087 	if (level != SOL_SOCKET)
1088 		return ENOPROTOOPT;
1089 
1090 	switch (option) {
1091 		case SO_SNDBUF:
1092 		{
1093 			uint32* size = (uint32*)value;
1094 			*size = socket->send.buffer_size;
1095 			*_length = sizeof(uint32);
1096 			return B_OK;
1097 		}
1098 
1099 		case SO_RCVBUF:
1100 		{
1101 			uint32* size = (uint32*)value;
1102 			*size = socket->receive.buffer_size;
1103 			*_length = sizeof(uint32);
1104 			return B_OK;
1105 		}
1106 
1107 		case SO_SNDLOWAT:
1108 		{
1109 			uint32* size = (uint32*)value;
1110 			*size = socket->send.low_water_mark;
1111 			*_length = sizeof(uint32);
1112 			return B_OK;
1113 		}
1114 
1115 		case SO_RCVLOWAT:
1116 		{
1117 			uint32* size = (uint32*)value;
1118 			*size = socket->receive.low_water_mark;
1119 			*_length = sizeof(uint32);
1120 			return B_OK;
1121 		}
1122 
1123 		case SO_RCVTIMEO:
1124 		case SO_SNDTIMEO:
1125 		{
1126 			if (*_length < (int)sizeof(struct timeval))
1127 				return B_BAD_VALUE;
1128 
1129 			bigtime_t timeout;
1130 			if (option == SO_SNDTIMEO)
1131 				timeout = socket->send.timeout;
1132 			else
1133 				timeout = socket->receive.timeout;
1134 			if (timeout == B_INFINITE_TIMEOUT)
1135 				timeout = 0;
1136 
1137 			struct timeval* timeval = (struct timeval*)value;
1138 			timeval->tv_sec = timeout / 1000000LL;
1139 			timeval->tv_usec = timeout % 1000000LL;
1140 
1141 			*_length = sizeof(struct timeval);
1142 			return B_OK;
1143 		}
1144 
1145 		case SO_NONBLOCK:
1146 		{
1147 			int32* _set = (int32*)value;
1148 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1149 			*_length = sizeof(int32);
1150 			return B_OK;
1151 		}
1152 
1153 		case SO_ACCEPTCONN:
1154 		case SO_BROADCAST:
1155 		case SO_DEBUG:
1156 		case SO_DONTROUTE:
1157 		case SO_KEEPALIVE:
1158 		case SO_OOBINLINE:
1159 		case SO_REUSEADDR:
1160 		case SO_REUSEPORT:
1161 		case SO_USELOOPBACK:
1162 		{
1163 			int32* _set = (int32*)value;
1164 			*_set = (socket->options & option) != 0;
1165 			*_length = sizeof(int32);
1166 			return B_OK;
1167 		}
1168 
1169 		case SO_TYPE:
1170 		{
1171 			int32* _set = (int32*)value;
1172 			*_set = socket->type;
1173 			*_length = sizeof(int32);
1174 			return B_OK;
1175 		}
1176 
1177 		case SO_ERROR:
1178 		{
1179 			int32* _set = (int32*)value;
1180 			*_set = socket->error;
1181 			*_length = sizeof(int32);
1182 
1183 			socket->error = B_OK;
1184 				// clear error upon retrieval
1185 			return B_OK;
1186 		}
1187 
1188 		default:
1189 			break;
1190 	}
1191 
1192 	dprintf("socket_getsockopt: unknown option %d\n", option);
1193 	return ENOPROTOOPT;
1194 }
1195 
1196 
1197 int
1198 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1199 	int* _length)
1200 {
1201 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1202 		level, option, value, _length);
1203 }
1204 
1205 
1206 int
1207 socket_listen(net_socket* socket, int backlog)
1208 {
1209 	status_t status = socket->first_info->listen(socket->first_protocol,
1210 		backlog);
1211 	if (status == B_OK)
1212 		socket->options |= SO_ACCEPTCONN;
1213 
1214 	return status;
1215 }
1216 
1217 
1218 ssize_t
1219 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1220 	int flags)
1221 {
1222 	// If the protocol sports read_data_no_buffer() we use it.
1223 	if (socket->first_info->read_data_no_buffer != NULL)
1224 		return socket_receive_no_buffer(socket, header, data, length, flags);
1225 
1226 	size_t totalLength = length;
1227 	net_buffer* buffer;
1228 	int i;
1229 
1230 	// the convention to this function is that have header been
1231 	// present, { data, length } would have been iovec[0] and is
1232 	// always considered like that
1233 
1234 	if (header) {
1235 		// calculate the length considering all of the extra buffers
1236 		for (i = 1; i < header->msg_iovlen; i++)
1237 			totalLength += header->msg_iov[i].iov_len;
1238 	}
1239 
1240 	status_t status = socket->first_info->read_data(
1241 		socket->first_protocol, totalLength, flags, &buffer);
1242 	if (status != B_OK)
1243 		return status;
1244 
1245 	// process ancillary data
1246 	if (header != NULL) {
1247 		if (buffer != NULL && header->msg_control != NULL) {
1248 			ancillary_data_container* container
1249 				= gNetBufferModule.get_ancillary_data(buffer);
1250 			if (container != NULL)
1251 				status = process_ancillary_data(socket, container, header);
1252 			else
1253 				status = process_ancillary_data(socket, buffer, header);
1254 			if (status != B_OK) {
1255 				gNetBufferModule.free(buffer);
1256 				return status;
1257 			}
1258 		} else
1259 			header->msg_controllen = 0;
1260 	}
1261 
1262 	// TODO: - returning a NULL buffer when received 0 bytes
1263 	//         may not make much sense as we still need the address
1264 	//       - gNetBufferModule.read() uses memcpy() instead of user_memcpy
1265 
1266 	size_t nameLen = 0;
1267 
1268 	if (header) {
1269 		// TODO: - consider the control buffer options
1270 		nameLen = header->msg_namelen;
1271 		header->msg_namelen = 0;
1272 		header->msg_flags = 0;
1273 	}
1274 
1275 	if (buffer == NULL)
1276 		return 0;
1277 
1278 	size_t bytesReceived = buffer->size, bytesCopied = 0;
1279 
1280 	length = min_c(bytesReceived, length);
1281 	if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
1282 		gNetBufferModule.free(buffer);
1283 		return ENOBUFS;
1284 	}
1285 
1286 	// if first copy was a success, proceed to following
1287 	// copies as required
1288 	bytesCopied += length;
1289 
1290 	if (header) {
1291 		// we only start considering at iovec[1]
1292 		// as { data, length } is iovec[0]
1293 		for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1294 			iovec& vec = header->msg_iov[i];
1295 			size_t toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1296 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1297 					toRead) < B_OK) {
1298 				break;
1299 			}
1300 
1301 			bytesCopied += toRead;
1302 		}
1303 
1304 		if (header->msg_name != NULL) {
1305 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1306 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1307 		}
1308 	}
1309 
1310 	gNetBufferModule.free(buffer);
1311 
1312 	if (bytesCopied < bytesReceived) {
1313 		if (header)
1314 			header->msg_flags = MSG_TRUNC;
1315 
1316 		if (flags & MSG_TRUNC)
1317 			return bytesReceived;
1318 	}
1319 
1320 	return bytesCopied;
1321 }
1322 
1323 
1324 ssize_t
1325 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1326 	int flags)
1327 {
1328 	const sockaddr* address = NULL;
1329 	socklen_t addressLength = 0;
1330 	size_t bytesLeft = length;
1331 
1332 	if (length > SSIZE_MAX)
1333 		return B_BAD_VALUE;
1334 
1335 	ancillary_data_container* ancillaryData = NULL;
1336 	CObjectDeleter<
1337 		ancillary_data_container, void, delete_ancillary_data_container>
1338 		ancillaryDataDeleter;
1339 
1340 	if (header != NULL) {
1341 		address = (const sockaddr*)header->msg_name;
1342 		addressLength = header->msg_namelen;
1343 
1344 		// get the ancillary data
1345 		if (header->msg_control != NULL) {
1346 			ancillaryData = create_ancillary_data_container();
1347 			if (ancillaryData == NULL)
1348 				return B_NO_MEMORY;
1349 			ancillaryDataDeleter.SetTo(ancillaryData);
1350 
1351 			status_t status = add_ancillary_data(socket, ancillaryData,
1352 				(cmsghdr*)header->msg_control, header->msg_controllen);
1353 			if (status != B_OK)
1354 				return status;
1355 		}
1356 	}
1357 
1358 	if (addressLength == 0)
1359 		address = NULL;
1360 	else if (address == NULL)
1361 		return B_BAD_VALUE;
1362 
1363 	if (socket->peer.ss_len != 0) {
1364 		if (address != NULL)
1365 			return EISCONN;
1366 
1367 		// socket is connected, we use that address
1368 		address = (struct sockaddr*)&socket->peer;
1369 		addressLength = socket->peer.ss_len;
1370 	}
1371 
1372 	if (address == NULL || addressLength == 0) {
1373 		// don't know where to send to:
1374 		return EDESTADDRREQ;
1375 	}
1376 
1377 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1378 		&& bytesLeft > socket->send.buffer_size)
1379 		return EMSGSIZE;
1380 
1381 	if (socket->address.ss_len == 0) {
1382 		// try to bind first
1383 		status_t status = socket_bind(socket, NULL, 0);
1384 		if (status != B_OK)
1385 			return status;
1386 	}
1387 
1388 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1389 	if (socket->first_info->send_data_no_buffer != NULL) {
1390 		iovec stackVec = { (void*)data, length };
1391 		iovec* vecs = header ? header->msg_iov : &stackVec;
1392 		int vecCount = header ? header->msg_iovlen : 1;
1393 
1394 		ssize_t written = socket->first_info->send_data_no_buffer(
1395 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1396 			addressLength);
1397 		if (written > 0)
1398 			ancillaryDataDeleter.Detach();
1399 		return written;
1400 	}
1401 
1402 	// By convention, if a header is given, the (data, length) equals the first
1403 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1404 	// the size of the remaining ones.
1405 	if (header != NULL) {
1406 		if (header->msg_iovlen <= 1) {
1407 			header = NULL;
1408 		} else {
1409 			for (int i = 1; i < header->msg_iovlen; i++)
1410 				bytesLeft += header->msg_iov[i].iov_len;
1411 		}
1412 	}
1413 
1414 	ssize_t bytesSent = 0;
1415 	size_t vecOffset = 0;
1416 	uint32 vecIndex = 0;
1417 
1418 	while (bytesLeft > 0) {
1419 		// TODO: useful, maybe even computed header space!
1420 		net_buffer* buffer = gNetBufferModule.create(256);
1421 		if (buffer == NULL)
1422 			return ENOBUFS;
1423 
1424 		while (buffer->size < socket->send.buffer_size
1425 			&& buffer->size < bytesLeft) {
1426 			if (vecIndex > 0 && vecOffset == 0) {
1427 				// retrieve next iovec buffer from header
1428 				data = header->msg_iov[vecIndex].iov_base;
1429 				length = header->msg_iov[vecIndex].iov_len;
1430 			}
1431 
1432 			size_t bytes = length;
1433 			if (buffer->size + bytes > socket->send.buffer_size)
1434 				bytes = socket->send.buffer_size - buffer->size;
1435 
1436 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1437 				gNetBufferModule.free(buffer);
1438 				return ENOBUFS;
1439 			}
1440 
1441 			if (bytes != length) {
1442 				// partial send
1443 				vecOffset = bytes;
1444 				length -= vecOffset;
1445 				data = (uint8*)data + vecOffset;
1446 			} else if (header != NULL) {
1447 				// proceed with next buffer, if any
1448 				vecOffset = 0;
1449 				vecIndex++;
1450 
1451 				if (vecIndex >= (uint32)header->msg_iovlen)
1452 					break;
1453 			}
1454 		}
1455 
1456 		// attach ancillary data to the first buffer
1457 		status_t status;
1458 		if (ancillaryData != NULL) {
1459 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1460 			ancillaryDataDeleter.Detach();
1461 			ancillaryData = NULL;
1462 		}
1463 
1464 		size_t bufferSize = buffer->size;
1465 		buffer->flags = flags;
1466 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1467 		memcpy(buffer->destination, address, addressLength);
1468 		buffer->destination->sa_len = addressLength;
1469 
1470 		status = socket->first_info->send_data(socket->first_protocol, buffer);
1471 		if (status != B_OK) {
1472 			size_t sizeAfterSend = buffer->size;
1473 			gNetBufferModule.free(buffer);
1474 
1475 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1476 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1477 				// this appears to be a partial write
1478 				return bytesSent + (bufferSize - sizeAfterSend);
1479 			}
1480 			return status;
1481 		}
1482 
1483 		bytesLeft -= bufferSize;
1484 		bytesSent += bufferSize;
1485 	}
1486 
1487 	return bytesSent;
1488 }
1489 
1490 
1491 status_t
1492 socket_set_option(net_socket* socket, int level, int option, const void* value,
1493 	int length)
1494 {
1495 	if (level != SOL_SOCKET)
1496 		return ENOPROTOOPT;
1497 
1498 	TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1499 
1500 	switch (option) {
1501 		// TODO: implement other options!
1502 		case SO_LINGER:
1503 		{
1504 			if (length < (int)sizeof(struct linger))
1505 				return B_BAD_VALUE;
1506 
1507 			struct linger* linger = (struct linger*)value;
1508 			if (linger->l_onoff) {
1509 				socket->options |= SO_LINGER;
1510 				socket->linger = linger->l_linger;
1511 			} else {
1512 				socket->options &= ~SO_LINGER;
1513 				socket->linger = 0;
1514 			}
1515 			return B_OK;
1516 		}
1517 
1518 		case SO_SNDBUF:
1519 			if (length != sizeof(uint32))
1520 				return B_BAD_VALUE;
1521 
1522 			socket->send.buffer_size = *(const uint32*)value;
1523 			return B_OK;
1524 
1525 		case SO_RCVBUF:
1526 			if (length != sizeof(uint32))
1527 				return B_BAD_VALUE;
1528 
1529 			socket->receive.buffer_size = *(const uint32*)value;
1530 			return B_OK;
1531 
1532 		case SO_SNDLOWAT:
1533 			if (length != sizeof(uint32))
1534 				return B_BAD_VALUE;
1535 
1536 			socket->send.low_water_mark = *(const uint32*)value;
1537 			return B_OK;
1538 
1539 		case SO_RCVLOWAT:
1540 			if (length != sizeof(uint32))
1541 				return B_BAD_VALUE;
1542 
1543 			socket->receive.low_water_mark = *(const uint32*)value;
1544 			return B_OK;
1545 
1546 		case SO_RCVTIMEO:
1547 		case SO_SNDTIMEO:
1548 		{
1549 			if (length != sizeof(struct timeval))
1550 				return B_BAD_VALUE;
1551 
1552 			const struct timeval* timeval = (const struct timeval*)value;
1553 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1554 			if (timeout == 0)
1555 				timeout = B_INFINITE_TIMEOUT;
1556 
1557 			if (option == SO_SNDTIMEO)
1558 				socket->send.timeout = timeout;
1559 			else
1560 				socket->receive.timeout = timeout;
1561 			return B_OK;
1562 		}
1563 
1564 		case SO_NONBLOCK:
1565 			if (length != sizeof(int32))
1566 				return B_BAD_VALUE;
1567 
1568 			if (*(const int32*)value) {
1569 				socket->send.timeout = 0;
1570 				socket->receive.timeout = 0;
1571 			} else {
1572 				socket->send.timeout = B_INFINITE_TIMEOUT;
1573 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1574 			}
1575 			return B_OK;
1576 
1577 		case SO_BROADCAST:
1578 		case SO_DEBUG:
1579 		case SO_DONTROUTE:
1580 		case SO_KEEPALIVE:
1581 		case SO_OOBINLINE:
1582 		case SO_REUSEADDR:
1583 		case SO_REUSEPORT:
1584 		case SO_USELOOPBACK:
1585 			if (length != sizeof(int32))
1586 				return B_BAD_VALUE;
1587 
1588 			if (*(const int32*)value)
1589 				socket->options |= option;
1590 			else
1591 				socket->options &= ~option;
1592 			return B_OK;
1593 
1594 		case SO_BINDTODEVICE:
1595 		{
1596 			if (length != sizeof(uint32))
1597 				return B_BAD_VALUE;
1598 
1599 			// TODO: we might want to check if the device exists at all
1600 			// (although it doesn't really harm when we don't)
1601 			socket->bound_to_device = *(const uint32*)value;
1602 			return B_OK;
1603 		}
1604 
1605 		default:
1606 			break;
1607 	}
1608 
1609 	dprintf("socket_setsockopt: unknown option %d\n", option);
1610 	return ENOPROTOOPT;
1611 }
1612 
1613 
1614 int
1615 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1616 	int length)
1617 {
1618 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1619 		level, option, value, length);
1620 }
1621 
1622 
1623 int
1624 socket_shutdown(net_socket* socket, int direction)
1625 {
1626 	return socket->first_info->shutdown(socket->first_protocol, direction);
1627 }
1628 
1629 
1630 status_t
1631 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1632 {
1633 	sockets[0] = NULL;
1634 	sockets[1] = NULL;
1635 
1636 	// create sockets
1637 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1638 	if (error != B_OK)
1639 		return error;
1640 
1641 	error = socket_open(family, type, protocol, &sockets[1]);
1642 
1643 	// bind one
1644 	if (error == B_OK)
1645 		error = socket_bind(sockets[0], NULL, 0);
1646 
1647 	// start listening
1648 	if (error == B_OK)
1649 		error = socket_listen(sockets[0], 1);
1650 
1651 	// connect them
1652 	if (error == B_OK) {
1653 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1654 			sockets[0]->address.ss_len);
1655 	}
1656 
1657 	// accept a socket
1658 	net_socket* acceptedSocket = NULL;
1659 	if (error == B_OK)
1660 		error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1661 
1662 	if (error == B_OK) {
1663 		// everything worked: close the listener socket
1664 		socket_close(sockets[0]);
1665 		socket_free(sockets[0]);
1666 		sockets[0] = acceptedSocket;
1667 	} else {
1668 		// close sockets on error
1669 		for (int i = 0; i < 2; i++) {
1670 			if (sockets[i] != NULL) {
1671 				socket_close(sockets[i]);
1672 				socket_free(sockets[i]);
1673 				sockets[i] = NULL;
1674 			}
1675 		}
1676 	}
1677 
1678 	return error;
1679 }
1680 
1681 
1682 //	#pragma mark -
1683 
1684 
1685 static status_t
1686 socket_std_ops(int32 op, ...)
1687 {
1688 	switch (op) {
1689 		case B_MODULE_INIT:
1690 		{
1691 			new (&sSocketList) SocketList;
1692 			mutex_init(&sSocketLock, "socket list");
1693 
1694 #if ENABLE_DEBUGGER_COMMANDS
1695 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1696 			add_debugger_command("socket", dump_socket, "dumps a socket");
1697 #endif
1698 			return B_OK;
1699 		}
1700 		case B_MODULE_UNINIT:
1701 			ASSERT(sSocketList.IsEmpty());
1702 			mutex_destroy(&sSocketLock);
1703 
1704 #if ENABLE_DEBUGGER_COMMANDS
1705 			remove_debugger_command("socket", dump_socket);
1706 			remove_debugger_command("sockets", dump_sockets);
1707 #endif
1708 			return B_OK;
1709 
1710 		default:
1711 			return B_ERROR;
1712 	}
1713 }
1714 
1715 
1716 net_socket_module_info gNetSocketModule = {
1717 	{
1718 		NET_SOCKET_MODULE_NAME,
1719 		0,
1720 		socket_std_ops
1721 	},
1722 	socket_open,
1723 	socket_close,
1724 	socket_free,
1725 
1726 	socket_readv,
1727 	socket_writev,
1728 	socket_control,
1729 
1730 	socket_read_avail,
1731 	socket_send_avail,
1732 
1733 	socket_send_data,
1734 	socket_receive_data,
1735 
1736 	socket_get_option,
1737 	socket_set_option,
1738 
1739 	socket_get_next_stat,
1740 
1741 	// connections
1742 	socket_acquire,
1743 	socket_release,
1744 	socket_spawn_pending,
1745 	socket_dequeue_connected,
1746 	socket_count_connected,
1747 	socket_set_max_backlog,
1748 	socket_has_parent,
1749 	socket_connected,
1750 	socket_aborted,
1751 
1752 	// notifications
1753 	socket_request_notification,
1754 	socket_cancel_notification,
1755 	socket_notify,
1756 
1757 	// standard socket API
1758 	socket_accept,
1759 	socket_bind,
1760 	socket_connect,
1761 	socket_getpeername,
1762 	socket_getsockname,
1763 	socket_getsockopt,
1764 	socket_listen,
1765 	socket_receive,
1766 	socket_send,
1767 	socket_setsockopt,
1768 	socket_shutdown,
1769 	socket_socketpair
1770 };
1771 
1772