xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 47c05920fde47c2618efccd24bd82f1e79cdf05a)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
40 //#define TRACE_SOCKET
41 #ifdef TRACE_SOCKET
42 #	define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
43 #else
44 #	define TRACE(x...) ;
45 #endif
46 
47 
48 struct net_socket_private;
49 typedef DoublyLinkedList<net_socket_private> SocketList;
50 
51 struct net_socket_private : net_socket,
52 		DoublyLinkedListLinkImpl<net_socket_private>,
53 		BWeakReferenceable {
54 	net_socket_private();
55 	~net_socket_private();
56 
57 	void RemoveFromParent();
58 
59 	BWeakReference<net_socket_private> parent;
60 	team_id						owner;
61 	uint32						max_backlog;
62 	uint32						child_count;
63 	SocketList					pending_children;
64 	SocketList					connected_children;
65 
66 	struct select_sync_pool*	select_pool;
67 	mutex						lock;
68 
69 	bool						is_connected;
70 	bool						is_in_socket_list;
71 };
72 
73 
74 int socket_bind(net_socket* socket, const struct sockaddr* address,
75 	socklen_t addressLength);
76 int socket_setsockopt(net_socket* socket, int level, int option,
77 	const void* value, int length);
78 ssize_t socket_read_avail(net_socket* socket);
79 
80 static SocketList sSocketList;
81 static mutex sSocketLock;
82 
83 
84 net_socket_private::net_socket_private()
85 	:
86 	owner(-1),
87 	max_backlog(0),
88 	child_count(0),
89 	select_pool(NULL),
90 	is_connected(false),
91 	is_in_socket_list(false)
92 {
93 	first_protocol = NULL;
94 	first_info = NULL;
95 	options = 0;
96 	linger = 0;
97 	bound_to_device = 0;
98 	error = 0;
99 
100 	address.ss_len = 0;
101 	peer.ss_len = 0;
102 
103 	mutex_init(&lock, "socket");
104 
105 	// set defaults (may be overridden by the protocols)
106 	send.buffer_size = 65535;
107 	send.low_water_mark = 1;
108 	send.timeout = B_INFINITE_TIMEOUT;
109 	receive.buffer_size = 65535;
110 	receive.low_water_mark = 1;
111 	receive.timeout = B_INFINITE_TIMEOUT;
112 }
113 
114 
115 net_socket_private::~net_socket_private()
116 {
117 	TRACE("delete net_socket %p\n", this);
118 
119 	if (parent != NULL)
120 		panic("socket still has a parent!");
121 
122 	if (is_in_socket_list) {
123 		MutexLocker _(sSocketLock);
124 		sSocketList.Remove(this);
125 	}
126 
127 	mutex_lock(&lock);
128 
129 	// also delete all children of this socket
130 	while (net_socket_private* child = pending_children.RemoveHead()) {
131 		child->RemoveFromParent();
132 	}
133 	while (net_socket_private* child = connected_children.RemoveHead()) {
134 		child->RemoveFromParent();
135 	}
136 
137 	mutex_unlock(&lock);
138 
139 	put_domain_protocols(this);
140 
141 	mutex_destroy(&lock);
142 }
143 
144 
145 void
146 net_socket_private::RemoveFromParent()
147 {
148 	ASSERT(!is_in_socket_list && parent != NULL);
149 
150 	parent = NULL;
151 
152 	mutex_lock(&sSocketLock);
153 	sSocketList.Add(this);
154 	mutex_unlock(&sSocketLock);
155 
156 	is_in_socket_list = true;
157 
158 	ReleaseReference();
159 }
160 
161 
162 //	#pragma mark -
163 
164 
165 static status_t
166 create_socket(int family, int type, int protocol, net_socket_private** _socket)
167 {
168 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
169 	if (socket == NULL)
170 		return B_NO_MEMORY;
171 	status_t status = socket->InitCheck();
172 	if (status != B_OK) {
173 		delete socket;
174 		return status;
175 	}
176 
177 	socket->family = family;
178 	socket->type = type;
179 	socket->protocol = protocol;
180 
181 	status = get_domain_protocols(socket);
182 	if (status != B_OK) {
183 		delete socket;
184 		return status;
185 	}
186 
187 	TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
188 		socket->type, socket->protocol);
189 
190 #ifdef TRACE_SOCKET
191 	net_protocol* current = socket->first_protocol;
192 	for (int i = 0; current != NULL; current = current->next, i++)
193 		TRACE("  [%d] %p  %s\n", i, current, current->module->info.name);
194 #endif
195 
196 	*_socket = socket;
197 	return B_OK;
198 }
199 
200 
201 static status_t
202 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
203 	void* data, size_t dataLen)
204 {
205 	cmsghdr* header = (cmsghdr*)data;
206 
207 	if (dataLen == 0)
208 		return B_OK;
209 
210 	if (socket->first_info->add_ancillary_data == NULL)
211 		return B_NOT_SUPPORTED;
212 
213 	while (true) {
214 		if (header->cmsg_len < CMSG_LEN(0) || header->cmsg_len > dataLen)
215 			return B_BAD_VALUE;
216 
217 		status_t status = socket->first_info->add_ancillary_data(
218 			socket->first_protocol, container, header);
219 		if (status != B_OK)
220 			return status;
221 
222 		if (dataLen <= _ALIGN(header->cmsg_len))
223 			break;
224 		dataLen -= _ALIGN(header->cmsg_len);
225 		header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
226 	}
227 
228 	return B_OK;
229 }
230 
231 
232 static status_t
233 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
234 	msghdr* messageHeader)
235 {
236 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
237 	int dataBufferLen = messageHeader->msg_controllen;
238 
239 	if (container == NULL || dataBuffer == NULL) {
240 		messageHeader->msg_controllen = 0;
241 		return B_OK;
242 	}
243 
244 	ancillary_data_header header;
245 	void* data = NULL;
246 
247 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
248 		if (socket->first_info->process_ancillary_data == NULL)
249 			return B_NOT_SUPPORTED;
250 
251 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
252 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
253 		if (bytesWritten < 0)
254 			return bytesWritten;
255 
256 		dataBuffer += bytesWritten;
257 		dataBufferLen -= bytesWritten;
258 	}
259 
260 	messageHeader->msg_controllen -= dataBufferLen;
261 
262 	return B_OK;
263 }
264 
265 
266 static status_t
267 process_ancillary_data(net_socket* socket,
268 	net_buffer* buffer, msghdr* messageHeader)
269 {
270 	void *dataBuffer = messageHeader->msg_control;
271 	ssize_t bytesWritten;
272 
273 	if (dataBuffer == NULL) {
274 		messageHeader->msg_controllen = 0;
275 		return B_OK;
276 	}
277 
278 	if (socket->first_info->process_ancillary_data_no_container == NULL)
279 		return B_NOT_SUPPORTED;
280 
281 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
282 		socket->first_protocol, buffer, dataBuffer,
283 		messageHeader->msg_controllen);
284 	if (bytesWritten < 0)
285 		return bytesWritten;
286 	messageHeader->msg_controllen = bytesWritten;
287 
288 	return B_OK;
289 }
290 
291 
292 static ssize_t
293 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
294 	size_t length, int flags)
295 {
296 	iovec stackVec = { data, length };
297 	iovec* vecs = header ? header->msg_iov : &stackVec;
298 	int vecCount = header ? header->msg_iovlen : 1;
299 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
300 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
301 
302 	ancillary_data_container* ancillaryData = NULL;
303 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
304 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
305 		addressLen, flags);
306 	if (bytesRead < 0)
307 		return bytesRead;
308 
309 	CObjectDeleter<
310 		ancillary_data_container, void, delete_ancillary_data_container>
311 		ancillaryDataDeleter(ancillaryData);
312 
313 	// process ancillary data
314 	if (header != NULL) {
315 		status_t status = process_ancillary_data(socket, ancillaryData, header);
316 		if (status != B_OK)
317 			return status;
318 
319 		header->msg_flags = 0;
320 	}
321 
322 	return bytesRead;
323 }
324 
325 
326 #if ENABLE_DEBUGGER_COMMANDS
327 
328 
329 static void
330 print_socket_line(net_socket_private* socket, const char* prefix)
331 {
332 	BReference<net_socket_private> parent = socket->parent.GetReference();
333 	kprintf("%s%p %2d.%2d.%2d %6" B_PRId32 " %p %p  %p%s\n", prefix, socket,
334 		socket->family, socket->type, socket->protocol, socket->owner,
335 		socket->first_protocol, socket->first_info, parent.Get(),
336 		parent.IsSet() ? socket->is_connected ? " (c)" : " (p)" : "");
337 }
338 
339 
340 static int
341 dump_socket(int argc, char** argv)
342 {
343 	if (argc < 2) {
344 		kprintf("usage: %s [address]\n", argv[0]);
345 		return 0;
346 	}
347 
348 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
349 
350 	kprintf("SOCKET %p\n", socket);
351 	kprintf("  family.type.protocol: %d.%d.%d\n",
352 		socket->family, socket->type, socket->protocol);
353 	BReference<net_socket_private> parent = socket->parent.GetReference();
354 	kprintf("  parent:               %p\n", parent.Get());
355 	kprintf("  first protocol:       %p\n", socket->first_protocol);
356 	kprintf("  first module_info:    %p\n", socket->first_info);
357 	kprintf("  options:              %x\n", socket->options);
358 	kprintf("  linger:               %d\n", socket->linger);
359 	kprintf("  bound to device:      %" B_PRIu32 "\n", socket->bound_to_device);
360 	kprintf("  owner:                %" B_PRId32 "\n", socket->owner);
361 	kprintf("  max backlog:          %" B_PRId32 "\n", socket->max_backlog);
362 	kprintf("  is connected:         %d\n", socket->is_connected);
363 	kprintf("  child_count:          %" B_PRIu32 "\n", socket->child_count);
364 
365 	if (socket->child_count == 0)
366 		return 0;
367 
368 	kprintf("    pending children:\n");
369 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
370 	while (net_socket_private* child = iterator.Next()) {
371 		print_socket_line(child, "      ");
372 	}
373 
374 	kprintf("    connected children:\n");
375 	iterator = socket->connected_children.GetIterator();
376 	while (net_socket_private* child = iterator.Next()) {
377 		print_socket_line(child, "      ");
378 	}
379 
380 	return 0;
381 }
382 
383 
384 static int
385 dump_sockets(int argc, char** argv)
386 {
387 	kprintf("address        kind  owner protocol   module_info parent\n");
388 
389 	SocketList::Iterator iterator = sSocketList.GetIterator();
390 	while (net_socket_private* socket = iterator.Next()) {
391 		print_socket_line(socket, "");
392 
393 		SocketList::Iterator childIterator
394 			= socket->pending_children.GetIterator();
395 		while (net_socket_private* child = childIterator.Next()) {
396 			print_socket_line(child, " ");
397 		}
398 
399 		childIterator = socket->connected_children.GetIterator();
400 		while (net_socket_private* child = childIterator.Next()) {
401 			print_socket_line(child, " ");
402 		}
403 	}
404 
405 	return 0;
406 }
407 
408 
409 #endif	// ENABLE_DEBUGGER_COMMANDS
410 
411 
412 //	#pragma mark -
413 
414 
415 status_t
416 socket_open(int family, int type, int protocol, net_socket** _socket)
417 {
418 	net_socket_private* socket;
419 	status_t status = create_socket(family, type, protocol, &socket);
420 	if (status != B_OK)
421 		return status;
422 
423 	status = socket->first_info->open(socket->first_protocol);
424 	if (status != B_OK) {
425 		delete socket;
426 		return status;
427 	}
428 
429 	socket->owner = team_get_current_team_id();
430 	socket->is_in_socket_list = true;
431 
432 	mutex_lock(&sSocketLock);
433 	sSocketList.Add(socket);
434 	mutex_unlock(&sSocketLock);
435 
436 	*_socket = socket;
437 	return B_OK;
438 }
439 
440 
441 status_t
442 socket_close(net_socket* _socket)
443 {
444 	net_socket_private* socket = (net_socket_private*)_socket;
445 	return socket->first_info->close(socket->first_protocol);
446 }
447 
448 
449 void
450 socket_free(net_socket* _socket)
451 {
452 	net_socket_private* socket = (net_socket_private*)_socket;
453 	socket->first_info->free(socket->first_protocol);
454 	socket->ReleaseReference();
455 }
456 
457 
458 status_t
459 socket_control(net_socket* socket, uint32 op, void* data, size_t length)
460 {
461 	switch (op) {
462 		case FIONBIO:
463 		{
464 			if (data == NULL)
465 				return B_BAD_VALUE;
466 
467 			int value;
468 			if (is_syscall()) {
469 				if (!IS_USER_ADDRESS(data)
470 					|| user_memcpy(&value, data, sizeof(int)) != B_OK) {
471 					return B_BAD_ADDRESS;
472 				}
473 			} else
474 				value = *(int*)data;
475 
476 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
477 				sizeof(int));
478 		}
479 
480 		case FIONREAD:
481 		{
482 			if (data == NULL || (socket->options & SO_ACCEPTCONN) != 0)
483 				return B_BAD_VALUE;
484 
485 			int available = (int)socket_read_avail(socket);
486 			if (available < 0)
487 				available = 0;
488 
489 			if (is_syscall()) {
490 				if (!IS_USER_ADDRESS(data)
491 					|| user_memcpy(data, &available, sizeof(available))
492 						!= B_OK) {
493 					return B_BAD_ADDRESS;
494 				}
495 			} else
496 				*(int*)data = available;
497 
498 			return B_OK;
499 		}
500 
501 		case B_SET_BLOCKING_IO:
502 		case B_SET_NONBLOCKING_IO:
503 		{
504 			int value = op == B_SET_NONBLOCKING_IO;
505 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
506 				sizeof(int));
507 		}
508 	}
509 
510 	return socket->first_info->control(socket->first_protocol,
511 		LEVEL_DRIVER_IOCTL, op, data, &length);
512 }
513 
514 
515 ssize_t
516 socket_read_avail(net_socket* socket)
517 {
518 	return socket->first_info->read_avail(socket->first_protocol);
519 }
520 
521 
522 ssize_t
523 socket_send_avail(net_socket* socket)
524 {
525 	return socket->first_info->send_avail(socket->first_protocol);
526 }
527 
528 
529 status_t
530 socket_send_data(net_socket* socket, net_buffer* buffer)
531 {
532 	return socket->first_info->send_data(socket->first_protocol,
533 		buffer);
534 }
535 
536 
537 status_t
538 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
539 	net_buffer** _buffer)
540 {
541 	status_t status = socket->first_info->read_data(socket->first_protocol,
542 		length, flags, _buffer);
543 	if (status != B_OK)
544 		return status;
545 
546 	if (*_buffer && length < (*_buffer)->size) {
547 		// discard any data behind the amount requested
548 		gNetBufferModule.trim(*_buffer, length);
549 	}
550 
551 	return status;
552 }
553 
554 
555 status_t
556 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
557 {
558 	MutexLocker locker(sSocketLock);
559 
560 	net_socket_private* socket = NULL;
561 	SocketList::Iterator iterator = sSocketList.GetIterator();
562 	uint32 cookie = *_cookie;
563 	uint32 count = 0;
564 
565 	while (true) {
566 		socket = iterator.Next();
567 		if (socket == NULL)
568 			return B_ENTRY_NOT_FOUND;
569 
570 		// TODO: also traverse the pending connections
571 		if (count == cookie)
572 			break;
573 
574 		if (family == -1 || family == socket->family)
575 			count++;
576 	}
577 
578 	*_cookie = count + 1;
579 
580 	stat->family = socket->family;
581 	stat->type = socket->type;
582 	stat->protocol = socket->protocol;
583 	stat->owner = socket->owner;
584 	stat->state[0] = '\0';
585 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
586 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
587 	stat->receive_queue_size = 0;
588 	stat->send_queue_size = 0;
589 
590 	// fill in protocol specific data (if supported by the protocol)
591 	size_t length = sizeof(net_stat);
592 	socket->first_info->control(socket->first_protocol, socket->protocol,
593 		NET_STAT_SOCKET, stat, &length);
594 
595 	return B_OK;
596 }
597 
598 
599 //	#pragma mark - connections
600 
601 
602 bool
603 socket_acquire(net_socket* _socket)
604 {
605 	net_socket_private* socket = (net_socket_private*)_socket;
606 
607 	// During destruction, the socket might still be accessible over its
608 	// endpoint protocol. We need to make sure the endpoint cannot acquire the
609 	// socket anymore -- while not obvious, the endpoint protocol is responsible
610 	// for the proper locking here.
611 	if (socket->CountReferences() == 0)
612 		return false;
613 
614 	socket->AcquireReference();
615 	return true;
616 }
617 
618 
619 bool
620 socket_release(net_socket* _socket)
621 {
622 	net_socket_private* socket = (net_socket_private*)_socket;
623 	return socket->ReleaseReference();
624 }
625 
626 
627 status_t
628 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
629 {
630 	net_socket_private* parent = (net_socket_private*)_parent;
631 
632 	TRACE("%s(%p)\n", __FUNCTION__, parent);
633 
634 	MutexLocker locker(parent->lock);
635 
636 	// We actually accept more pending connections to compensate for those
637 	// that never complete, and also make sure at least a single connection
638 	// can always be accepted
639 	if (parent->child_count > 3 * parent->max_backlog / 2)
640 		return ENOBUFS;
641 
642 	net_socket_private* socket;
643 	status_t status = create_socket(parent->family, parent->type,
644 		parent->protocol, &socket);
645 	if (status != B_OK)
646 		return status;
647 
648 	// inherit parent's properties
649 	socket->send = parent->send;
650 	socket->receive = parent->receive;
651 	socket->options = parent->options & (SO_KEEPALIVE | SO_DONTROUTE | SO_LINGER | SO_OOBINLINE);
652 	socket->linger = parent->linger;
653 	socket->owner = parent->owner;
654 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
655 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
656 
657 	// add to the parent's list of pending connections
658 	parent->pending_children.Add(socket);
659 	socket->parent = parent;
660 	parent->child_count++;
661 
662 	*_socket = socket;
663 	return B_OK;
664 }
665 
666 
667 /*!	Dequeues a connected child from a parent socket.
668 	It also returns a reference with the child socket.
669 */
670 status_t
671 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
672 {
673 	net_socket_private* parent = (net_socket_private*)_parent;
674 
675 	mutex_lock(&parent->lock);
676 
677 	net_socket_private* socket = parent->connected_children.RemoveHead();
678 	if (socket != NULL) {
679 		socket->AcquireReference();
680 		socket->RemoveFromParent();
681 		parent->child_count--;
682 		*_socket = socket;
683 	}
684 
685 	mutex_unlock(&parent->lock);
686 
687 	if (socket == NULL)
688 		return B_ENTRY_NOT_FOUND;
689 
690 	return B_OK;
691 }
692 
693 
694 ssize_t
695 socket_count_connected(net_socket* _parent)
696 {
697 	net_socket_private* parent = (net_socket_private*)_parent;
698 
699 	MutexLocker _(parent->lock);
700 	return parent->connected_children.Count();
701 }
702 
703 
704 status_t
705 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
706 {
707 	net_socket_private* socket = (net_socket_private*)_socket;
708 
709 	// we enforce an upper limit of connections waiting to be accepted
710 	if (backlog > 256)
711 		backlog = 256;
712 
713 	MutexLocker _(socket->lock);
714 
715 	// first remove the pending connections, then the already connected
716 	// ones as needed
717 	net_socket_private* child;
718 	while (socket->child_count > backlog
719 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
720 		child->RemoveFromParent();
721 		socket->child_count--;
722 	}
723 	while (socket->child_count > backlog
724 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
725 		child->RemoveFromParent();
726 		socket->child_count--;
727 	}
728 
729 	socket->max_backlog = backlog;
730 	return B_OK;
731 }
732 
733 
734 /*!	Returns whether or not this socket has a parent. The parent might not be
735 	valid anymore, though.
736 */
737 bool
738 socket_has_parent(net_socket* _socket)
739 {
740 	net_socket_private* socket = (net_socket_private*)_socket;
741 	return socket->parent != NULL;
742 }
743 
744 
745 /*!	The socket has been connected. It will be moved to the connected queue
746 	of its parent socket.
747 */
748 status_t
749 socket_connected(net_socket* _socket)
750 {
751 	net_socket_private* socket = (net_socket_private*)_socket;
752 
753 	TRACE("socket_connected(%p)\n", socket);
754 
755 	if (socket->parent == NULL) {
756 		socket->is_connected = true;
757 		return B_OK;
758 	}
759 
760 	BReference<net_socket_private> parent = socket->parent.GetReference();
761 	if (!parent.IsSet())
762 		return B_BAD_VALUE;
763 
764 	MutexLocker _(parent->lock);
765 
766 	parent->pending_children.Remove(socket);
767 	parent->connected_children.Add(socket);
768 	socket->is_connected = true;
769 
770 	// notify parent
771 	if (parent->select_pool)
772 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
773 
774 	return B_OK;
775 }
776 
777 
778 /*!	The socket has been aborted. Steals the parent's reference, and releases
779 	it.
780 */
781 status_t
782 socket_aborted(net_socket* _socket)
783 {
784 	net_socket_private* socket = (net_socket_private*)_socket;
785 
786 	TRACE("socket_aborted(%p)\n", socket);
787 
788 	BReference<net_socket_private> parent = socket->parent.GetReference();
789 	if (!parent.IsSet())
790 		return B_BAD_VALUE;
791 
792 	MutexLocker _(parent->lock);
793 
794 	if (socket->is_connected)
795 		parent->connected_children.Remove(socket);
796 	else
797 		parent->pending_children.Remove(socket);
798 
799 	parent->child_count--;
800 	socket->RemoveFromParent();
801 
802 	return B_OK;
803 }
804 
805 
806 //	#pragma mark - notifications
807 
808 
809 status_t
810 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
811 {
812 	net_socket_private* socket = (net_socket_private*)_socket;
813 
814 	mutex_lock(&socket->lock);
815 
816 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
817 		event);
818 
819 	mutex_unlock(&socket->lock);
820 
821 	if (status != B_OK)
822 		return status;
823 
824 	// check if the event is already present
825 	// TODO: add support for poll() types
826 
827 	switch (event) {
828 		case B_SELECT_READ:
829 		{
830 			ssize_t available = socket_read_avail(socket);
831 			if ((ssize_t)socket->receive.low_water_mark <= available
832 				|| available < B_OK)
833 				notify_select_event(sync, event);
834 			break;
835 		}
836 		case B_SELECT_WRITE:
837 		{
838 			if ((socket->options & SO_ACCEPTCONN) != 0)
839 				break;
840 
841 			ssize_t available = socket_send_avail(socket);
842 			if ((ssize_t)socket->send.low_water_mark <= available
843 				|| available < B_OK)
844 				notify_select_event(sync, event);
845 			break;
846 		}
847 		case B_SELECT_ERROR:
848 			if (socket->error != B_OK)
849 				notify_select_event(sync, event);
850 			break;
851 	}
852 
853 	return B_OK;
854 }
855 
856 
857 status_t
858 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
859 {
860 	net_socket_private* socket = (net_socket_private*)_socket;
861 
862 	MutexLocker _(socket->lock);
863 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
864 }
865 
866 
867 status_t
868 socket_notify(net_socket* _socket, uint8 event, int32 value)
869 {
870 	net_socket_private* socket = (net_socket_private*)_socket;
871 	bool notify = true;
872 
873 	switch (event) {
874 		case B_SELECT_READ:
875 			if ((ssize_t)socket->receive.low_water_mark > value
876 				&& value >= B_OK)
877 				notify = false;
878 			break;
879 
880 		case B_SELECT_WRITE:
881 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
882 				notify = false;
883 			break;
884 
885 		case B_SELECT_ERROR:
886 			socket->error = value;
887 			break;
888 	}
889 
890 	MutexLocker _(socket->lock);
891 
892 	if (notify && socket->select_pool != NULL) {
893 		notify_select_event_pool(socket->select_pool, event);
894 
895 		if (event == B_SELECT_ERROR) {
896 			// always notify read/write on error
897 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
898 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
899 		}
900 	}
901 
902 	return B_OK;
903 }
904 
905 
906 //	#pragma mark - standard socket API
907 
908 
909 int
910 socket_accept(net_socket* socket, struct sockaddr* address,
911 	socklen_t* _addressLength, net_socket** _acceptedSocket)
912 {
913 	if ((socket->options & SO_ACCEPTCONN) == 0)
914 		return B_BAD_VALUE;
915 
916 	net_socket* accepted;
917 	status_t status = socket->first_info->accept(socket->first_protocol,
918 		&accepted);
919 	if (status != B_OK)
920 		return status;
921 
922 	if (address && *_addressLength > 0) {
923 		memcpy(address, &accepted->peer, min_c(*_addressLength,
924 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
925 		*_addressLength = accepted->peer.ss_len;
926 	}
927 
928 	*_acceptedSocket = accepted;
929 	return B_OK;
930 }
931 
932 
933 int
934 socket_bind(net_socket* socket, const struct sockaddr* address,
935 	socklen_t addressLength)
936 {
937 	sockaddr empty;
938 	if (address == NULL) {
939 		// special - try to bind to an empty address, like INADDR_ANY
940 		memset(&empty, 0, sizeof(sockaddr));
941 		empty.sa_len = sizeof(sockaddr);
942 		empty.sa_family = socket->family;
943 
944 		address = &empty;
945 		addressLength = sizeof(sockaddr);
946 	}
947 
948 	if (socket->address.ss_len != 0)
949 		return B_BAD_VALUE;
950 
951 	memcpy(&socket->address, address, sizeof(sockaddr));
952 	socket->address.ss_len = sizeof(sockaddr_storage);
953 
954 	status_t status = socket->first_info->bind(socket->first_protocol,
955 		(sockaddr*)address);
956 	if (status != B_OK) {
957 		// clear address again, as binding failed
958 		socket->address.ss_len = 0;
959 	}
960 
961 	return status;
962 }
963 
964 
965 int
966 socket_connect(net_socket* socket, const struct sockaddr* address,
967 	socklen_t addressLength)
968 {
969 	if (address == NULL || addressLength == 0)
970 		return ENETUNREACH;
971 
972 	if (socket->address.ss_len == 0) {
973 		// try to bind first
974 		status_t status = socket_bind(socket, NULL, 0);
975 		if (status != B_OK)
976 			return status;
977 	}
978 
979 	return socket->first_info->connect(socket->first_protocol, address);
980 }
981 
982 
983 int
984 socket_getpeername(net_socket* _socket, struct sockaddr* address,
985 	socklen_t* _addressLength)
986 {
987 	net_socket_private* socket = (net_socket_private*)_socket;
988 	BReference<net_socket_private> parent = socket->parent.GetReference();
989 
990 	if ((!parent.IsSet() && !socket->is_connected) || socket->peer.ss_len == 0)
991 		return ENOTCONN;
992 
993 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
994 	*_addressLength = socket->peer.ss_len;
995 	return B_OK;
996 }
997 
998 
999 int
1000 socket_getsockname(net_socket* socket, struct sockaddr* address,
1001 	socklen_t* _addressLength)
1002 {
1003 	if (socket->address.ss_len == 0) {
1004 		struct sockaddr buffer;
1005 		memset(&buffer, 0, sizeof(buffer));
1006 		buffer.sa_family = socket->family;
1007 
1008 		memcpy(address, &buffer, min_c(*_addressLength, sizeof(buffer)));
1009 		*_addressLength = sizeof(buffer);
1010 		return B_OK;
1011 	}
1012 
1013 	memcpy(address, &socket->address, min_c(*_addressLength,
1014 		socket->address.ss_len));
1015 	*_addressLength = socket->address.ss_len;
1016 	return B_OK;
1017 }
1018 
1019 
1020 status_t
1021 socket_get_option(net_socket* socket, int level, int option, void* value,
1022 	int* _length)
1023 {
1024 	if (level != SOL_SOCKET)
1025 		return ENOPROTOOPT;
1026 
1027 	switch (option) {
1028 		case SO_SNDBUF:
1029 		{
1030 			uint32* size = (uint32*)value;
1031 			*size = socket->send.buffer_size;
1032 			*_length = sizeof(uint32);
1033 			return B_OK;
1034 		}
1035 
1036 		case SO_RCVBUF:
1037 		{
1038 			uint32* size = (uint32*)value;
1039 			*size = socket->receive.buffer_size;
1040 			*_length = sizeof(uint32);
1041 			return B_OK;
1042 		}
1043 
1044 		case SO_SNDLOWAT:
1045 		{
1046 			uint32* size = (uint32*)value;
1047 			*size = socket->send.low_water_mark;
1048 			*_length = sizeof(uint32);
1049 			return B_OK;
1050 		}
1051 
1052 		case SO_RCVLOWAT:
1053 		{
1054 			uint32* size = (uint32*)value;
1055 			*size = socket->receive.low_water_mark;
1056 			*_length = sizeof(uint32);
1057 			return B_OK;
1058 		}
1059 
1060 		case SO_RCVTIMEO:
1061 		case SO_SNDTIMEO:
1062 		{
1063 			if (*_length < (int)sizeof(struct timeval))
1064 				return B_BAD_VALUE;
1065 
1066 			bigtime_t timeout;
1067 			if (option == SO_SNDTIMEO)
1068 				timeout = socket->send.timeout;
1069 			else
1070 				timeout = socket->receive.timeout;
1071 			if (timeout == B_INFINITE_TIMEOUT)
1072 				timeout = 0;
1073 
1074 			struct timeval* timeval = (struct timeval*)value;
1075 			timeval->tv_sec = timeout / 1000000LL;
1076 			timeval->tv_usec = timeout % 1000000LL;
1077 
1078 			*_length = sizeof(struct timeval);
1079 			return B_OK;
1080 		}
1081 
1082 		case SO_NONBLOCK:
1083 		{
1084 			int32* _set = (int32*)value;
1085 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1086 			*_length = sizeof(int32);
1087 			return B_OK;
1088 		}
1089 
1090 		case SO_ACCEPTCONN:
1091 		case SO_BROADCAST:
1092 		case SO_DEBUG:
1093 		case SO_DONTROUTE:
1094 		case SO_KEEPALIVE:
1095 		case SO_OOBINLINE:
1096 		case SO_REUSEADDR:
1097 		case SO_REUSEPORT:
1098 		case SO_USELOOPBACK:
1099 		{
1100 			int32* _set = (int32*)value;
1101 			*_set = (socket->options & option) != 0;
1102 			*_length = sizeof(int32);
1103 			return B_OK;
1104 		}
1105 
1106 		case SO_TYPE:
1107 		{
1108 			int32* _set = (int32*)value;
1109 			*_set = socket->type;
1110 			*_length = sizeof(int32);
1111 			return B_OK;
1112 		}
1113 
1114 		case SO_ERROR:
1115 		{
1116 			int32* _set = (int32*)value;
1117 			*_set = socket->error;
1118 			*_length = sizeof(int32);
1119 
1120 			socket->error = B_OK;
1121 				// clear error upon retrieval
1122 			return B_OK;
1123 		}
1124 
1125 		default:
1126 			break;
1127 	}
1128 
1129 	dprintf("socket_getsockopt: unknown option %d\n", option);
1130 	return ENOPROTOOPT;
1131 }
1132 
1133 
1134 int
1135 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1136 	int* _length)
1137 {
1138 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1139 		level, option, value, _length);
1140 }
1141 
1142 
1143 int
1144 socket_listen(net_socket* socket, int backlog)
1145 {
1146 	status_t status = socket->first_info->listen(socket->first_protocol,
1147 		backlog);
1148 	if (status == B_OK)
1149 		socket->options |= SO_ACCEPTCONN;
1150 
1151 	return status;
1152 }
1153 
1154 
1155 ssize_t
1156 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1157 	int flags)
1158 {
1159 	const int originalFlags = flags;
1160 
1161 	// MSG_NOSIGNAL is only meaningful for send(), not receive(), but it is
1162 	// sometimes specified anyway. Mask it off to avoid unnecessary errors.
1163 	flags &= ~MSG_NOSIGNAL;
1164 
1165 	// If the protocol sports read_data_no_buffer() we use it.
1166 	if (socket->first_info->read_data_no_buffer != NULL)
1167 		return socket_receive_no_buffer(socket, header, data, length, flags);
1168 
1169 	// Mask off flags handled in this function.
1170 	flags &= ~(MSG_TRUNC);
1171 
1172 	size_t totalLength = length;
1173 	if (header != NULL) {
1174 		ASSERT(data == header->msg_iov[0].iov_base);
1175 
1176 		// calculate the length considering all of the extra buffers
1177 		for (int i = 1; i < header->msg_iovlen; i++)
1178 			totalLength += header->msg_iov[i].iov_len;
1179 	}
1180 
1181 	net_buffer* buffer;
1182 	status_t status = socket->first_info->read_data(
1183 		socket->first_protocol, totalLength, flags, &buffer);
1184 	if (status != B_OK)
1185 		return status;
1186 
1187 	// process ancillary data
1188 	if (header != NULL) {
1189 		if (buffer != NULL && header->msg_control != NULL) {
1190 			ancillary_data_container* container
1191 				= gNetBufferModule.get_ancillary_data(buffer);
1192 			if (container != NULL)
1193 				status = process_ancillary_data(socket, container, header);
1194 			else
1195 				status = process_ancillary_data(socket, buffer, header);
1196 			if (status != B_OK) {
1197 				gNetBufferModule.free(buffer);
1198 				return status;
1199 			}
1200 		} else
1201 			header->msg_controllen = 0;
1202 	}
1203 
1204 	// TODO: - returning a NULL buffer when received 0 bytes
1205 	//         may not make much sense as we still need the address
1206 
1207 	size_t nameLen = 0;
1208 	if (header != NULL) {
1209 		// TODO: - consider the control buffer options
1210 		nameLen = header->msg_namelen;
1211 		header->msg_namelen = 0;
1212 		header->msg_flags = 0;
1213 	}
1214 
1215 	if (buffer == NULL)
1216 		return 0;
1217 
1218 	const size_t bytesReceived = buffer->size;
1219 	size_t bytesCopied = 0;
1220 
1221 	size_t toRead = min_c(bytesReceived, length);
1222 	status = gNetBufferModule.read(buffer, 0, data, toRead);
1223 	if (status != B_OK) {
1224 		gNetBufferModule.free(buffer);
1225 
1226 		if (status == B_BAD_ADDRESS)
1227 			return status;
1228 		return ENOBUFS;
1229 	}
1230 
1231 	// if first copy was a success, proceed to following copies as required
1232 	bytesCopied += toRead;
1233 
1234 	if (header != NULL) {
1235 		// We start at iovec[1] as { data, length } is iovec[0].
1236 		for (int i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1237 			iovec& vec = header->msg_iov[i];
1238 			toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1239 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1240 					toRead) < B_OK) {
1241 				break;
1242 			}
1243 
1244 			bytesCopied += toRead;
1245 		}
1246 
1247 		if (header->msg_name != NULL) {
1248 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1249 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1250 		}
1251 	}
1252 
1253 	gNetBufferModule.free(buffer);
1254 
1255 	if (bytesCopied < bytesReceived) {
1256 		if (header != NULL)
1257 			header->msg_flags = MSG_TRUNC;
1258 
1259 		if ((originalFlags & MSG_TRUNC) != 0)
1260 			return bytesReceived;
1261 	}
1262 
1263 	return bytesCopied;
1264 }
1265 
1266 
1267 ssize_t
1268 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1269 	int flags)
1270 {
1271 	const bool nosignal = ((flags & MSG_NOSIGNAL) != 0);
1272 	flags &= ~MSG_NOSIGNAL;
1273 
1274 	size_t bytesLeft = length;
1275 	if (length > SSIZE_MAX)
1276 		return B_BAD_VALUE;
1277 
1278 	ancillary_data_container* ancillaryData = NULL;
1279 	CObjectDeleter<
1280 		ancillary_data_container, void, delete_ancillary_data_container>
1281 		ancillaryDataDeleter;
1282 
1283 	const sockaddr* address = NULL;
1284 	socklen_t addressLength = 0;
1285 	if (header != NULL) {
1286 		address = (const sockaddr*)header->msg_name;
1287 		addressLength = header->msg_namelen;
1288 
1289 		// get the ancillary data
1290 		if (header->msg_control != NULL) {
1291 			ancillaryData = create_ancillary_data_container();
1292 			if (ancillaryData == NULL)
1293 				return B_NO_MEMORY;
1294 			ancillaryDataDeleter.SetTo(ancillaryData);
1295 
1296 			status_t status = add_ancillary_data(socket, ancillaryData,
1297 				(cmsghdr*)header->msg_control, header->msg_controllen);
1298 			if (status != B_OK)
1299 				return status;
1300 		}
1301 	}
1302 
1303 	if (addressLength == 0)
1304 		address = NULL;
1305 	else if (address == NULL)
1306 		return B_BAD_VALUE;
1307 
1308 	if (socket->peer.ss_len != 0) {
1309 		if (address != NULL)
1310 			return EISCONN;
1311 
1312 		// socket is connected, we use that address
1313 		address = (struct sockaddr*)&socket->peer;
1314 		addressLength = socket->peer.ss_len;
1315 	}
1316 
1317 	if (address == NULL || addressLength == 0) {
1318 		// don't know where to send to:
1319 		return EDESTADDRREQ;
1320 	}
1321 
1322 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1323 		&& bytesLeft > socket->send.buffer_size)
1324 		return EMSGSIZE;
1325 
1326 	if (socket->address.ss_len == 0) {
1327 		// try to bind first
1328 		status_t status = socket_bind(socket, NULL, 0);
1329 		if (status != B_OK)
1330 			return status;
1331 	}
1332 
1333 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1334 	if (socket->first_info->send_data_no_buffer != NULL) {
1335 		iovec stackVec = { (void*)data, length };
1336 		iovec* vecs = header ? header->msg_iov : &stackVec;
1337 		int vecCount = header ? header->msg_iovlen : 1;
1338 
1339 		ssize_t written = socket->first_info->send_data_no_buffer(
1340 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1341 			addressLength, flags);
1342 
1343 		// we only send signals when called from userland
1344 		if (written == EPIPE && is_syscall() && !nosignal)
1345 			send_signal(find_thread(NULL), SIGPIPE);
1346 
1347 		if (written > 0)
1348 			ancillaryDataDeleter.Detach();
1349 		return written;
1350 	}
1351 
1352 	// By convention, if a header is given, the (data, length) equals the first
1353 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1354 	// the size of the remaining ones.
1355 	if (header != NULL) {
1356 		if (header->msg_iovlen <= 1) {
1357 			header = NULL;
1358 		} else {
1359 			for (int i = 1; i < header->msg_iovlen; i++)
1360 				bytesLeft += header->msg_iov[i].iov_len;
1361 		}
1362 	}
1363 
1364 	ssize_t bytesSent = 0;
1365 	size_t vecOffset = 0;
1366 	uint32 vecIndex = 0;
1367 
1368 	while (bytesLeft > 0) {
1369 		// TODO: useful, maybe even computed header space!
1370 		net_buffer* buffer = gNetBufferModule.create(256);
1371 		if (buffer == NULL)
1372 			return ENOBUFS;
1373 
1374 		while (buffer->size < socket->send.buffer_size
1375 			&& buffer->size < bytesLeft) {
1376 			if (vecIndex > 0 && vecOffset == 0) {
1377 				// retrieve next iovec buffer from header
1378 				data = header->msg_iov[vecIndex].iov_base;
1379 				length = header->msg_iov[vecIndex].iov_len;
1380 			}
1381 
1382 			size_t bytes = length;
1383 			if (buffer->size + bytes > socket->send.buffer_size)
1384 				bytes = socket->send.buffer_size - buffer->size;
1385 
1386 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1387 				gNetBufferModule.free(buffer);
1388 				return ENOBUFS;
1389 			}
1390 
1391 			if (bytes != length) {
1392 				// partial send
1393 				vecOffset = bytes;
1394 				length -= vecOffset;
1395 				data = (uint8*)data + vecOffset;
1396 			} else if (header != NULL) {
1397 				// proceed with next buffer, if any
1398 				vecOffset = 0;
1399 				vecIndex++;
1400 
1401 				if (vecIndex >= (uint32)header->msg_iovlen)
1402 					break;
1403 			}
1404 		}
1405 
1406 		// attach ancillary data to the first buffer
1407 		status_t status;
1408 		if (ancillaryData != NULL) {
1409 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1410 			ancillaryDataDeleter.Detach();
1411 			ancillaryData = NULL;
1412 		}
1413 
1414 		size_t bufferSize = buffer->size;
1415 		buffer->flags = flags;
1416 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1417 		memcpy(buffer->destination, address, addressLength);
1418 		buffer->destination->sa_len = addressLength;
1419 
1420 		status = socket->first_info->send_data(socket->first_protocol, buffer);
1421 		if (status != B_OK) {
1422 			// we only send signals when called from userland
1423 			if (status == EPIPE && is_syscall() && !nosignal)
1424 				send_signal(find_thread(NULL), SIGPIPE);
1425 
1426 			size_t sizeAfterSend = buffer->size;
1427 			gNetBufferModule.free(buffer);
1428 
1429 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1430 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1431 				// this appears to be a partial write
1432 				return bytesSent + (bufferSize - sizeAfterSend);
1433 			}
1434 			return status;
1435 		}
1436 
1437 		bytesLeft -= bufferSize;
1438 		bytesSent += bufferSize;
1439 	}
1440 
1441 	return bytesSent;
1442 }
1443 
1444 
1445 status_t
1446 socket_set_option(net_socket* socket, int level, int option, const void* value,
1447 	int length)
1448 {
1449 	if (level != SOL_SOCKET)
1450 		return ENOPROTOOPT;
1451 
1452 	TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1453 
1454 	switch (option) {
1455 		// TODO: implement other options!
1456 		case SO_LINGER:
1457 		{
1458 			if (length < (int)sizeof(struct linger))
1459 				return B_BAD_VALUE;
1460 
1461 			struct linger* linger = (struct linger*)value;
1462 			if (linger->l_onoff) {
1463 				socket->options |= SO_LINGER;
1464 				socket->linger = linger->l_linger;
1465 			} else {
1466 				socket->options &= ~SO_LINGER;
1467 				socket->linger = 0;
1468 			}
1469 			return B_OK;
1470 		}
1471 
1472 		case SO_SNDBUF:
1473 			if (length != sizeof(uint32))
1474 				return B_BAD_VALUE;
1475 
1476 			socket->send.buffer_size = *(const uint32*)value;
1477 			return B_OK;
1478 
1479 		case SO_RCVBUF:
1480 			if (length != sizeof(uint32))
1481 				return B_BAD_VALUE;
1482 
1483 			socket->receive.buffer_size = *(const uint32*)value;
1484 			return B_OK;
1485 
1486 		case SO_SNDLOWAT:
1487 			if (length != sizeof(uint32))
1488 				return B_BAD_VALUE;
1489 
1490 			socket->send.low_water_mark = *(const uint32*)value;
1491 			return B_OK;
1492 
1493 		case SO_RCVLOWAT:
1494 			if (length != sizeof(uint32))
1495 				return B_BAD_VALUE;
1496 
1497 			socket->receive.low_water_mark = *(const uint32*)value;
1498 			return B_OK;
1499 
1500 		case SO_RCVTIMEO:
1501 		case SO_SNDTIMEO:
1502 		{
1503 			if (length != sizeof(struct timeval))
1504 				return B_BAD_VALUE;
1505 
1506 			const struct timeval* timeval = (const struct timeval*)value;
1507 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1508 			if (timeout == 0)
1509 				timeout = B_INFINITE_TIMEOUT;
1510 
1511 			if (option == SO_SNDTIMEO)
1512 				socket->send.timeout = timeout;
1513 			else
1514 				socket->receive.timeout = timeout;
1515 			return B_OK;
1516 		}
1517 
1518 		case SO_NONBLOCK:
1519 			if (length != sizeof(int32))
1520 				return B_BAD_VALUE;
1521 
1522 			if (*(const int32*)value) {
1523 				socket->send.timeout = 0;
1524 				socket->receive.timeout = 0;
1525 			} else {
1526 				socket->send.timeout = B_INFINITE_TIMEOUT;
1527 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1528 			}
1529 			return B_OK;
1530 
1531 		case SO_BROADCAST:
1532 		case SO_DEBUG:
1533 		case SO_DONTROUTE:
1534 		case SO_KEEPALIVE:
1535 		case SO_OOBINLINE:
1536 		case SO_REUSEADDR:
1537 		case SO_REUSEPORT:
1538 		case SO_USELOOPBACK:
1539 			if (length != sizeof(int32))
1540 				return B_BAD_VALUE;
1541 
1542 			if (*(const int32*)value)
1543 				socket->options |= option;
1544 			else
1545 				socket->options &= ~option;
1546 			return B_OK;
1547 
1548 		case SO_BINDTODEVICE:
1549 		{
1550 			if (length != sizeof(uint32))
1551 				return B_BAD_VALUE;
1552 
1553 			// TODO: we might want to check if the device exists at all
1554 			// (although it doesn't really harm when we don't)
1555 			socket->bound_to_device = *(const uint32*)value;
1556 			return B_OK;
1557 		}
1558 
1559 		default:
1560 			break;
1561 	}
1562 
1563 	dprintf("socket_setsockopt: unknown option %d\n", option);
1564 	return ENOPROTOOPT;
1565 }
1566 
1567 
1568 int
1569 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1570 	int length)
1571 {
1572 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1573 		level, option, value, length);
1574 }
1575 
1576 
1577 int
1578 socket_shutdown(net_socket* socket, int direction)
1579 {
1580 	return socket->first_info->shutdown(socket->first_protocol, direction);
1581 }
1582 
1583 
1584 status_t
1585 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1586 {
1587 	sockets[0] = NULL;
1588 	sockets[1] = NULL;
1589 
1590 	// create sockets
1591 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1592 	if (error != B_OK)
1593 		return error;
1594 
1595 	error = socket_open(family, type, protocol, &sockets[1]);
1596 
1597 	// bind one
1598 	if (error == B_OK)
1599 		error = socket_bind(sockets[0], NULL, 0);
1600 
1601 	// start listening
1602 	if (error == B_OK && type == SOCK_STREAM)
1603 		error = socket_listen(sockets[0], 1);
1604 
1605 	// connect them
1606 	if (error == B_OK) {
1607 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1608 			sockets[0]->address.ss_len);
1609 	}
1610 
1611 	if (error == B_OK) {
1612 		// accept a socket
1613 		if (type == SOCK_STREAM) {
1614 			net_socket* acceptedSocket = NULL;
1615 			error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1616 			if (error == B_OK) {
1617 				// everything worked: close the listener socket
1618 				socket_close(sockets[0]);
1619 				socket_free(sockets[0]);
1620 				sockets[0] = acceptedSocket;
1621 			}
1622 		// connect the other side
1623 		} else {
1624 			error = socket_connect(sockets[0], (sockaddr*)&sockets[1]->address,
1625 				sockets[1]->address.ss_len);
1626 		}
1627 	}
1628 
1629 	if (error != B_OK) {
1630 		// close sockets on error
1631 		for (int i = 0; i < 2; i++) {
1632 			if (sockets[i] != NULL) {
1633 				socket_close(sockets[i]);
1634 				socket_free(sockets[i]);
1635 				sockets[i] = NULL;
1636 			}
1637 		}
1638 	}
1639 
1640 	return error;
1641 }
1642 
1643 
1644 //	#pragma mark -
1645 
1646 
1647 static status_t
1648 socket_std_ops(int32 op, ...)
1649 {
1650 	switch (op) {
1651 		case B_MODULE_INIT:
1652 		{
1653 			new (&sSocketList) SocketList;
1654 			mutex_init(&sSocketLock, "socket list");
1655 
1656 #if ENABLE_DEBUGGER_COMMANDS
1657 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1658 			add_debugger_command("socket", dump_socket, "dumps a socket");
1659 #endif
1660 			return B_OK;
1661 		}
1662 		case B_MODULE_UNINIT:
1663 			ASSERT(sSocketList.IsEmpty());
1664 			mutex_destroy(&sSocketLock);
1665 
1666 #if ENABLE_DEBUGGER_COMMANDS
1667 			remove_debugger_command("socket", dump_socket);
1668 			remove_debugger_command("sockets", dump_sockets);
1669 #endif
1670 			return B_OK;
1671 
1672 		default:
1673 			return B_ERROR;
1674 	}
1675 }
1676 
1677 
1678 net_socket_module_info gNetSocketModule = {
1679 	{
1680 		NET_SOCKET_MODULE_NAME,
1681 		0,
1682 		socket_std_ops
1683 	},
1684 	socket_open,
1685 	socket_close,
1686 	socket_free,
1687 
1688 	socket_control,
1689 
1690 	socket_read_avail,
1691 	socket_send_avail,
1692 
1693 	socket_send_data,
1694 	socket_receive_data,
1695 
1696 	socket_get_option,
1697 	socket_set_option,
1698 
1699 	socket_get_next_stat,
1700 
1701 	// connections
1702 	socket_acquire,
1703 	socket_release,
1704 	socket_spawn_pending,
1705 	socket_dequeue_connected,
1706 	socket_count_connected,
1707 	socket_set_max_backlog,
1708 	socket_has_parent,
1709 	socket_connected,
1710 	socket_aborted,
1711 
1712 	// notifications
1713 	socket_request_notification,
1714 	socket_cancel_notification,
1715 	socket_notify,
1716 
1717 	// standard socket API
1718 	socket_accept,
1719 	socket_bind,
1720 	socket_connect,
1721 	socket_getpeername,
1722 	socket_getsockname,
1723 	socket_getsockopt,
1724 	socket_listen,
1725 	socket_receive,
1726 	socket_send,
1727 	socket_setsockopt,
1728 	socket_shutdown,
1729 	socket_socketpair
1730 };
1731 
1732