xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 4c07199d8201fcf267e90be0d24b76799d03cea6)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
40 //#define TRACE_SOCKET
41 #ifdef TRACE_SOCKET
42 #	define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
43 #else
44 #	define TRACE(x...) ;
45 #endif
46 
47 
48 struct net_socket_private;
49 typedef DoublyLinkedList<net_socket_private> SocketList;
50 
51 struct net_socket_private : net_socket,
52 		DoublyLinkedListLinkImpl<net_socket_private>,
53 		BWeakReferenceable {
54 	net_socket_private();
55 	~net_socket_private();
56 
57 	void RemoveFromParent();
58 
59 	BWeakReference<net_socket_private> parent;
60 	team_id						owner;
61 	uint32						max_backlog;
62 	uint32						child_count;
63 	SocketList					pending_children;
64 	SocketList					connected_children;
65 
66 	struct select_sync_pool*	select_pool;
67 	mutex						lock;
68 
69 	bool						is_connected;
70 	bool						is_in_socket_list;
71 };
72 
73 
74 int socket_bind(net_socket* socket, const struct sockaddr* address,
75 	socklen_t addressLength);
76 int socket_setsockopt(net_socket* socket, int level, int option,
77 	const void* value, int length);
78 ssize_t socket_read_avail(net_socket* socket);
79 
80 static SocketList sSocketList;
81 static mutex sSocketLock;
82 
83 
84 net_socket_private::net_socket_private()
85 	:
86 	owner(-1),
87 	max_backlog(0),
88 	child_count(0),
89 	select_pool(NULL),
90 	is_connected(false),
91 	is_in_socket_list(false)
92 {
93 	first_protocol = NULL;
94 	first_info = NULL;
95 	options = 0;
96 	linger = 0;
97 	bound_to_device = 0;
98 	error = 0;
99 
100 	address.ss_len = 0;
101 	peer.ss_len = 0;
102 
103 	mutex_init(&lock, "socket");
104 
105 	// set defaults (may be overridden by the protocols)
106 	send.buffer_size = 65535;
107 	send.low_water_mark = 1;
108 	send.timeout = B_INFINITE_TIMEOUT;
109 	receive.buffer_size = 65535;
110 	receive.low_water_mark = 1;
111 	receive.timeout = B_INFINITE_TIMEOUT;
112 }
113 
114 
115 net_socket_private::~net_socket_private()
116 {
117 	TRACE("delete net_socket %p\n", this);
118 
119 	if (parent != NULL)
120 		panic("socket still has a parent!");
121 
122 	if (is_in_socket_list) {
123 		MutexLocker _(sSocketLock);
124 		sSocketList.Remove(this);
125 	}
126 
127 	mutex_lock(&lock);
128 
129 	// also delete all children of this socket
130 	while (net_socket_private* child = pending_children.RemoveHead()) {
131 		child->RemoveFromParent();
132 	}
133 	while (net_socket_private* child = connected_children.RemoveHead()) {
134 		child->RemoveFromParent();
135 	}
136 
137 	mutex_unlock(&lock);
138 
139 	put_domain_protocols(this);
140 
141 	mutex_destroy(&lock);
142 }
143 
144 
145 void
146 net_socket_private::RemoveFromParent()
147 {
148 	ASSERT(!is_in_socket_list && parent != NULL);
149 
150 	parent = NULL;
151 
152 	mutex_lock(&sSocketLock);
153 	sSocketList.Add(this);
154 	mutex_unlock(&sSocketLock);
155 
156 	is_in_socket_list = true;
157 
158 	ReleaseReference();
159 }
160 
161 
162 //	#pragma mark -
163 
164 
165 static status_t
166 create_socket(int family, int type, int protocol, net_socket_private** _socket)
167 {
168 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
169 	if (socket == NULL)
170 		return B_NO_MEMORY;
171 	status_t status = socket->InitCheck();
172 	if (status != B_OK) {
173 		delete socket;
174 		return status;
175 	}
176 
177 	socket->family = family;
178 	socket->type = type;
179 	socket->protocol = protocol;
180 
181 	status = get_domain_protocols(socket);
182 	if (status != B_OK) {
183 		delete socket;
184 		return status;
185 	}
186 
187 	TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
188 		socket->type, socket->protocol);
189 
190 #ifdef TRACE_SOCKET
191 	net_protocol* current = socket->first_protocol;
192 	for (int i = 0; current != NULL; current = current->next, i++)
193 		TRACE("  [%d] %p  %s\n", i, current, current->module->info.name);
194 #endif
195 
196 	*_socket = socket;
197 	return B_OK;
198 }
199 
200 
201 static status_t
202 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
203 	void* data, size_t dataLen)
204 {
205 	cmsghdr* header = (cmsghdr*)data;
206 
207 	if (dataLen == 0)
208 		return B_OK;
209 
210 	if (socket->first_info->add_ancillary_data == NULL)
211 		return B_NOT_SUPPORTED;
212 
213 	while (true) {
214 		if (header->cmsg_len < CMSG_LEN(0) || header->cmsg_len > dataLen)
215 			return B_BAD_VALUE;
216 
217 		status_t status = socket->first_info->add_ancillary_data(
218 			socket->first_protocol, container, header);
219 		if (status != B_OK)
220 			return status;
221 
222 		if (dataLen <= _ALIGN(header->cmsg_len))
223 			break;
224 		dataLen -= _ALIGN(header->cmsg_len);
225 		header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
226 	}
227 
228 	return B_OK;
229 }
230 
231 
232 static status_t
233 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
234 	msghdr* messageHeader)
235 {
236 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
237 	int dataBufferLen = messageHeader->msg_controllen;
238 
239 	if (container == NULL || dataBuffer == NULL) {
240 		messageHeader->msg_controllen = 0;
241 		return B_OK;
242 	}
243 
244 	ancillary_data_header header;
245 	void* data = NULL;
246 
247 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
248 		if (socket->first_info->process_ancillary_data == NULL)
249 			return B_NOT_SUPPORTED;
250 
251 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
252 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
253 		if (bytesWritten < 0)
254 			return bytesWritten;
255 
256 		dataBuffer += bytesWritten;
257 		dataBufferLen -= bytesWritten;
258 	}
259 
260 	messageHeader->msg_controllen -= dataBufferLen;
261 
262 	return B_OK;
263 }
264 
265 
266 static status_t
267 process_ancillary_data(net_socket* socket,
268 	net_buffer* buffer, msghdr* messageHeader)
269 {
270 	void *dataBuffer = messageHeader->msg_control;
271 	ssize_t bytesWritten;
272 
273 	if (dataBuffer == NULL) {
274 		messageHeader->msg_controllen = 0;
275 		return B_OK;
276 	}
277 
278 	if (socket->first_info->process_ancillary_data_no_container == NULL)
279 		return B_NOT_SUPPORTED;
280 
281 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
282 		socket->first_protocol, buffer, dataBuffer,
283 		messageHeader->msg_controllen);
284 	if (bytesWritten < 0)
285 		return bytesWritten;
286 	messageHeader->msg_controllen = bytesWritten;
287 
288 	return B_OK;
289 }
290 
291 
292 static ssize_t
293 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
294 	size_t length, int flags)
295 {
296 	iovec stackVec = { data, length };
297 	iovec* vecs = header ? header->msg_iov : &stackVec;
298 	int vecCount = header ? header->msg_iovlen : 1;
299 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
300 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
301 
302 	ancillary_data_container* ancillaryData = NULL;
303 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
304 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
305 		addressLen, flags);
306 	if (bytesRead < 0)
307 		return bytesRead;
308 
309 	CObjectDeleter<
310 		ancillary_data_container, void, delete_ancillary_data_container>
311 		ancillaryDataDeleter(ancillaryData);
312 
313 	// process ancillary data
314 	if (header != NULL) {
315 		status_t status = process_ancillary_data(socket, ancillaryData, header);
316 		if (status != B_OK)
317 			return status;
318 
319 		header->msg_flags = 0;
320 	}
321 
322 	return bytesRead;
323 }
324 
325 
326 #if ENABLE_DEBUGGER_COMMANDS
327 
328 
329 static void
330 print_socket_line(net_socket_private* socket, const char* prefix)
331 {
332 	BReference<net_socket_private> parent;
333 	if (socket->parent.PrivatePointer() != NULL)
334 		parent = socket->parent.GetReference();
335 	kprintf("%s%p %2d.%2d.%2d %6" B_PRId32 " %p %p  %p%s\n", prefix, socket,
336 		socket->family, socket->type, socket->protocol, socket->owner,
337 		socket->first_protocol, socket->first_info, parent.Get(),
338 		parent.IsSet() ? socket->is_connected ? " (c)" : " (p)" : "");
339 }
340 
341 
342 static int
343 dump_socket(int argc, char** argv)
344 {
345 	if (argc < 2) {
346 		kprintf("usage: %s [address]\n", argv[0]);
347 		return 0;
348 	}
349 
350 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
351 
352 	kprintf("SOCKET %p\n", socket);
353 	kprintf("  family.type.protocol: %d.%d.%d\n",
354 		socket->family, socket->type, socket->protocol);
355 	BReference<net_socket_private> parent;
356 	if (socket->parent.PrivatePointer() != NULL)
357 		parent = socket->parent.GetReference();
358 	kprintf("  parent:               %p\n", parent.Get());
359 	kprintf("  first protocol:       %p\n", socket->first_protocol);
360 	kprintf("  first module_info:    %p\n", socket->first_info);
361 	kprintf("  options:              %x\n", socket->options);
362 	kprintf("  linger:               %d\n", socket->linger);
363 	kprintf("  bound to device:      %" B_PRIu32 "\n", socket->bound_to_device);
364 	kprintf("  owner:                %" B_PRId32 "\n", socket->owner);
365 	kprintf("  max backlog:          %" B_PRId32 "\n", socket->max_backlog);
366 	kprintf("  is connected:         %d\n", socket->is_connected);
367 	kprintf("  child_count:          %" B_PRIu32 "\n", socket->child_count);
368 
369 	if (socket->child_count == 0)
370 		return 0;
371 
372 	kprintf("    pending children:\n");
373 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
374 	while (net_socket_private* child = iterator.Next()) {
375 		print_socket_line(child, "      ");
376 	}
377 
378 	kprintf("    connected children:\n");
379 	iterator = socket->connected_children.GetIterator();
380 	while (net_socket_private* child = iterator.Next()) {
381 		print_socket_line(child, "      ");
382 	}
383 
384 	return 0;
385 }
386 
387 
388 static int
389 dump_sockets(int argc, char** argv)
390 {
391 	kprintf("address        kind  owner protocol   module_info parent\n");
392 
393 	SocketList::Iterator iterator = sSocketList.GetIterator();
394 	while (net_socket_private* socket = iterator.Next()) {
395 		print_socket_line(socket, "");
396 
397 		SocketList::Iterator childIterator
398 			= socket->pending_children.GetIterator();
399 		while (net_socket_private* child = childIterator.Next()) {
400 			print_socket_line(child, " ");
401 		}
402 
403 		childIterator = socket->connected_children.GetIterator();
404 		while (net_socket_private* child = childIterator.Next()) {
405 			print_socket_line(child, " ");
406 		}
407 	}
408 
409 	return 0;
410 }
411 
412 
413 #endif	// ENABLE_DEBUGGER_COMMANDS
414 
415 
416 //	#pragma mark -
417 
418 
419 status_t
420 socket_open(int family, int type, int protocol, net_socket** _socket)
421 {
422 	net_socket_private* socket;
423 	status_t status = create_socket(family, type, protocol, &socket);
424 	if (status != B_OK)
425 		return status;
426 
427 	status = socket->first_info->open(socket->first_protocol);
428 	if (status != B_OK) {
429 		delete socket;
430 		return status;
431 	}
432 
433 	socket->owner = team_get_current_team_id();
434 	socket->is_in_socket_list = true;
435 
436 	mutex_lock(&sSocketLock);
437 	sSocketList.Add(socket);
438 	mutex_unlock(&sSocketLock);
439 
440 	*_socket = socket;
441 	return B_OK;
442 }
443 
444 
445 status_t
446 socket_close(net_socket* _socket)
447 {
448 	net_socket_private* socket = (net_socket_private*)_socket;
449 	return socket->first_info->close(socket->first_protocol);
450 }
451 
452 
453 void
454 socket_free(net_socket* _socket)
455 {
456 	net_socket_private* socket = (net_socket_private*)_socket;
457 	socket->first_info->free(socket->first_protocol);
458 	socket->ReleaseReference();
459 }
460 
461 
462 status_t
463 socket_control(net_socket* socket, uint32 op, void* data, size_t length)
464 {
465 	switch (op) {
466 		case FIONBIO:
467 		{
468 			if (data == NULL)
469 				return B_BAD_VALUE;
470 
471 			int value;
472 			if (is_syscall()) {
473 				if (!IS_USER_ADDRESS(data)
474 					|| user_memcpy(&value, data, sizeof(int)) != B_OK) {
475 					return B_BAD_ADDRESS;
476 				}
477 			} else
478 				value = *(int*)data;
479 
480 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
481 				sizeof(int));
482 		}
483 
484 		case FIONREAD:
485 		{
486 			if (data == NULL || (socket->options & SO_ACCEPTCONN) != 0)
487 				return B_BAD_VALUE;
488 
489 			int available = (int)socket_read_avail(socket);
490 			if (available < 0)
491 				available = 0;
492 
493 			if (is_syscall()) {
494 				if (!IS_USER_ADDRESS(data)
495 					|| user_memcpy(data, &available, sizeof(available))
496 						!= B_OK) {
497 					return B_BAD_ADDRESS;
498 				}
499 			} else
500 				*(int*)data = available;
501 
502 			return B_OK;
503 		}
504 
505 		case B_SET_BLOCKING_IO:
506 		case B_SET_NONBLOCKING_IO:
507 		{
508 			int value = op == B_SET_NONBLOCKING_IO;
509 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
510 				sizeof(int));
511 		}
512 	}
513 
514 	return socket->first_info->control(socket->first_protocol,
515 		LEVEL_DRIVER_IOCTL, op, data, &length);
516 }
517 
518 
519 ssize_t
520 socket_read_avail(net_socket* socket)
521 {
522 	return socket->first_info->read_avail(socket->first_protocol);
523 }
524 
525 
526 ssize_t
527 socket_send_avail(net_socket* socket)
528 {
529 	return socket->first_info->send_avail(socket->first_protocol);
530 }
531 
532 
533 status_t
534 socket_send_data(net_socket* socket, net_buffer* buffer)
535 {
536 	return socket->first_info->send_data(socket->first_protocol,
537 		buffer);
538 }
539 
540 
541 status_t
542 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
543 	net_buffer** _buffer)
544 {
545 	status_t status = socket->first_info->read_data(socket->first_protocol,
546 		length, flags, _buffer);
547 	if (status != B_OK)
548 		return status;
549 
550 	if (*_buffer && length < (*_buffer)->size) {
551 		// discard any data behind the amount requested
552 		gNetBufferModule.trim(*_buffer, length);
553 	}
554 
555 	return status;
556 }
557 
558 
559 status_t
560 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
561 {
562 	MutexLocker locker(sSocketLock);
563 
564 	net_socket_private* socket = NULL;
565 	SocketList::Iterator iterator = sSocketList.GetIterator();
566 	uint32 cookie = *_cookie;
567 	uint32 count = 0;
568 
569 	while (true) {
570 		socket = iterator.Next();
571 		if (socket == NULL)
572 			return B_ENTRY_NOT_FOUND;
573 
574 		// TODO: also traverse the pending connections
575 		if (count == cookie)
576 			break;
577 
578 		if (family == -1 || family == socket->family)
579 			count++;
580 	}
581 
582 	*_cookie = count + 1;
583 
584 	stat->family = socket->family;
585 	stat->type = socket->type;
586 	stat->protocol = socket->protocol;
587 	stat->owner = socket->owner;
588 	stat->state[0] = '\0';
589 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
590 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
591 	stat->receive_queue_size = 0;
592 	stat->send_queue_size = 0;
593 
594 	// fill in protocol specific data (if supported by the protocol)
595 	size_t length = sizeof(net_stat);
596 	socket->first_info->control(socket->first_protocol, socket->protocol,
597 		NET_STAT_SOCKET, stat, &length);
598 
599 	return B_OK;
600 }
601 
602 
603 //	#pragma mark - connections
604 
605 
606 bool
607 socket_acquire(net_socket* _socket)
608 {
609 	net_socket_private* socket = (net_socket_private*)_socket;
610 
611 	// During destruction, the socket might still be accessible over its
612 	// endpoint protocol. We need to make sure the endpoint cannot acquire the
613 	// socket anymore -- while not obvious, the endpoint protocol is responsible
614 	// for the proper locking here.
615 	if (socket->CountReferences() == 0)
616 		return false;
617 
618 	socket->AcquireReference();
619 	return true;
620 }
621 
622 
623 bool
624 socket_release(net_socket* _socket)
625 {
626 	net_socket_private* socket = (net_socket_private*)_socket;
627 	return socket->ReleaseReference();
628 }
629 
630 
631 status_t
632 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
633 {
634 	net_socket_private* parent = (net_socket_private*)_parent;
635 
636 	TRACE("%s(%p)\n", __FUNCTION__, parent);
637 
638 	MutexLocker locker(parent->lock);
639 
640 	// We actually accept more pending connections to compensate for those
641 	// that never complete, and also make sure at least a single connection
642 	// can always be accepted
643 	if (parent->child_count > 3 * parent->max_backlog / 2)
644 		return ENOBUFS;
645 
646 	net_socket_private* socket;
647 	status_t status = create_socket(parent->family, parent->type,
648 		parent->protocol, &socket);
649 	if (status != B_OK)
650 		return status;
651 
652 	// inherit parent's properties
653 	socket->send = parent->send;
654 	socket->receive = parent->receive;
655 	socket->options = parent->options & ~SO_ACCEPTCONN;
656 	socket->linger = parent->linger;
657 	socket->owner = parent->owner;
658 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
659 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
660 
661 	// add to the parent's list of pending connections
662 	parent->pending_children.Add(socket);
663 	socket->parent = parent;
664 	parent->child_count++;
665 
666 	*_socket = socket;
667 	return B_OK;
668 }
669 
670 
671 /*!	Dequeues a connected child from a parent socket.
672 	It also returns a reference with the child socket.
673 */
674 status_t
675 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
676 {
677 	net_socket_private* parent = (net_socket_private*)_parent;
678 
679 	mutex_lock(&parent->lock);
680 
681 	net_socket_private* socket = parent->connected_children.RemoveHead();
682 	if (socket != NULL) {
683 		socket->AcquireReference();
684 		socket->RemoveFromParent();
685 		parent->child_count--;
686 		*_socket = socket;
687 	}
688 
689 	mutex_unlock(&parent->lock);
690 
691 	if (socket == NULL)
692 		return B_ENTRY_NOT_FOUND;
693 
694 	return B_OK;
695 }
696 
697 
698 ssize_t
699 socket_count_connected(net_socket* _parent)
700 {
701 	net_socket_private* parent = (net_socket_private*)_parent;
702 
703 	MutexLocker _(parent->lock);
704 	return parent->connected_children.Count();
705 }
706 
707 
708 status_t
709 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
710 {
711 	net_socket_private* socket = (net_socket_private*)_socket;
712 
713 	// we enforce an upper limit of connections waiting to be accepted
714 	if (backlog > 256)
715 		backlog = 256;
716 
717 	MutexLocker _(socket->lock);
718 
719 	// first remove the pending connections, then the already connected
720 	// ones as needed
721 	net_socket_private* child;
722 	while (socket->child_count > backlog
723 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
724 		child->RemoveFromParent();
725 		socket->child_count--;
726 	}
727 	while (socket->child_count > backlog
728 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
729 		child->RemoveFromParent();
730 		socket->child_count--;
731 	}
732 
733 	socket->max_backlog = backlog;
734 	return B_OK;
735 }
736 
737 
738 /*!	Returns whether or not this socket has a parent. The parent might not be
739 	valid anymore, though.
740 */
741 bool
742 socket_has_parent(net_socket* _socket)
743 {
744 	net_socket_private* socket = (net_socket_private*)_socket;
745 	return socket->parent != NULL;
746 }
747 
748 
749 /*!	The socket has been connected. It will be moved to the connected queue
750 	of its parent socket.
751 */
752 status_t
753 socket_connected(net_socket* _socket)
754 {
755 	net_socket_private* socket = (net_socket_private*)_socket;
756 
757 	TRACE("socket_connected(%p)\n", socket);
758 
759 	if (socket->parent == NULL) {
760 		socket->is_connected = true;
761 		return B_OK;
762 	}
763 
764 	BReference<net_socket_private> parent = socket->parent.GetReference();
765 	if (!parent.IsSet())
766 		return B_BAD_VALUE;
767 
768 	MutexLocker _(parent->lock);
769 
770 	parent->pending_children.Remove(socket);
771 	parent->connected_children.Add(socket);
772 	socket->is_connected = true;
773 
774 	// notify parent
775 	if (parent->select_pool)
776 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
777 
778 	return B_OK;
779 }
780 
781 
782 /*!	The socket has been aborted. Steals the parent's reference, and releases
783 	it.
784 */
785 status_t
786 socket_aborted(net_socket* _socket)
787 {
788 	net_socket_private* socket = (net_socket_private*)_socket;
789 
790 	TRACE("socket_aborted(%p)\n", socket);
791 
792 	BReference<net_socket_private> parent = socket->parent.GetReference();
793 	if (!parent.IsSet())
794 		return B_BAD_VALUE;
795 
796 	MutexLocker _(parent->lock);
797 
798 	if (socket->is_connected)
799 		parent->connected_children.Remove(socket);
800 	else
801 		parent->pending_children.Remove(socket);
802 
803 	parent->child_count--;
804 	socket->RemoveFromParent();
805 
806 	return B_OK;
807 }
808 
809 
810 //	#pragma mark - notifications
811 
812 
813 status_t
814 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
815 {
816 	net_socket_private* socket = (net_socket_private*)_socket;
817 
818 	mutex_lock(&socket->lock);
819 
820 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
821 		event);
822 
823 	mutex_unlock(&socket->lock);
824 
825 	if (status != B_OK)
826 		return status;
827 
828 	// check if the event is already present
829 	// TODO: add support for poll() types
830 
831 	switch (event) {
832 		case B_SELECT_READ:
833 		{
834 			ssize_t available = socket_read_avail(socket);
835 			if ((ssize_t)socket->receive.low_water_mark <= available
836 				|| available < B_OK)
837 				notify_select_event(sync, event);
838 			break;
839 		}
840 		case B_SELECT_WRITE:
841 		{
842 			if ((socket->options & SO_ACCEPTCONN) != 0)
843 				break;
844 
845 			ssize_t available = socket_send_avail(socket);
846 			if ((ssize_t)socket->send.low_water_mark <= available
847 				|| available < B_OK)
848 				notify_select_event(sync, event);
849 			break;
850 		}
851 		case B_SELECT_ERROR:
852 			if (socket->error != B_OK)
853 				notify_select_event(sync, event);
854 			break;
855 	}
856 
857 	return B_OK;
858 }
859 
860 
861 status_t
862 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
863 {
864 	net_socket_private* socket = (net_socket_private*)_socket;
865 
866 	MutexLocker _(socket->lock);
867 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
868 }
869 
870 
871 status_t
872 socket_notify(net_socket* _socket, uint8 event, int32 value)
873 {
874 	net_socket_private* socket = (net_socket_private*)_socket;
875 	bool notify = true;
876 
877 	switch (event) {
878 		case B_SELECT_READ:
879 			if ((ssize_t)socket->receive.low_water_mark > value
880 				&& value >= B_OK)
881 				notify = false;
882 			break;
883 
884 		case B_SELECT_WRITE:
885 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
886 				notify = false;
887 			break;
888 
889 		case B_SELECT_ERROR:
890 			socket->error = value;
891 			break;
892 	}
893 
894 	MutexLocker _(socket->lock);
895 
896 	if (notify && socket->select_pool != NULL) {
897 		notify_select_event_pool(socket->select_pool, event);
898 
899 		if (event == B_SELECT_ERROR) {
900 			// always notify read/write on error
901 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
902 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
903 		}
904 	}
905 
906 	return B_OK;
907 }
908 
909 
910 //	#pragma mark - standard socket API
911 
912 
913 int
914 socket_accept(net_socket* socket, struct sockaddr* address,
915 	socklen_t* _addressLength, net_socket** _acceptedSocket)
916 {
917 	if ((socket->options & SO_ACCEPTCONN) == 0)
918 		return B_BAD_VALUE;
919 
920 	net_socket* accepted;
921 	status_t status = socket->first_info->accept(socket->first_protocol,
922 		&accepted);
923 	if (status != B_OK)
924 		return status;
925 
926 	if (address && *_addressLength > 0) {
927 		memcpy(address, &accepted->peer, min_c(*_addressLength,
928 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
929 		*_addressLength = accepted->peer.ss_len;
930 	}
931 
932 	*_acceptedSocket = accepted;
933 	return B_OK;
934 }
935 
936 
937 int
938 socket_bind(net_socket* socket, const struct sockaddr* address,
939 	socklen_t addressLength)
940 {
941 	sockaddr empty;
942 	if (address == NULL) {
943 		// special - try to bind to an empty address, like INADDR_ANY
944 		memset(&empty, 0, sizeof(sockaddr));
945 		empty.sa_len = sizeof(sockaddr);
946 		empty.sa_family = socket->family;
947 
948 		address = &empty;
949 		addressLength = sizeof(sockaddr);
950 	}
951 
952 	if (socket->address.ss_len != 0)
953 		return B_BAD_VALUE;
954 
955 	memcpy(&socket->address, address, sizeof(sockaddr));
956 	socket->address.ss_len = sizeof(sockaddr_storage);
957 
958 	status_t status = socket->first_info->bind(socket->first_protocol,
959 		(sockaddr*)address);
960 	if (status != B_OK) {
961 		// clear address again, as binding failed
962 		socket->address.ss_len = 0;
963 	}
964 
965 	return status;
966 }
967 
968 
969 int
970 socket_connect(net_socket* socket, const struct sockaddr* address,
971 	socklen_t addressLength)
972 {
973 	if (address == NULL || addressLength == 0)
974 		return ENETUNREACH;
975 
976 	if (socket->address.ss_len == 0) {
977 		// try to bind first
978 		status_t status = socket_bind(socket, NULL, 0);
979 		if (status != B_OK)
980 			return status;
981 	}
982 
983 	return socket->first_info->connect(socket->first_protocol, address);
984 }
985 
986 
987 int
988 socket_getpeername(net_socket* _socket, struct sockaddr* address,
989 	socklen_t* _addressLength)
990 {
991 	net_socket_private* socket = (net_socket_private*)_socket;
992 	BReference<net_socket_private> parent;
993 	if (socket->parent.PrivatePointer() != NULL)
994 		parent = socket->parent.GetReference();
995 
996 	if ((!parent.IsSet() && !socket->is_connected) || socket->peer.ss_len == 0)
997 		return ENOTCONN;
998 
999 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
1000 	*_addressLength = socket->peer.ss_len;
1001 	return B_OK;
1002 }
1003 
1004 
1005 int
1006 socket_getsockname(net_socket* socket, struct sockaddr* address,
1007 	socklen_t* _addressLength)
1008 {
1009 	if (socket->address.ss_len == 0) {
1010 		struct sockaddr buffer;
1011 		memset(&buffer, 0, sizeof(buffer));
1012 		buffer.sa_family = socket->family;
1013 
1014 		memcpy(address, &buffer, min_c(*_addressLength, sizeof(buffer)));
1015 		*_addressLength = sizeof(buffer);
1016 		return B_OK;
1017 	}
1018 
1019 	memcpy(address, &socket->address, min_c(*_addressLength,
1020 		socket->address.ss_len));
1021 	*_addressLength = socket->address.ss_len;
1022 	return B_OK;
1023 }
1024 
1025 
1026 status_t
1027 socket_get_option(net_socket* socket, int level, int option, void* value,
1028 	int* _length)
1029 {
1030 	if (level != SOL_SOCKET)
1031 		return ENOPROTOOPT;
1032 
1033 	switch (option) {
1034 		case SO_SNDBUF:
1035 		{
1036 			uint32* size = (uint32*)value;
1037 			*size = socket->send.buffer_size;
1038 			*_length = sizeof(uint32);
1039 			return B_OK;
1040 		}
1041 
1042 		case SO_RCVBUF:
1043 		{
1044 			uint32* size = (uint32*)value;
1045 			*size = socket->receive.buffer_size;
1046 			*_length = sizeof(uint32);
1047 			return B_OK;
1048 		}
1049 
1050 		case SO_SNDLOWAT:
1051 		{
1052 			uint32* size = (uint32*)value;
1053 			*size = socket->send.low_water_mark;
1054 			*_length = sizeof(uint32);
1055 			return B_OK;
1056 		}
1057 
1058 		case SO_RCVLOWAT:
1059 		{
1060 			uint32* size = (uint32*)value;
1061 			*size = socket->receive.low_water_mark;
1062 			*_length = sizeof(uint32);
1063 			return B_OK;
1064 		}
1065 
1066 		case SO_RCVTIMEO:
1067 		case SO_SNDTIMEO:
1068 		{
1069 			if (*_length < (int)sizeof(struct timeval))
1070 				return B_BAD_VALUE;
1071 
1072 			bigtime_t timeout;
1073 			if (option == SO_SNDTIMEO)
1074 				timeout = socket->send.timeout;
1075 			else
1076 				timeout = socket->receive.timeout;
1077 			if (timeout == B_INFINITE_TIMEOUT)
1078 				timeout = 0;
1079 
1080 			struct timeval* timeval = (struct timeval*)value;
1081 			timeval->tv_sec = timeout / 1000000LL;
1082 			timeval->tv_usec = timeout % 1000000LL;
1083 
1084 			*_length = sizeof(struct timeval);
1085 			return B_OK;
1086 		}
1087 
1088 		case SO_NONBLOCK:
1089 		{
1090 			int32* _set = (int32*)value;
1091 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1092 			*_length = sizeof(int32);
1093 			return B_OK;
1094 		}
1095 
1096 		case SO_ACCEPTCONN:
1097 		case SO_BROADCAST:
1098 		case SO_DEBUG:
1099 		case SO_DONTROUTE:
1100 		case SO_KEEPALIVE:
1101 		case SO_OOBINLINE:
1102 		case SO_REUSEADDR:
1103 		case SO_REUSEPORT:
1104 		case SO_USELOOPBACK:
1105 		{
1106 			int32* _set = (int32*)value;
1107 			*_set = (socket->options & option) != 0;
1108 			*_length = sizeof(int32);
1109 			return B_OK;
1110 		}
1111 
1112 		case SO_TYPE:
1113 		{
1114 			int32* _set = (int32*)value;
1115 			*_set = socket->type;
1116 			*_length = sizeof(int32);
1117 			return B_OK;
1118 		}
1119 
1120 		case SO_ERROR:
1121 		{
1122 			int32* _set = (int32*)value;
1123 			*_set = socket->error;
1124 			*_length = sizeof(int32);
1125 
1126 			socket->error = B_OK;
1127 				// clear error upon retrieval
1128 			return B_OK;
1129 		}
1130 
1131 		default:
1132 			break;
1133 	}
1134 
1135 	dprintf("socket_getsockopt: unknown option %d\n", option);
1136 	return ENOPROTOOPT;
1137 }
1138 
1139 
1140 int
1141 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1142 	int* _length)
1143 {
1144 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1145 		level, option, value, _length);
1146 }
1147 
1148 
1149 int
1150 socket_listen(net_socket* socket, int backlog)
1151 {
1152 	status_t status = socket->first_info->listen(socket->first_protocol,
1153 		backlog);
1154 	if (status == B_OK)
1155 		socket->options |= SO_ACCEPTCONN;
1156 
1157 	return status;
1158 }
1159 
1160 
1161 ssize_t
1162 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1163 	int flags)
1164 {
1165 	const int originalFlags = flags;
1166 
1167 	// MSG_NOSIGNAL is only meaningful for send(), not receive(), but it is
1168 	// sometimes specified anyway. Mask it off to avoid unnecessary errors.
1169 	flags &= ~MSG_NOSIGNAL;
1170 
1171 	// If the protocol sports read_data_no_buffer() we use it.
1172 	if (socket->first_info->read_data_no_buffer != NULL)
1173 		return socket_receive_no_buffer(socket, header, data, length, flags);
1174 
1175 	// Mask off flags handled in this function.
1176 	flags &= ~(MSG_TRUNC);
1177 
1178 	size_t totalLength = length;
1179 	if (header != NULL) {
1180 		ASSERT(data == header->msg_iov[0].iov_base);
1181 
1182 		// calculate the length considering all of the extra buffers
1183 		for (int i = 1; i < header->msg_iovlen; i++)
1184 			totalLength += header->msg_iov[i].iov_len;
1185 	}
1186 
1187 	net_buffer* buffer;
1188 	status_t status = socket->first_info->read_data(
1189 		socket->first_protocol, totalLength, flags, &buffer);
1190 	if (status != B_OK)
1191 		return status;
1192 
1193 	// process ancillary data
1194 	if (header != NULL) {
1195 		if (buffer != NULL && header->msg_control != NULL) {
1196 			ancillary_data_container* container
1197 				= gNetBufferModule.get_ancillary_data(buffer);
1198 			if (container != NULL)
1199 				status = process_ancillary_data(socket, container, header);
1200 			else
1201 				status = process_ancillary_data(socket, buffer, header);
1202 			if (status != B_OK) {
1203 				gNetBufferModule.free(buffer);
1204 				return status;
1205 			}
1206 		} else
1207 			header->msg_controllen = 0;
1208 	}
1209 
1210 	// TODO: - returning a NULL buffer when received 0 bytes
1211 	//         may not make much sense as we still need the address
1212 
1213 	size_t nameLen = 0;
1214 	if (header != NULL) {
1215 		// TODO: - consider the control buffer options
1216 		nameLen = header->msg_namelen;
1217 		header->msg_namelen = 0;
1218 		header->msg_flags = 0;
1219 	}
1220 
1221 	if (buffer == NULL)
1222 		return 0;
1223 
1224 	const size_t bytesReceived = buffer->size;
1225 	size_t bytesCopied = 0;
1226 
1227 	size_t toRead = min_c(bytesReceived, length);
1228 	status = gNetBufferModule.read(buffer, 0, data, toRead);
1229 	if (status != B_OK) {
1230 		gNetBufferModule.free(buffer);
1231 
1232 		if (status == B_BAD_ADDRESS)
1233 			return status;
1234 		return ENOBUFS;
1235 	}
1236 
1237 	// if first copy was a success, proceed to following copies as required
1238 	bytesCopied += toRead;
1239 
1240 	if (header != NULL) {
1241 		// We start at iovec[1] as { data, length } is iovec[0].
1242 		for (int i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1243 			iovec& vec = header->msg_iov[i];
1244 			toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1245 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1246 					toRead) < B_OK) {
1247 				break;
1248 			}
1249 
1250 			bytesCopied += toRead;
1251 		}
1252 
1253 		if (header->msg_name != NULL) {
1254 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1255 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1256 		}
1257 	}
1258 
1259 	gNetBufferModule.free(buffer);
1260 
1261 	if (bytesCopied < bytesReceived) {
1262 		if (header != NULL)
1263 			header->msg_flags = MSG_TRUNC;
1264 
1265 		if ((originalFlags & MSG_TRUNC) != 0)
1266 			return bytesReceived;
1267 	}
1268 
1269 	return bytesCopied;
1270 }
1271 
1272 
1273 ssize_t
1274 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1275 	int flags)
1276 {
1277 	const bool nosignal = ((flags & MSG_NOSIGNAL) != 0);
1278 	flags &= ~MSG_NOSIGNAL;
1279 
1280 	size_t bytesLeft = length;
1281 	if (length > SSIZE_MAX)
1282 		return B_BAD_VALUE;
1283 
1284 	ancillary_data_container* ancillaryData = NULL;
1285 	CObjectDeleter<
1286 		ancillary_data_container, void, delete_ancillary_data_container>
1287 		ancillaryDataDeleter;
1288 
1289 	const sockaddr* address = NULL;
1290 	socklen_t addressLength = 0;
1291 	if (header != NULL) {
1292 		address = (const sockaddr*)header->msg_name;
1293 		addressLength = header->msg_namelen;
1294 
1295 		// get the ancillary data
1296 		if (header->msg_control != NULL) {
1297 			ancillaryData = create_ancillary_data_container();
1298 			if (ancillaryData == NULL)
1299 				return B_NO_MEMORY;
1300 			ancillaryDataDeleter.SetTo(ancillaryData);
1301 
1302 			status_t status = add_ancillary_data(socket, ancillaryData,
1303 				(cmsghdr*)header->msg_control, header->msg_controllen);
1304 			if (status != B_OK)
1305 				return status;
1306 		}
1307 	}
1308 
1309 	if (addressLength == 0)
1310 		address = NULL;
1311 	else if (address == NULL)
1312 		return B_BAD_VALUE;
1313 
1314 	if (socket->peer.ss_len != 0) {
1315 		if (address != NULL)
1316 			return EISCONN;
1317 
1318 		// socket is connected, we use that address
1319 		address = (struct sockaddr*)&socket->peer;
1320 		addressLength = socket->peer.ss_len;
1321 	}
1322 
1323 	if (address == NULL || addressLength == 0) {
1324 		// don't know where to send to:
1325 		return EDESTADDRREQ;
1326 	}
1327 
1328 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1329 		&& bytesLeft > socket->send.buffer_size)
1330 		return EMSGSIZE;
1331 
1332 	if (socket->address.ss_len == 0) {
1333 		// try to bind first
1334 		status_t status = socket_bind(socket, NULL, 0);
1335 		if (status != B_OK)
1336 			return status;
1337 	}
1338 
1339 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1340 	if (socket->first_info->send_data_no_buffer != NULL) {
1341 		iovec stackVec = { (void*)data, length };
1342 		iovec* vecs = header ? header->msg_iov : &stackVec;
1343 		int vecCount = header ? header->msg_iovlen : 1;
1344 
1345 		ssize_t written = socket->first_info->send_data_no_buffer(
1346 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1347 			addressLength, flags);
1348 
1349 		// we only send signals when called from userland
1350 		if (written == EPIPE && is_syscall() && !nosignal)
1351 			send_signal(find_thread(NULL), SIGPIPE);
1352 
1353 		if (written > 0)
1354 			ancillaryDataDeleter.Detach();
1355 		return written;
1356 	}
1357 
1358 	// By convention, if a header is given, the (data, length) equals the first
1359 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1360 	// the size of the remaining ones.
1361 	if (header != NULL) {
1362 		if (header->msg_iovlen <= 1) {
1363 			header = NULL;
1364 		} else {
1365 			for (int i = 1; i < header->msg_iovlen; i++)
1366 				bytesLeft += header->msg_iov[i].iov_len;
1367 		}
1368 	}
1369 
1370 	ssize_t bytesSent = 0;
1371 	size_t vecOffset = 0;
1372 	uint32 vecIndex = 0;
1373 
1374 	while (bytesLeft > 0) {
1375 		// TODO: useful, maybe even computed header space!
1376 		net_buffer* buffer = gNetBufferModule.create(256);
1377 		if (buffer == NULL)
1378 			return ENOBUFS;
1379 
1380 		while (buffer->size < socket->send.buffer_size
1381 			&& buffer->size < bytesLeft) {
1382 			if (vecIndex > 0 && vecOffset == 0) {
1383 				// retrieve next iovec buffer from header
1384 				data = header->msg_iov[vecIndex].iov_base;
1385 				length = header->msg_iov[vecIndex].iov_len;
1386 			}
1387 
1388 			size_t bytes = length;
1389 			if (buffer->size + bytes > socket->send.buffer_size)
1390 				bytes = socket->send.buffer_size - buffer->size;
1391 
1392 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1393 				gNetBufferModule.free(buffer);
1394 				return ENOBUFS;
1395 			}
1396 
1397 			if (bytes != length) {
1398 				// partial send
1399 				vecOffset = bytes;
1400 				length -= vecOffset;
1401 				data = (uint8*)data + vecOffset;
1402 			} else if (header != NULL) {
1403 				// proceed with next buffer, if any
1404 				vecOffset = 0;
1405 				vecIndex++;
1406 
1407 				if (vecIndex >= (uint32)header->msg_iovlen)
1408 					break;
1409 			}
1410 		}
1411 
1412 		// attach ancillary data to the first buffer
1413 		status_t status;
1414 		if (ancillaryData != NULL) {
1415 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1416 			ancillaryDataDeleter.Detach();
1417 			ancillaryData = NULL;
1418 		}
1419 
1420 		size_t bufferSize = buffer->size;
1421 		buffer->flags = flags;
1422 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1423 		memcpy(buffer->destination, address, addressLength);
1424 		buffer->destination->sa_len = addressLength;
1425 
1426 		status = socket->first_info->send_data(socket->first_protocol, buffer);
1427 		if (status != B_OK) {
1428 			// we only send signals when called from userland
1429 			if (status == EPIPE && is_syscall() && !nosignal)
1430 				send_signal(find_thread(NULL), SIGPIPE);
1431 
1432 			size_t sizeAfterSend = buffer->size;
1433 			gNetBufferModule.free(buffer);
1434 
1435 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1436 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1437 				// this appears to be a partial write
1438 				return bytesSent + (bufferSize - sizeAfterSend);
1439 			}
1440 			return status;
1441 		}
1442 
1443 		bytesLeft -= bufferSize;
1444 		bytesSent += bufferSize;
1445 	}
1446 
1447 	return bytesSent;
1448 }
1449 
1450 
1451 status_t
1452 socket_set_option(net_socket* socket, int level, int option, const void* value,
1453 	int length)
1454 {
1455 	if (level != SOL_SOCKET)
1456 		return ENOPROTOOPT;
1457 
1458 	TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1459 
1460 	switch (option) {
1461 		// TODO: implement other options!
1462 		case SO_LINGER:
1463 		{
1464 			if (length < (int)sizeof(struct linger))
1465 				return B_BAD_VALUE;
1466 
1467 			struct linger* linger = (struct linger*)value;
1468 			if (linger->l_onoff) {
1469 				socket->options |= SO_LINGER;
1470 				socket->linger = linger->l_linger;
1471 			} else {
1472 				socket->options &= ~SO_LINGER;
1473 				socket->linger = 0;
1474 			}
1475 			return B_OK;
1476 		}
1477 
1478 		case SO_SNDBUF:
1479 			if (length != sizeof(uint32))
1480 				return B_BAD_VALUE;
1481 
1482 			socket->send.buffer_size = *(const uint32*)value;
1483 			return B_OK;
1484 
1485 		case SO_RCVBUF:
1486 			if (length != sizeof(uint32))
1487 				return B_BAD_VALUE;
1488 
1489 			socket->receive.buffer_size = *(const uint32*)value;
1490 			return B_OK;
1491 
1492 		case SO_SNDLOWAT:
1493 			if (length != sizeof(uint32))
1494 				return B_BAD_VALUE;
1495 
1496 			socket->send.low_water_mark = *(const uint32*)value;
1497 			return B_OK;
1498 
1499 		case SO_RCVLOWAT:
1500 			if (length != sizeof(uint32))
1501 				return B_BAD_VALUE;
1502 
1503 			socket->receive.low_water_mark = *(const uint32*)value;
1504 			return B_OK;
1505 
1506 		case SO_RCVTIMEO:
1507 		case SO_SNDTIMEO:
1508 		{
1509 			if (length != sizeof(struct timeval))
1510 				return B_BAD_VALUE;
1511 
1512 			const struct timeval* timeval = (const struct timeval*)value;
1513 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1514 			if (timeout == 0)
1515 				timeout = B_INFINITE_TIMEOUT;
1516 
1517 			if (option == SO_SNDTIMEO)
1518 				socket->send.timeout = timeout;
1519 			else
1520 				socket->receive.timeout = timeout;
1521 			return B_OK;
1522 		}
1523 
1524 		case SO_NONBLOCK:
1525 			if (length != sizeof(int32))
1526 				return B_BAD_VALUE;
1527 
1528 			if (*(const int32*)value) {
1529 				socket->send.timeout = 0;
1530 				socket->receive.timeout = 0;
1531 			} else {
1532 				socket->send.timeout = B_INFINITE_TIMEOUT;
1533 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1534 			}
1535 			return B_OK;
1536 
1537 		case SO_BROADCAST:
1538 		case SO_DEBUG:
1539 		case SO_DONTROUTE:
1540 		case SO_KEEPALIVE:
1541 		case SO_OOBINLINE:
1542 		case SO_REUSEADDR:
1543 		case SO_REUSEPORT:
1544 		case SO_USELOOPBACK:
1545 			if (length != sizeof(int32))
1546 				return B_BAD_VALUE;
1547 
1548 			if (*(const int32*)value)
1549 				socket->options |= option;
1550 			else
1551 				socket->options &= ~option;
1552 			return B_OK;
1553 
1554 		case SO_BINDTODEVICE:
1555 		{
1556 			if (length != sizeof(uint32))
1557 				return B_BAD_VALUE;
1558 
1559 			// TODO: we might want to check if the device exists at all
1560 			// (although it doesn't really harm when we don't)
1561 			socket->bound_to_device = *(const uint32*)value;
1562 			return B_OK;
1563 		}
1564 
1565 		default:
1566 			break;
1567 	}
1568 
1569 	dprintf("socket_setsockopt: unknown option %d\n", option);
1570 	return ENOPROTOOPT;
1571 }
1572 
1573 
1574 int
1575 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1576 	int length)
1577 {
1578 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1579 		level, option, value, length);
1580 }
1581 
1582 
1583 int
1584 socket_shutdown(net_socket* socket, int direction)
1585 {
1586 	return socket->first_info->shutdown(socket->first_protocol, direction);
1587 }
1588 
1589 
1590 status_t
1591 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1592 {
1593 	sockets[0] = NULL;
1594 	sockets[1] = NULL;
1595 
1596 	// create sockets
1597 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1598 	if (error != B_OK)
1599 		return error;
1600 
1601 	error = socket_open(family, type, protocol, &sockets[1]);
1602 
1603 	// bind one
1604 	if (error == B_OK)
1605 		error = socket_bind(sockets[0], NULL, 0);
1606 
1607 	// start listening
1608 	if (error == B_OK && type == SOCK_STREAM)
1609 		error = socket_listen(sockets[0], 1);
1610 
1611 	// connect them
1612 	if (error == B_OK) {
1613 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1614 			sockets[0]->address.ss_len);
1615 	}
1616 
1617 	if (error == B_OK) {
1618 		// accept a socket
1619 		if (type == SOCK_STREAM) {
1620 			net_socket* acceptedSocket = NULL;
1621 			error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1622 			if (error == B_OK) {
1623 				// everything worked: close the listener socket
1624 				socket_close(sockets[0]);
1625 				socket_free(sockets[0]);
1626 				sockets[0] = acceptedSocket;
1627 			}
1628 		// connect the other side
1629 		} else {
1630 			error = socket_connect(sockets[0], (sockaddr*)&sockets[1]->address,
1631 				sockets[1]->address.ss_len);
1632 		}
1633 	}
1634 
1635 	if (error != B_OK) {
1636 		// close sockets on error
1637 		for (int i = 0; i < 2; i++) {
1638 			if (sockets[i] != NULL) {
1639 				socket_close(sockets[i]);
1640 				socket_free(sockets[i]);
1641 				sockets[i] = NULL;
1642 			}
1643 		}
1644 	}
1645 
1646 	return error;
1647 }
1648 
1649 
1650 //	#pragma mark -
1651 
1652 
1653 static status_t
1654 socket_std_ops(int32 op, ...)
1655 {
1656 	switch (op) {
1657 		case B_MODULE_INIT:
1658 		{
1659 			new (&sSocketList) SocketList;
1660 			mutex_init(&sSocketLock, "socket list");
1661 
1662 #if ENABLE_DEBUGGER_COMMANDS
1663 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1664 			add_debugger_command("socket", dump_socket, "dumps a socket");
1665 #endif
1666 			return B_OK;
1667 		}
1668 		case B_MODULE_UNINIT:
1669 			ASSERT(sSocketList.IsEmpty());
1670 			mutex_destroy(&sSocketLock);
1671 
1672 #if ENABLE_DEBUGGER_COMMANDS
1673 			remove_debugger_command("socket", dump_socket);
1674 			remove_debugger_command("sockets", dump_sockets);
1675 #endif
1676 			return B_OK;
1677 
1678 		default:
1679 			return B_ERROR;
1680 	}
1681 }
1682 
1683 
// The socket module's function table. The entries are positional, so their
// order has to match the member order of net_socket_module_info exactly
// (the structure is declared elsewhere).
net_socket_module_info gNetSocketModule = {
	{
		// module header: name, a zero field (presumably the module flags),
		// and the init/uninit hook
		NET_SOCKET_MODULE_NAME,
		0,
		socket_std_ops
	},
	// socket life cycle
	socket_open,
	socket_close,
	socket_free,

	socket_control,

	// available data/space queries
	socket_read_avail,
	socket_send_avail,

	// buffer based data transfer
	socket_send_data,
	socket_receive_data,

	// socket level options; unlike socket_getsockopt()/socket_setsockopt()
	// below, socket_set_option() works on the net_socket directly instead
	// of going through the protocol
	socket_get_option,
	socket_set_option,

	socket_get_next_stat,

	// connections
	socket_acquire,
	socket_release,
	socket_spawn_pending,
	socket_dequeue_connected,
	socket_count_connected,
	socket_set_max_backlog,
	socket_has_parent,
	socket_connected,
	socket_aborted,

	// notifications
	socket_request_notification,
	socket_cancel_notification,
	socket_notify,

	// standard socket API
	socket_accept,
	socket_bind,
	socket_connect,
	socket_getpeername,
	socket_getsockname,
	socket_getsockopt,
	socket_listen,
	socket_receive,
	socket_send,
	socket_setsockopt,
	socket_shutdown,
	socket_socketpair
};
1737 
1738