xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 94c66b276e92f206678ca2e2c816d2665946afdd)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
40 //#define TRACE_SOCKET
// Debug tracing: when TRACE_SOCKET is defined, TRACE() forwards to dprintf()
// with the stack's debug prefix; otherwise it compiles to nothing.
#ifdef TRACE_SOCKET
#	define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
#else
	// Expand to exactly one (empty) statement, so that "TRACE(...);" remains
	// valid as the unbraced body of an if/else.
#	define TRACE(x...) do { } while (0)
#endif
46 
47 
48 struct net_socket_private;
49 typedef DoublyLinkedList<net_socket_private> SocketList;
50 
51 struct net_socket_private : net_socket,
52 		DoublyLinkedListLinkImpl<net_socket_private>,
53 		BWeakReferenceable {
54 	net_socket_private();
55 	~net_socket_private();
56 
57 	void RemoveFromParent();
58 
59 	BWeakReference<net_socket_private> parent;
60 	team_id						owner;
61 	uint32						max_backlog;
62 	uint32						child_count;
63 	SocketList					pending_children;
64 	SocketList					connected_children;
65 
66 	struct select_sync_pool*	select_pool;
67 	mutex						lock;
68 
69 	bool						is_connected;
70 	bool						is_in_socket_list;
71 };
72 
73 
74 int socket_bind(net_socket* socket, const struct sockaddr* address,
75 	socklen_t addressLength);
76 int socket_setsockopt(net_socket* socket, int level, int option,
77 	const void* value, int length);
78 ssize_t socket_read_avail(net_socket* socket);
79 
80 static SocketList sSocketList;
81 static mutex sSocketLock;
82 
83 
84 net_socket_private::net_socket_private()
85 	:
86 	owner(-1),
87 	max_backlog(0),
88 	child_count(0),
89 	select_pool(NULL),
90 	is_connected(false),
91 	is_in_socket_list(false)
92 {
93 	first_protocol = NULL;
94 	first_info = NULL;
95 	options = 0;
96 	linger = 0;
97 	bound_to_device = 0;
98 	error = 0;
99 
100 	address.ss_len = 0;
101 	peer.ss_len = 0;
102 
103 	mutex_init(&lock, "socket");
104 
105 	// set defaults (may be overridden by the protocols)
106 	send.buffer_size = 65535;
107 	send.low_water_mark = 1;
108 	send.timeout = B_INFINITE_TIMEOUT;
109 	receive.buffer_size = 65535;
110 	receive.low_water_mark = 1;
111 	receive.timeout = B_INFINITE_TIMEOUT;
112 }
113 
114 
115 net_socket_private::~net_socket_private()
116 {
117 	TRACE("delete net_socket %p\n", this);
118 
119 	if (parent != NULL)
120 		panic("socket still has a parent!");
121 
122 	if (is_in_socket_list) {
123 		MutexLocker _(sSocketLock);
124 		sSocketList.Remove(this);
125 	}
126 
127 	mutex_lock(&lock);
128 
129 	// also delete all children of this socket
130 	while (net_socket_private* child = pending_children.RemoveHead()) {
131 		child->RemoveFromParent();
132 	}
133 	while (net_socket_private* child = connected_children.RemoveHead()) {
134 		child->RemoveFromParent();
135 	}
136 
137 	mutex_unlock(&lock);
138 
139 	put_domain_protocols(this);
140 
141 	mutex_destroy(&lock);
142 }
143 
144 
145 void
146 net_socket_private::RemoveFromParent()
147 {
148 	ASSERT(!is_in_socket_list && parent != NULL);
149 
150 	parent = NULL;
151 
152 	mutex_lock(&sSocketLock);
153 	sSocketList.Add(this);
154 	mutex_unlock(&sSocketLock);
155 
156 	is_in_socket_list = true;
157 
158 	ReleaseReference();
159 }
160 
161 
162 //	#pragma mark -
163 
164 
165 static status_t
166 create_socket(int family, int type, int protocol, net_socket_private** _socket)
167 {
168 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
169 	if (socket == NULL)
170 		return B_NO_MEMORY;
171 	status_t status = socket->InitCheck();
172 	if (status != B_OK) {
173 		delete socket;
174 		return status;
175 	}
176 
177 	socket->family = family;
178 	socket->type = type;
179 	socket->protocol = protocol;
180 
181 	status = get_domain_protocols(socket);
182 	if (status != B_OK) {
183 		delete socket;
184 		return status;
185 	}
186 
187 	TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
188 		socket->type, socket->protocol);
189 
190 #ifdef TRACE_SOCKET
191 	net_protocol* current = socket->first_protocol;
192 	for (int i = 0; current != NULL; current = current->next, i++)
193 		TRACE("  [%d] %p  %s\n", i, current, current->module->info.name);
194 #endif
195 
196 	*_socket = socket;
197 	return B_OK;
198 }
199 
200 
201 static status_t
202 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
203 	void* data, size_t dataLen)
204 {
205 	cmsghdr* header = (cmsghdr*)data;
206 
207 	if (dataLen == 0)
208 		return B_OK;
209 
210 	if (socket->first_info->add_ancillary_data == NULL)
211 		return B_NOT_SUPPORTED;
212 
213 	while (true) {
214 		if (header->cmsg_len < CMSG_LEN(0) || header->cmsg_len > dataLen)
215 			return B_BAD_VALUE;
216 
217 		status_t status = socket->first_info->add_ancillary_data(
218 			socket->first_protocol, container, header);
219 		if (status != B_OK)
220 			return status;
221 
222 		const size_t alignedLength = CMSG_ALIGN(header->cmsg_len);
223 		if (dataLen <= alignedLength)
224 			break;
225 
226 		dataLen -= alignedLength;
227 		header = (cmsghdr*)((uint8*)header + alignedLength);
228 	}
229 
230 	return B_OK;
231 }
232 
233 
234 static status_t
235 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
236 	msghdr* messageHeader)
237 {
238 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
239 	int dataBufferLen = messageHeader->msg_controllen;
240 
241 	if (container == NULL || dataBuffer == NULL) {
242 		messageHeader->msg_controllen = 0;
243 		return B_OK;
244 	}
245 
246 	ancillary_data_header header;
247 	void* data = NULL;
248 
249 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
250 		if (socket->first_info->process_ancillary_data == NULL)
251 			return B_NOT_SUPPORTED;
252 
253 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
254 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
255 		if (bytesWritten < 0)
256 			return bytesWritten;
257 
258 		dataBuffer += bytesWritten;
259 		dataBufferLen -= bytesWritten;
260 	}
261 
262 	messageHeader->msg_controllen -= dataBufferLen;
263 
264 	return B_OK;
265 }
266 
267 
268 static status_t
269 process_ancillary_data(net_socket* socket,
270 	net_buffer* buffer, msghdr* messageHeader)
271 {
272 	void *dataBuffer = messageHeader->msg_control;
273 	ssize_t bytesWritten;
274 
275 	if (dataBuffer == NULL) {
276 		messageHeader->msg_controllen = 0;
277 		return B_OK;
278 	}
279 
280 	if (socket->first_info->process_ancillary_data_no_container == NULL)
281 		return B_NOT_SUPPORTED;
282 
283 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
284 		socket->first_protocol, buffer, dataBuffer,
285 		messageHeader->msg_controllen);
286 	if (bytesWritten < 0)
287 		return bytesWritten;
288 	messageHeader->msg_controllen = bytesWritten;
289 
290 	return B_OK;
291 }
292 
293 
294 static ssize_t
295 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
296 	size_t length, int flags)
297 {
298 	iovec stackVec = { data, length };
299 	iovec* vecs = header ? header->msg_iov : &stackVec;
300 	int vecCount = header ? header->msg_iovlen : 1;
301 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
302 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
303 
304 	ancillary_data_container* ancillaryData = NULL;
305 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
306 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
307 		addressLen, flags);
308 	if (bytesRead < 0)
309 		return bytesRead;
310 
311 	CObjectDeleter<
312 		ancillary_data_container, void, delete_ancillary_data_container>
313 		ancillaryDataDeleter(ancillaryData);
314 
315 	// process ancillary data
316 	if (header != NULL) {
317 		status_t status = process_ancillary_data(socket, ancillaryData, header);
318 		if (status != B_OK)
319 			return status;
320 
321 		header->msg_flags = 0;
322 	}
323 
324 	return bytesRead;
325 }
326 
327 
328 #if ENABLE_DEBUGGER_COMMANDS
329 
330 
331 static void
332 print_socket_line(net_socket_private* socket, const char* prefix)
333 {
334 	BReference<net_socket_private> parent = socket->parent.GetReference();
335 	kprintf("%s%p %2d.%2d.%2d %6" B_PRId32 " %p %p  %p%s\n", prefix, socket,
336 		socket->family, socket->type, socket->protocol, socket->owner,
337 		socket->first_protocol, socket->first_info, parent.Get(),
338 		parent.IsSet() ? socket->is_connected ? " (c)" : " (p)" : "");
339 }
340 
341 
342 static int
343 dump_socket(int argc, char** argv)
344 {
345 	if (argc < 2) {
346 		kprintf("usage: %s [address]\n", argv[0]);
347 		return 0;
348 	}
349 
350 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
351 
352 	kprintf("SOCKET %p\n", socket);
353 	kprintf("  family.type.protocol: %d.%d.%d\n",
354 		socket->family, socket->type, socket->protocol);
355 	BReference<net_socket_private> parent = socket->parent.GetReference();
356 	kprintf("  parent:               %p\n", parent.Get());
357 	kprintf("  first protocol:       %p\n", socket->first_protocol);
358 	kprintf("  first module_info:    %p\n", socket->first_info);
359 	kprintf("  options:              %x\n", socket->options);
360 	kprintf("  linger:               %d\n", socket->linger);
361 	kprintf("  bound to device:      %" B_PRIu32 "\n", socket->bound_to_device);
362 	kprintf("  owner:                %" B_PRId32 "\n", socket->owner);
363 	kprintf("  max backlog:          %" B_PRId32 "\n", socket->max_backlog);
364 	kprintf("  is connected:         %d\n", socket->is_connected);
365 	kprintf("  child_count:          %" B_PRIu32 "\n", socket->child_count);
366 
367 	if (socket->child_count == 0)
368 		return 0;
369 
370 	kprintf("    pending children:\n");
371 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
372 	while (net_socket_private* child = iterator.Next()) {
373 		print_socket_line(child, "      ");
374 	}
375 
376 	kprintf("    connected children:\n");
377 	iterator = socket->connected_children.GetIterator();
378 	while (net_socket_private* child = iterator.Next()) {
379 		print_socket_line(child, "      ");
380 	}
381 
382 	return 0;
383 }
384 
385 
386 static int
387 dump_sockets(int argc, char** argv)
388 {
389 	kprintf("address        kind  owner protocol   module_info parent\n");
390 
391 	SocketList::Iterator iterator = sSocketList.GetIterator();
392 	while (net_socket_private* socket = iterator.Next()) {
393 		print_socket_line(socket, "");
394 
395 		SocketList::Iterator childIterator
396 			= socket->pending_children.GetIterator();
397 		while (net_socket_private* child = childIterator.Next()) {
398 			print_socket_line(child, " ");
399 		}
400 
401 		childIterator = socket->connected_children.GetIterator();
402 		while (net_socket_private* child = childIterator.Next()) {
403 			print_socket_line(child, " ");
404 		}
405 	}
406 
407 	return 0;
408 }
409 
410 
411 #endif	// ENABLE_DEBUGGER_COMMANDS
412 
413 
414 //	#pragma mark -
415 
416 
417 status_t
418 socket_open(int family, int type, int protocol, net_socket** _socket)
419 {
420 	net_socket_private* socket;
421 	status_t status = create_socket(family, type, protocol, &socket);
422 	if (status != B_OK)
423 		return status;
424 
425 	status = socket->first_info->open(socket->first_protocol);
426 	if (status != B_OK) {
427 		delete socket;
428 		return status;
429 	}
430 
431 	socket->owner = team_get_current_team_id();
432 	socket->is_in_socket_list = true;
433 
434 	mutex_lock(&sSocketLock);
435 	sSocketList.Add(socket);
436 	mutex_unlock(&sSocketLock);
437 
438 	*_socket = socket;
439 	return B_OK;
440 }
441 
442 
443 status_t
444 socket_close(net_socket* _socket)
445 {
446 	net_socket_private* socket = (net_socket_private*)_socket;
447 	return socket->first_info->close(socket->first_protocol);
448 }
449 
450 
451 void
452 socket_free(net_socket* _socket)
453 {
454 	net_socket_private* socket = (net_socket_private*)_socket;
455 	socket->first_info->free(socket->first_protocol);
456 	socket->ReleaseReference();
457 }
458 
459 
460 status_t
461 socket_control(net_socket* socket, uint32 op, void* data, size_t length)
462 {
463 	switch (op) {
464 		case FIONREAD:
465 		{
466 			if (data == NULL || (socket->options & SO_ACCEPTCONN) != 0)
467 				return B_BAD_VALUE;
468 
469 			int available = (int)socket_read_avail(socket);
470 			if (available < 0)
471 				available = 0;
472 
473 			if (is_syscall()) {
474 				if (!IS_USER_ADDRESS(data)
475 					|| user_memcpy(data, &available, sizeof(available))
476 						!= B_OK) {
477 					return B_BAD_ADDRESS;
478 				}
479 			} else
480 				*(int*)data = available;
481 
482 			return B_OK;
483 		}
484 
485 		case B_SET_BLOCKING_IO:
486 		case B_SET_NONBLOCKING_IO:
487 		{
488 			int value = op == B_SET_NONBLOCKING_IO;
489 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
490 				sizeof(int));
491 		}
492 	}
493 
494 	return socket->first_info->control(socket->first_protocol,
495 		LEVEL_DRIVER_IOCTL, op, data, &length);
496 }
497 
498 
499 ssize_t
500 socket_read_avail(net_socket* socket)
501 {
502 	return socket->first_info->read_avail(socket->first_protocol);
503 }
504 
505 
506 ssize_t
507 socket_send_avail(net_socket* socket)
508 {
509 	return socket->first_info->send_avail(socket->first_protocol);
510 }
511 
512 
513 status_t
514 socket_send_data(net_socket* socket, net_buffer* buffer)
515 {
516 	return socket->first_info->send_data(socket->first_protocol,
517 		buffer);
518 }
519 
520 
521 status_t
522 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
523 	net_buffer** _buffer)
524 {
525 	status_t status = socket->first_info->read_data(socket->first_protocol,
526 		length, flags, _buffer);
527 	if (status != B_OK)
528 		return status;
529 
530 	if (*_buffer && length < (*_buffer)->size) {
531 		// discard any data behind the amount requested
532 		gNetBufferModule.trim(*_buffer, length);
533 	}
534 
535 	return status;
536 }
537 
538 
539 status_t
540 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
541 {
542 	MutexLocker locker(sSocketLock);
543 
544 	net_socket_private* socket = NULL;
545 	SocketList::Iterator iterator = sSocketList.GetIterator();
546 	uint32 cookie = *_cookie;
547 	uint32 count = 0;
548 
549 	while (true) {
550 		socket = iterator.Next();
551 		if (socket == NULL)
552 			return B_ENTRY_NOT_FOUND;
553 
554 		// TODO: also traverse the pending connections
555 		if (count == cookie)
556 			break;
557 
558 		if (family == -1 || family == socket->family)
559 			count++;
560 	}
561 
562 	*_cookie = count + 1;
563 
564 	stat->family = socket->family;
565 	stat->type = socket->type;
566 	stat->protocol = socket->protocol;
567 	stat->owner = socket->owner;
568 	stat->state[0] = '\0';
569 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
570 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
571 	stat->receive_queue_size = 0;
572 	stat->send_queue_size = 0;
573 
574 	// fill in protocol specific data (if supported by the protocol)
575 	size_t length = sizeof(net_stat);
576 	socket->first_info->control(socket->first_protocol, socket->protocol,
577 		NET_STAT_SOCKET, stat, &length);
578 
579 	return B_OK;
580 }
581 
582 
583 //	#pragma mark - connections
584 
585 
586 bool
587 socket_acquire(net_socket* _socket)
588 {
589 	net_socket_private* socket = (net_socket_private*)_socket;
590 
591 	// During destruction, the socket might still be accessible over its
592 	// endpoint protocol. We need to make sure the endpoint cannot acquire the
593 	// socket anymore -- while not obvious, the endpoint protocol is responsible
594 	// for the proper locking here.
595 	if (socket->CountReferences() == 0)
596 		return false;
597 
598 	socket->AcquireReference();
599 	return true;
600 }
601 
602 
603 bool
604 socket_release(net_socket* _socket)
605 {
606 	net_socket_private* socket = (net_socket_private*)_socket;
607 	return socket->ReleaseReference();
608 }
609 
610 
611 status_t
612 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
613 {
614 	net_socket_private* parent = (net_socket_private*)_parent;
615 
616 	TRACE("%s(%p)\n", __FUNCTION__, parent);
617 
618 	MutexLocker locker(parent->lock);
619 
620 	// We actually accept more pending connections to compensate for those
621 	// that never complete, and also make sure at least a single connection
622 	// can always be accepted
623 	if (parent->child_count > 3 * parent->max_backlog / 2)
624 		return ENOBUFS;
625 
626 	net_socket_private* socket;
627 	status_t status = create_socket(parent->family, parent->type,
628 		parent->protocol, &socket);
629 	if (status != B_OK)
630 		return status;
631 
632 	// inherit parent's properties
633 	socket->send = parent->send;
634 	socket->receive = parent->receive;
635 	socket->options = parent->options & (SO_KEEPALIVE | SO_DONTROUTE | SO_LINGER | SO_OOBINLINE);
636 	socket->linger = parent->linger;
637 	socket->owner = parent->owner;
638 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
639 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
640 
641 	// add to the parent's list of pending connections
642 	parent->pending_children.Add(socket);
643 	socket->parent = parent;
644 	parent->child_count++;
645 
646 	*_socket = socket;
647 	return B_OK;
648 }
649 
650 
651 /*!	Dequeues a connected child from a parent socket.
652 	It also returns a reference with the child socket.
653 */
654 status_t
655 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
656 {
657 	net_socket_private* parent = (net_socket_private*)_parent;
658 
659 	mutex_lock(&parent->lock);
660 
661 	net_socket_private* socket = parent->connected_children.RemoveHead();
662 	if (socket != NULL) {
663 		socket->AcquireReference();
664 		socket->RemoveFromParent();
665 		parent->child_count--;
666 		*_socket = socket;
667 	}
668 
669 	mutex_unlock(&parent->lock);
670 
671 	if (socket == NULL)
672 		return B_ENTRY_NOT_FOUND;
673 
674 	return B_OK;
675 }
676 
677 
678 ssize_t
679 socket_count_connected(net_socket* _parent)
680 {
681 	net_socket_private* parent = (net_socket_private*)_parent;
682 
683 	MutexLocker _(parent->lock);
684 	return parent->connected_children.Count();
685 }
686 
687 
688 status_t
689 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
690 {
691 	net_socket_private* socket = (net_socket_private*)_socket;
692 
693 	// we enforce an upper limit of connections waiting to be accepted
694 	if (backlog > 256)
695 		backlog = 256;
696 
697 	MutexLocker _(socket->lock);
698 
699 	// first remove the pending connections, then the already connected
700 	// ones as needed
701 	net_socket_private* child;
702 	while (socket->child_count > backlog
703 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
704 		child->RemoveFromParent();
705 		socket->child_count--;
706 	}
707 	while (socket->child_count > backlog
708 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
709 		child->RemoveFromParent();
710 		socket->child_count--;
711 	}
712 
713 	socket->max_backlog = backlog;
714 	return B_OK;
715 }
716 
717 
718 /*!	Returns whether or not this socket has a parent. The parent might not be
719 	valid anymore, though.
720 */
721 bool
722 socket_has_parent(net_socket* _socket)
723 {
724 	net_socket_private* socket = (net_socket_private*)_socket;
725 	return socket->parent != NULL;
726 }
727 
728 
729 /*!	The socket has been connected. It will be moved to the connected queue
730 	of its parent socket.
731 */
732 status_t
733 socket_connected(net_socket* _socket)
734 {
735 	net_socket_private* socket = (net_socket_private*)_socket;
736 
737 	TRACE("socket_connected(%p)\n", socket);
738 
739 	if (socket->parent == NULL) {
740 		socket->is_connected = true;
741 		return B_OK;
742 	}
743 
744 	BReference<net_socket_private> parent = socket->parent.GetReference();
745 	if (!parent.IsSet())
746 		return B_BAD_VALUE;
747 
748 	MutexLocker _(parent->lock);
749 
750 	parent->pending_children.Remove(socket);
751 	parent->connected_children.Add(socket);
752 	socket->is_connected = true;
753 
754 	// notify parent
755 	if (parent->select_pool)
756 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
757 
758 	return B_OK;
759 }
760 
761 
762 /*!	The socket has been aborted. Steals the parent's reference, and releases
763 	it.
764 */
765 status_t
766 socket_aborted(net_socket* _socket)
767 {
768 	net_socket_private* socket = (net_socket_private*)_socket;
769 
770 	TRACE("socket_aborted(%p)\n", socket);
771 
772 	BReference<net_socket_private> parent = socket->parent.GetReference();
773 	if (!parent.IsSet())
774 		return B_BAD_VALUE;
775 
776 	MutexLocker _(parent->lock);
777 
778 	if (socket->is_connected)
779 		parent->connected_children.Remove(socket);
780 	else
781 		parent->pending_children.Remove(socket);
782 
783 	parent->child_count--;
784 	socket->RemoveFromParent();
785 
786 	return B_OK;
787 }
788 
789 
790 //	#pragma mark - notifications
791 
792 
793 status_t
794 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
795 {
796 	net_socket_private* socket = (net_socket_private*)_socket;
797 
798 	mutex_lock(&socket->lock);
799 
800 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
801 		event);
802 
803 	mutex_unlock(&socket->lock);
804 
805 	if (status != B_OK)
806 		return status;
807 
808 	// check if the event is already present
809 	// TODO: add support for poll() types
810 
811 	switch (event) {
812 		case B_SELECT_READ:
813 		{
814 			ssize_t available = socket_read_avail(socket);
815 			if ((ssize_t)socket->receive.low_water_mark <= available
816 				|| available < B_OK)
817 				notify_select_event(sync, event);
818 			break;
819 		}
820 		case B_SELECT_WRITE:
821 		{
822 			if ((socket->options & SO_ACCEPTCONN) != 0)
823 				break;
824 
825 			ssize_t available = socket_send_avail(socket);
826 			if ((ssize_t)socket->send.low_water_mark <= available
827 				|| available < B_OK)
828 				notify_select_event(sync, event);
829 			break;
830 		}
831 		case B_SELECT_ERROR:
832 			if (socket->error != B_OK)
833 				notify_select_event(sync, event);
834 			break;
835 	}
836 
837 	return B_OK;
838 }
839 
840 
841 status_t
842 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
843 {
844 	net_socket_private* socket = (net_socket_private*)_socket;
845 
846 	MutexLocker _(socket->lock);
847 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
848 }
849 
850 
851 status_t
852 socket_notify(net_socket* _socket, uint8 event, int32 value)
853 {
854 	net_socket_private* socket = (net_socket_private*)_socket;
855 	bool notify = true;
856 
857 	switch (event) {
858 		case B_SELECT_READ:
859 			if ((ssize_t)socket->receive.low_water_mark > value
860 				&& value >= B_OK)
861 				notify = false;
862 			break;
863 
864 		case B_SELECT_WRITE:
865 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
866 				notify = false;
867 			break;
868 
869 		case B_SELECT_ERROR:
870 			socket->error = value;
871 			break;
872 	}
873 
874 	MutexLocker _(socket->lock);
875 
876 	if (notify && socket->select_pool != NULL) {
877 		notify_select_event_pool(socket->select_pool, event);
878 
879 		if (event == B_SELECT_ERROR) {
880 			// always notify read/write on error
881 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
882 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
883 		}
884 	}
885 
886 	return B_OK;
887 }
888 
889 
890 //	#pragma mark - standard socket API
891 
892 
893 int
894 socket_accept(net_socket* socket, struct sockaddr* address,
895 	socklen_t* _addressLength, net_socket** _acceptedSocket)
896 {
897 	if ((socket->options & SO_ACCEPTCONN) == 0)
898 		return B_BAD_VALUE;
899 
900 	net_socket* accepted;
901 	status_t status = socket->first_info->accept(socket->first_protocol,
902 		&accepted);
903 	if (status != B_OK)
904 		return status;
905 
906 	if (address && *_addressLength > 0) {
907 		memcpy(address, &accepted->peer, min_c(*_addressLength,
908 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
909 		*_addressLength = accepted->peer.ss_len;
910 	}
911 
912 	*_acceptedSocket = accepted;
913 	return B_OK;
914 }
915 
916 
917 int
918 socket_bind(net_socket* socket, const struct sockaddr* address,
919 	socklen_t addressLength)
920 {
921 	sockaddr empty;
922 	if (address == NULL) {
923 		// special - try to bind to an empty address, like INADDR_ANY
924 		memset(&empty, 0, sizeof(sockaddr));
925 		empty.sa_len = sizeof(sockaddr);
926 		empty.sa_family = socket->family;
927 
928 		address = &empty;
929 		addressLength = sizeof(sockaddr);
930 	}
931 
932 	if (socket->address.ss_len != 0)
933 		return B_BAD_VALUE;
934 
935 	memcpy(&socket->address, address, sizeof(sockaddr));
936 	socket->address.ss_len = sizeof(sockaddr_storage);
937 
938 	status_t status = socket->first_info->bind(socket->first_protocol,
939 		(sockaddr*)address);
940 	if (status != B_OK) {
941 		// clear address again, as binding failed
942 		socket->address.ss_len = 0;
943 	}
944 
945 	return status;
946 }
947 
948 
949 int
950 socket_connect(net_socket* socket, const struct sockaddr* address,
951 	socklen_t addressLength)
952 {
953 	if (address == NULL || addressLength == 0)
954 		return ENETUNREACH;
955 
956 	if (socket->address.ss_len == 0) {
957 		// try to bind first
958 		status_t status = socket_bind(socket, NULL, 0);
959 		if (status != B_OK)
960 			return status;
961 	}
962 
963 	return socket->first_info->connect(socket->first_protocol, address);
964 }
965 
966 
967 int
968 socket_getpeername(net_socket* _socket, struct sockaddr* address,
969 	socklen_t* _addressLength)
970 {
971 	net_socket_private* socket = (net_socket_private*)_socket;
972 	BReference<net_socket_private> parent = socket->parent.GetReference();
973 
974 	if ((!parent.IsSet() && !socket->is_connected) || socket->peer.ss_len == 0)
975 		return ENOTCONN;
976 
977 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
978 	*_addressLength = socket->peer.ss_len;
979 	return B_OK;
980 }
981 
982 
983 int
984 socket_getsockname(net_socket* socket, struct sockaddr* address,
985 	socklen_t* _addressLength)
986 {
987 	if (socket->address.ss_len == 0) {
988 		struct sockaddr buffer;
989 		memset(&buffer, 0, sizeof(buffer));
990 		buffer.sa_family = socket->family;
991 
992 		memcpy(address, &buffer, min_c(*_addressLength, sizeof(buffer)));
993 		*_addressLength = sizeof(buffer);
994 		return B_OK;
995 	}
996 
997 	memcpy(address, &socket->address, min_c(*_addressLength,
998 		socket->address.ss_len));
999 	*_addressLength = socket->address.ss_len;
1000 	return B_OK;
1001 }
1002 
1003 
1004 status_t
1005 socket_get_option(net_socket* socket, int level, int option, void* value,
1006 	int* _length)
1007 {
1008 	if (level != SOL_SOCKET)
1009 		return ENOPROTOOPT;
1010 
1011 	switch (option) {
1012 		case SO_SNDBUF:
1013 		{
1014 			uint32* size = (uint32*)value;
1015 			*size = socket->send.buffer_size;
1016 			*_length = sizeof(uint32);
1017 			return B_OK;
1018 		}
1019 
1020 		case SO_RCVBUF:
1021 		{
1022 			uint32* size = (uint32*)value;
1023 			*size = socket->receive.buffer_size;
1024 			*_length = sizeof(uint32);
1025 			return B_OK;
1026 		}
1027 
1028 		case SO_SNDLOWAT:
1029 		{
1030 			uint32* size = (uint32*)value;
1031 			*size = socket->send.low_water_mark;
1032 			*_length = sizeof(uint32);
1033 			return B_OK;
1034 		}
1035 
1036 		case SO_RCVLOWAT:
1037 		{
1038 			uint32* size = (uint32*)value;
1039 			*size = socket->receive.low_water_mark;
1040 			*_length = sizeof(uint32);
1041 			return B_OK;
1042 		}
1043 
1044 		case SO_RCVTIMEO:
1045 		case SO_SNDTIMEO:
1046 		{
1047 			if (*_length < (int)sizeof(struct timeval))
1048 				return B_BAD_VALUE;
1049 
1050 			bigtime_t timeout;
1051 			if (option == SO_SNDTIMEO)
1052 				timeout = socket->send.timeout;
1053 			else
1054 				timeout = socket->receive.timeout;
1055 			if (timeout == B_INFINITE_TIMEOUT)
1056 				timeout = 0;
1057 
1058 			struct timeval* timeval = (struct timeval*)value;
1059 			timeval->tv_sec = timeout / 1000000LL;
1060 			timeval->tv_usec = timeout % 1000000LL;
1061 
1062 			*_length = sizeof(struct timeval);
1063 			return B_OK;
1064 		}
1065 
1066 		case SO_NONBLOCK:
1067 		{
1068 			int32* _set = (int32*)value;
1069 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1070 			*_length = sizeof(int32);
1071 			return B_OK;
1072 		}
1073 
1074 		case SO_ACCEPTCONN:
1075 		case SO_BROADCAST:
1076 		case SO_DEBUG:
1077 		case SO_DONTROUTE:
1078 		case SO_KEEPALIVE:
1079 		case SO_OOBINLINE:
1080 		case SO_REUSEADDR:
1081 		case SO_REUSEPORT:
1082 		case SO_USELOOPBACK:
1083 		{
1084 			int32* _set = (int32*)value;
1085 			*_set = (socket->options & option) != 0;
1086 			*_length = sizeof(int32);
1087 			return B_OK;
1088 		}
1089 
1090 		case SO_TYPE:
1091 		{
1092 			int32* _set = (int32*)value;
1093 			*_set = socket->type;
1094 			*_length = sizeof(int32);
1095 			return B_OK;
1096 		}
1097 
1098 		case SO_ERROR:
1099 		{
1100 			int32* _set = (int32*)value;
1101 			*_set = socket->error;
1102 			*_length = sizeof(int32);
1103 
1104 			socket->error = B_OK;
1105 				// clear error upon retrieval
1106 			return B_OK;
1107 		}
1108 
1109 		default:
1110 			break;
1111 	}
1112 
1113 	dprintf("socket_getsockopt: unknown option %d\n", option);
1114 	return ENOPROTOOPT;
1115 }
1116 
1117 
1118 int
1119 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1120 	int* _length)
1121 {
1122 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1123 		level, option, value, _length);
1124 }
1125 
1126 
1127 int
1128 socket_listen(net_socket* socket, int backlog)
1129 {
1130 	status_t status = socket->first_info->listen(socket->first_protocol,
1131 		backlog);
1132 	if (status == B_OK)
1133 		socket->options |= SO_ACCEPTCONN;
1134 
1135 	return status;
1136 }
1137 
1138 
1139 ssize_t
1140 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1141 	int flags)
1142 {
1143 	const int originalFlags = flags;
1144 
1145 	// MSG_NOSIGNAL is only meaningful for send(), not receive(), but it is
1146 	// sometimes specified anyway. Mask it off to avoid unnecessary errors.
1147 	flags &= ~MSG_NOSIGNAL;
1148 
1149 	// If the protocol sports read_data_no_buffer() we use it.
1150 	if (socket->first_info->read_data_no_buffer != NULL)
1151 		return socket_receive_no_buffer(socket, header, data, length, flags);
1152 
1153 	// Mask off flags handled in this function.
1154 	flags &= ~(MSG_TRUNC);
1155 
1156 	size_t totalLength = length;
1157 	if (header != NULL) {
1158 		ASSERT(data == header->msg_iov[0].iov_base);
1159 
1160 		// calculate the length considering all of the extra buffers
1161 		for (int i = 1; i < header->msg_iovlen; i++)
1162 			totalLength += header->msg_iov[i].iov_len;
1163 	}
1164 
1165 	net_buffer* buffer;
1166 	status_t status = socket->first_info->read_data(
1167 		socket->first_protocol, totalLength, flags, &buffer);
1168 	if (status != B_OK)
1169 		return status;
1170 
1171 	// process ancillary data
1172 	if (header != NULL) {
1173 		if (buffer != NULL && header->msg_control != NULL) {
1174 			ancillary_data_container* container
1175 				= gNetBufferModule.get_ancillary_data(buffer);
1176 			if (container != NULL)
1177 				status = process_ancillary_data(socket, container, header);
1178 			else
1179 				status = process_ancillary_data(socket, buffer, header);
1180 			if (status != B_OK) {
1181 				gNetBufferModule.free(buffer);
1182 				return status;
1183 			}
1184 		} else
1185 			header->msg_controllen = 0;
1186 	}
1187 
1188 	// TODO: - returning a NULL buffer when received 0 bytes
1189 	//         may not make much sense as we still need the address
1190 
1191 	size_t nameLen = 0;
1192 	if (header != NULL) {
1193 		// TODO: - consider the control buffer options
1194 		nameLen = header->msg_namelen;
1195 		header->msg_namelen = 0;
1196 		header->msg_flags = 0;
1197 	}
1198 
1199 	if (buffer == NULL)
1200 		return 0;
1201 
1202 	const size_t bytesReceived = buffer->size;
1203 	size_t bytesCopied = 0;
1204 
1205 	size_t toRead = min_c(bytesReceived, length);
1206 	status = gNetBufferModule.read(buffer, 0, data, toRead);
1207 	if (status != B_OK) {
1208 		gNetBufferModule.free(buffer);
1209 
1210 		if (status == B_BAD_ADDRESS)
1211 			return status;
1212 		return ENOBUFS;
1213 	}
1214 
1215 	// if first copy was a success, proceed to following copies as required
1216 	bytesCopied += toRead;
1217 
1218 	if (header != NULL) {
1219 		// We start at iovec[1] as { data, length } is iovec[0].
1220 		for (int i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1221 			iovec& vec = header->msg_iov[i];
1222 			toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1223 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1224 					toRead) < B_OK) {
1225 				break;
1226 			}
1227 
1228 			bytesCopied += toRead;
1229 		}
1230 
1231 		if (header->msg_name != NULL) {
1232 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1233 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1234 		}
1235 	}
1236 
1237 	gNetBufferModule.free(buffer);
1238 
1239 	if (bytesCopied < bytesReceived) {
1240 		if (header != NULL)
1241 			header->msg_flags = MSG_TRUNC;
1242 
1243 		if ((originalFlags & MSG_TRUNC) != 0)
1244 			return bytesReceived;
1245 	}
1246 
1247 	return bytesCopied;
1248 }
1249 
1250 
1251 ssize_t
1252 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1253 	int flags)
1254 {
1255 	const bool nosignal = ((flags & MSG_NOSIGNAL) != 0);
1256 	flags &= ~MSG_NOSIGNAL;
1257 
1258 	size_t bytesLeft = length;
1259 	if (length > SSIZE_MAX)
1260 		return B_BAD_VALUE;
1261 
1262 	ancillary_data_container* ancillaryData = NULL;
1263 	CObjectDeleter<
1264 		ancillary_data_container, void, delete_ancillary_data_container>
1265 		ancillaryDataDeleter;
1266 
1267 	const sockaddr* address = NULL;
1268 	socklen_t addressLength = 0;
1269 	if (header != NULL) {
1270 		address = (const sockaddr*)header->msg_name;
1271 		addressLength = header->msg_namelen;
1272 
1273 		// get the ancillary data
1274 		if (header->msg_control != NULL) {
1275 			ancillaryData = create_ancillary_data_container();
1276 			if (ancillaryData == NULL)
1277 				return B_NO_MEMORY;
1278 			ancillaryDataDeleter.SetTo(ancillaryData);
1279 
1280 			status_t status = add_ancillary_data(socket, ancillaryData,
1281 				(cmsghdr*)header->msg_control, header->msg_controllen);
1282 			if (status != B_OK)
1283 				return status;
1284 		}
1285 	}
1286 
1287 	if (addressLength == 0)
1288 		address = NULL;
1289 	else if (address == NULL)
1290 		return B_BAD_VALUE;
1291 
1292 	if (socket->peer.ss_len != 0) {
1293 		if (address != NULL)
1294 			return EISCONN;
1295 
1296 		// socket is connected, we use that address
1297 		address = (struct sockaddr*)&socket->peer;
1298 		addressLength = socket->peer.ss_len;
1299 	}
1300 
1301 	if (address == NULL || addressLength == 0) {
1302 		// don't know where to send to:
1303 		return EDESTADDRREQ;
1304 	}
1305 
1306 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1307 		&& bytesLeft > socket->send.buffer_size)
1308 		return EMSGSIZE;
1309 
1310 	if (socket->address.ss_len == 0) {
1311 		// try to bind first
1312 		status_t status = socket_bind(socket, NULL, 0);
1313 		if (status != B_OK)
1314 			return status;
1315 	}
1316 
1317 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1318 	if (socket->first_info->send_data_no_buffer != NULL) {
1319 		iovec stackVec = { (void*)data, length };
1320 		iovec* vecs = header ? header->msg_iov : &stackVec;
1321 		int vecCount = header ? header->msg_iovlen : 1;
1322 
1323 		ssize_t written = socket->first_info->send_data_no_buffer(
1324 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1325 			addressLength, flags);
1326 
1327 		// we only send signals when called from userland
1328 		if (written == EPIPE && is_syscall() && !nosignal)
1329 			send_signal(find_thread(NULL), SIGPIPE);
1330 
1331 		if (written > 0)
1332 			ancillaryDataDeleter.Detach();
1333 		return written;
1334 	}
1335 
1336 	// By convention, if a header is given, the (data, length) equals the first
1337 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1338 	// the size of the remaining ones.
1339 	if (header != NULL) {
1340 		if (header->msg_iovlen <= 1) {
1341 			header = NULL;
1342 		} else {
1343 			for (int i = 1; i < header->msg_iovlen; i++)
1344 				bytesLeft += header->msg_iov[i].iov_len;
1345 		}
1346 	}
1347 
1348 	ssize_t bytesSent = 0;
1349 	size_t vecOffset = 0;
1350 	uint32 vecIndex = 0;
1351 
1352 	while (bytesLeft > 0) {
1353 		// TODO: useful, maybe even computed header space!
1354 		net_buffer* buffer = gNetBufferModule.create(256);
1355 		if (buffer == NULL)
1356 			return ENOBUFS;
1357 
1358 		while (buffer->size < socket->send.buffer_size
1359 			&& buffer->size < bytesLeft) {
1360 			if (vecIndex > 0 && vecOffset == 0) {
1361 				// retrieve next iovec buffer from header
1362 				data = header->msg_iov[vecIndex].iov_base;
1363 				length = header->msg_iov[vecIndex].iov_len;
1364 			}
1365 
1366 			size_t bytes = length;
1367 			if (buffer->size + bytes > socket->send.buffer_size)
1368 				bytes = socket->send.buffer_size - buffer->size;
1369 
1370 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1371 				gNetBufferModule.free(buffer);
1372 				return ENOBUFS;
1373 			}
1374 
1375 			if (bytes != length) {
1376 				// partial send
1377 				vecOffset = bytes;
1378 				length -= vecOffset;
1379 				data = (uint8*)data + vecOffset;
1380 			} else if (header != NULL) {
1381 				// proceed with next buffer, if any
1382 				vecOffset = 0;
1383 				vecIndex++;
1384 
1385 				if (vecIndex >= (uint32)header->msg_iovlen)
1386 					break;
1387 			}
1388 		}
1389 
1390 		// attach ancillary data to the first buffer
1391 		status_t status;
1392 		if (ancillaryData != NULL) {
1393 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1394 			ancillaryDataDeleter.Detach();
1395 			ancillaryData = NULL;
1396 		}
1397 
1398 		size_t bufferSize = buffer->size;
1399 		buffer->msg_flags = flags;
1400 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1401 		memcpy(buffer->destination, address, addressLength);
1402 		buffer->destination->sa_len = addressLength;
1403 
1404 		status = socket->first_info->send_data(socket->first_protocol, buffer);
1405 		if (status != B_OK) {
1406 			// we only send signals when called from userland
1407 			if (status == EPIPE && is_syscall() && !nosignal)
1408 				send_signal(find_thread(NULL), SIGPIPE);
1409 
1410 			size_t sizeAfterSend = buffer->size;
1411 			gNetBufferModule.free(buffer);
1412 
1413 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1414 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1415 				// this appears to be a partial write
1416 				return bytesSent + (bufferSize - sizeAfterSend);
1417 			}
1418 			return status;
1419 		}
1420 
1421 		bytesLeft -= bufferSize;
1422 		bytesSent += bufferSize;
1423 	}
1424 
1425 	return bytesSent;
1426 }
1427 
1428 
1429 status_t
1430 socket_set_option(net_socket* socket, int level, int option, const void* value,
1431 	int length)
1432 {
1433 	if (level != SOL_SOCKET)
1434 		return ENOPROTOOPT;
1435 
1436 	TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1437 
1438 	switch (option) {
1439 		// TODO: implement other options!
1440 		case SO_LINGER:
1441 		{
1442 			if (length < (int)sizeof(struct linger))
1443 				return B_BAD_VALUE;
1444 
1445 			struct linger* linger = (struct linger*)value;
1446 			if (linger->l_onoff) {
1447 				socket->options |= SO_LINGER;
1448 				socket->linger = linger->l_linger;
1449 			} else {
1450 				socket->options &= ~SO_LINGER;
1451 				socket->linger = 0;
1452 			}
1453 			return B_OK;
1454 		}
1455 
1456 		case SO_SNDBUF:
1457 			if (length != sizeof(uint32))
1458 				return B_BAD_VALUE;
1459 
1460 			socket->send.buffer_size = *(const uint32*)value;
1461 			return B_OK;
1462 
1463 		case SO_RCVBUF:
1464 			if (length != sizeof(uint32))
1465 				return B_BAD_VALUE;
1466 
1467 			socket->receive.buffer_size = *(const uint32*)value;
1468 			return B_OK;
1469 
1470 		case SO_SNDLOWAT:
1471 			if (length != sizeof(uint32))
1472 				return B_BAD_VALUE;
1473 
1474 			socket->send.low_water_mark = *(const uint32*)value;
1475 			return B_OK;
1476 
1477 		case SO_RCVLOWAT:
1478 			if (length != sizeof(uint32))
1479 				return B_BAD_VALUE;
1480 
1481 			socket->receive.low_water_mark = *(const uint32*)value;
1482 			return B_OK;
1483 
1484 		case SO_RCVTIMEO:
1485 		case SO_SNDTIMEO:
1486 		{
1487 			if (length != sizeof(struct timeval))
1488 				return B_BAD_VALUE;
1489 
1490 			const struct timeval* timeval = (const struct timeval*)value;
1491 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1492 			if (timeout == 0)
1493 				timeout = B_INFINITE_TIMEOUT;
1494 
1495 			if (option == SO_SNDTIMEO)
1496 				socket->send.timeout = timeout;
1497 			else
1498 				socket->receive.timeout = timeout;
1499 			return B_OK;
1500 		}
1501 
1502 		case SO_NONBLOCK:
1503 			if (length != sizeof(int32))
1504 				return B_BAD_VALUE;
1505 
1506 			if (*(const int32*)value) {
1507 				socket->send.timeout = 0;
1508 				socket->receive.timeout = 0;
1509 			} else {
1510 				socket->send.timeout = B_INFINITE_TIMEOUT;
1511 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1512 			}
1513 			return B_OK;
1514 
1515 		case SO_BROADCAST:
1516 		case SO_DEBUG:
1517 		case SO_DONTROUTE:
1518 		case SO_KEEPALIVE:
1519 		case SO_OOBINLINE:
1520 		case SO_REUSEADDR:
1521 		case SO_REUSEPORT:
1522 		case SO_USELOOPBACK:
1523 			if (length != sizeof(int32))
1524 				return B_BAD_VALUE;
1525 
1526 			if (*(const int32*)value)
1527 				socket->options |= option;
1528 			else
1529 				socket->options &= ~option;
1530 			return B_OK;
1531 
1532 		case SO_BINDTODEVICE:
1533 		{
1534 			if (length != sizeof(uint32))
1535 				return B_BAD_VALUE;
1536 
1537 			// TODO: we might want to check if the device exists at all
1538 			// (although it doesn't really harm when we don't)
1539 			socket->bound_to_device = *(const uint32*)value;
1540 			return B_OK;
1541 		}
1542 
1543 		default:
1544 			break;
1545 	}
1546 
1547 	dprintf("socket_setsockopt: unknown option %d\n", option);
1548 	return ENOPROTOOPT;
1549 }
1550 
1551 
1552 int
1553 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1554 	int length)
1555 {
1556 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1557 		level, option, value, length);
1558 }
1559 
1560 
1561 int
1562 socket_shutdown(net_socket* socket, int direction)
1563 {
1564 	return socket->first_info->shutdown(socket->first_protocol, direction);
1565 }
1566 
1567 
1568 status_t
1569 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1570 {
1571 	sockets[0] = NULL;
1572 	sockets[1] = NULL;
1573 
1574 	// create sockets
1575 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1576 	if (error != B_OK)
1577 		return error;
1578 
1579 	error = socket_open(family, type, protocol, &sockets[1]);
1580 
1581 	// bind one
1582 	if (error == B_OK)
1583 		error = socket_bind(sockets[0], NULL, 0);
1584 
1585 	// start listening
1586 	if (error == B_OK && type == SOCK_STREAM)
1587 		error = socket_listen(sockets[0], 1);
1588 
1589 	// connect them
1590 	if (error == B_OK) {
1591 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1592 			sockets[0]->address.ss_len);
1593 	}
1594 
1595 	if (error == B_OK) {
1596 		// accept a socket
1597 		if (type == SOCK_STREAM) {
1598 			net_socket* acceptedSocket = NULL;
1599 			error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1600 			if (error == B_OK) {
1601 				// everything worked: close the listener socket
1602 				socket_close(sockets[0]);
1603 				socket_free(sockets[0]);
1604 				sockets[0] = acceptedSocket;
1605 			}
1606 		// connect the other side
1607 		} else {
1608 			error = socket_connect(sockets[0], (sockaddr*)&sockets[1]->address,
1609 				sockets[1]->address.ss_len);
1610 		}
1611 	}
1612 
1613 	if (error != B_OK) {
1614 		// close sockets on error
1615 		for (int i = 0; i < 2; i++) {
1616 			if (sockets[i] != NULL) {
1617 				socket_close(sockets[i]);
1618 				socket_free(sockets[i]);
1619 				sockets[i] = NULL;
1620 			}
1621 		}
1622 	}
1623 
1624 	return error;
1625 }
1626 
1627 
1628 //	#pragma mark -
1629 
1630 
1631 static status_t
1632 socket_std_ops(int32 op, ...)
1633 {
1634 	switch (op) {
1635 		case B_MODULE_INIT:
1636 		{
1637 			new (&sSocketList) SocketList;
1638 			mutex_init(&sSocketLock, "socket list");
1639 
1640 #if ENABLE_DEBUGGER_COMMANDS
1641 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1642 			add_debugger_command("socket", dump_socket, "dumps a socket");
1643 #endif
1644 			return B_OK;
1645 		}
1646 		case B_MODULE_UNINIT:
1647 			ASSERT(sSocketList.IsEmpty());
1648 			mutex_destroy(&sSocketLock);
1649 
1650 #if ENABLE_DEBUGGER_COMMANDS
1651 			remove_debugger_command("socket", dump_socket);
1652 			remove_debugger_command("sockets", dump_sockets);
1653 #endif
1654 			return B_OK;
1655 
1656 		default:
1657 			return B_ERROR;
1658 	}
1659 }
1660 
1661 
// The function table of the socket module. The entries are positional: they
// initialize the members of net_socket_module_info in the order those are
// declared, so entries must only be added, removed, or moved together with
// the respective change to that structure.
net_socket_module_info gNetSocketModule = {
	{
		// module header: the module's name, a zero second member (presumably
		// the module flags -- confirm against the structure's declaration),
		// and the handler for the standard module operations
		NET_SOCKET_MODULE_NAME,
		0,
		socket_std_ops
	},
	// opening, closing, and freeing sockets
	socket_open,
	socket_close,
	socket_free,

	socket_control,

	// available data/space
	socket_read_avail,
	socket_send_avail,

	// buffer based data transfer
	socket_send_data,
	socket_receive_data,

	// SOL_SOCKET level options
	socket_get_option,
	socket_set_option,

	socket_get_next_stat,

	// connections
	socket_acquire,
	socket_release,
	socket_spawn_pending,
	socket_dequeue_connected,
	socket_count_connected,
	socket_set_max_backlog,
	socket_has_parent,
	socket_connected,
	socket_aborted,

	// notifications
	socket_request_notification,
	socket_cancel_notification,
	socket_notify,

	// standard socket API
	socket_accept,
	socket_bind,
	socket_connect,
	socket_getpeername,
	socket_getsockname,
	socket_getsockopt,
	socket_listen,
	socket_receive,
	socket_send,
	socket_setsockopt,
	socket_shutdown,
	socket_socketpair
};
1715 
1716