xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 76e9533e9e9945f62053bf7c737df2f3e1735bbd)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
// Uncomment the following define to have TRACE() print via dprintf(),
// prefixed with STACK_DEBUG_PREFIX. When it is left undefined, TRACE()
// expands to an empty statement.
//#define TRACE_SOCKET
#ifdef TRACE_SOCKET
#	define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
#else
#	define TRACE(x...) ;
#endif
46 
47 
// Forward declaration, needed because the list type below is used for
// members of net_socket_private itself.
struct net_socket_private;
// Intrusive doubly linked list of sockets; used for the global socket list
// as well as for the per-socket child queues.
typedef DoublyLinkedList<net_socket_private> SocketList;
50 
/*!	Stack-private extension of net_socket. Besides the public net_socket
	fields it is linkable into a SocketList, weak-referenceable, and carries
	the bookkeeping for child sockets spawned from it.
*/
struct net_socket_private : net_socket,
		DoublyLinkedListLinkImpl<net_socket_private>,
		WeakReferenceable<net_socket_private> {
	net_socket_private();
	~net_socket_private();

	void RemoveFromParent();

	WeakPointer<net_socket_private>* parent;
		// weak pointer of the socket this one was spawned from (set in
		// socket_spawn_pending()); NULL when the socket has no parent
	team_id						owner;
		// team that opened the socket, or the parent's owner for children
	uint32						max_backlog;
		// set by socket_set_max_backlog()
	uint32						child_count;
		// number of sockets in pending_children + connected_children
	SocketList					pending_children;
		// children spawned but not yet reported via socket_connected()
	SocketList					connected_children;
		// children waiting for socket_dequeue_connected()

	struct select_sync_pool*	select_pool;
		// select() waiters registered via socket_request_notification()
	mutex						lock;
		// held in this file while touching the child lists and select_pool

	bool						is_connected;
		// set once socket_connected() moved the socket to its parent's
		// connected queue
	bool						is_in_socket_list;
		// true while the socket is linked into the global sSocketList
};
72 
73 
// Forward declarations of functions that are used in this file before
// their definition.
int socket_bind(net_socket* socket, const struct sockaddr* address,
	socklen_t addressLength);
int socket_setsockopt(net_socket* socket, int level, int option,
	const void* value, int length);
ssize_t socket_read_avail(net_socket* socket);

// Global list of sockets (sockets queued at a parent are not in it), and
// the lock that is held in this file whenever the list is modified or
// iterated outside of the kernel debugger.
static SocketList sSocketList;
static mutex sSocketLock;
82 
83 
84 net_socket_private::net_socket_private()
85 	: WeakReferenceable<net_socket_private>(this),
86 	parent(NULL),
87 	owner(-1),
88 	max_backlog(0),
89 	child_count(0),
90 	select_pool(NULL),
91 	is_connected(false),
92 	is_in_socket_list(false)
93 {
94 	first_protocol = NULL;
95 	first_info = NULL;
96 	options = 0;
97 	linger = 0;
98 	bound_to_device = 0;
99 	error = 0;
100 
101 	address.ss_len = 0;
102 	peer.ss_len = 0;
103 
104 	mutex_init(&lock, "socket");
105 
106 	// set defaults (may be overridden by the protocols)
107 	send.buffer_size = 65535;
108 	send.low_water_mark = 1;
109 	send.timeout = B_INFINITE_TIMEOUT;
110 	receive.buffer_size = 65535;
111 	receive.low_water_mark = 1;
112 	receive.timeout = B_INFINITE_TIMEOUT;
113 }
114 
115 
116 net_socket_private::~net_socket_private()
117 {
118 	TRACE("delete net_socket %p\n", this);
119 
120 	if (parent != NULL)
121 		panic("socket still has a parent!");
122 
123 	if (is_in_socket_list) {
124 		MutexLocker _(sSocketLock);
125 		sSocketList.Remove(this);
126 	}
127 
128 	mutex_lock(&lock);
129 
130 	// also delete all children of this socket
131 	while (net_socket_private* child = pending_children.RemoveHead()) {
132 		child->RemoveFromParent();
133 	}
134 	while (net_socket_private* child = connected_children.RemoveHead()) {
135 		child->RemoveFromParent();
136 	}
137 
138 	mutex_unlock(&lock);
139 
140 	put_domain_protocols(this);
141 
142 	mutex_destroy(&lock);
143 }
144 
145 
146 void
147 net_socket_private::RemoveFromParent()
148 {
149 	ASSERT(!is_in_socket_list && parent != NULL);
150 
151 	parent->ReleaseReference();
152 	parent = NULL;
153 
154 	mutex_lock(&sSocketLock);
155 	sSocketList.Add(this);
156 	mutex_unlock(&sSocketLock);
157 
158 	is_in_socket_list = true;
159 
160 	ReleaseReference();
161 }
162 
163 
164 //	#pragma mark -
165 
166 
167 static size_t
168 compute_user_iovec_length(iovec* userVec, uint32 count)
169 {
170 	size_t length = 0;
171 
172 	for (uint32 i = 0; i < count; i++) {
173 		iovec vec;
174 		if (user_memcpy(&vec, userVec + i, sizeof(iovec)) < B_OK)
175 			return 0;
176 
177 		length += vec.iov_len;
178 	}
179 
180 	return length;
181 }
182 
183 
184 static status_t
185 create_socket(int family, int type, int protocol, net_socket_private** _socket)
186 {
187 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
188 	if (socket == NULL)
189 		return B_NO_MEMORY;
190 
191 	socket->family = family;
192 	socket->type = type;
193 	socket->protocol = protocol;
194 
195 	status_t status = get_domain_protocols(socket);
196 	if (status != B_OK) {
197 		delete socket;
198 		return status;
199 	}
200 
201 	TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
202 		socket->type, socket->protocol);
203 
204 #ifdef TRACE_SOCKET
205 	net_protocol* current = socket->first_protocol;
206 	for (int i = 0; current != NULL; current = current->next, i++)
207 		TRACE("  [%d] %p  %s\n", i, current, current->module->info.name);
208 #endif
209 
210 	*_socket = socket;
211 	return B_OK;
212 }
213 
214 
215 static status_t
216 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
217 	void* data, size_t dataLen)
218 {
219 	cmsghdr* header = (cmsghdr*)data;
220 
221 	while (dataLen > 0) {
222 		if (header->cmsg_len < sizeof(cmsghdr) || header->cmsg_len > dataLen)
223 			return B_BAD_VALUE;
224 
225 		if (socket->first_info->add_ancillary_data == NULL)
226 			return B_NOT_SUPPORTED;
227 
228 		status_t status = socket->first_info->add_ancillary_data(
229 			socket->first_protocol, container, header);
230 		if (status != B_OK)
231 			return status;
232 
233 		dataLen -= _ALIGN(header->cmsg_len);
234 		header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
235 	}
236 
237 	return B_OK;
238 }
239 
240 
241 static status_t
242 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
243 	msghdr* messageHeader)
244 {
245 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
246 	int dataBufferLen = messageHeader->msg_controllen;
247 
248 	if (container == NULL || dataBuffer == NULL) {
249 		messageHeader->msg_controllen = 0;
250 		return B_OK;
251 	}
252 
253 	ancillary_data_header header;
254 	void* data = NULL;
255 
256 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
257 		if (socket->first_info->process_ancillary_data == NULL)
258 			return B_NOT_SUPPORTED;
259 
260 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
261 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
262 		if (bytesWritten < 0)
263 			return bytesWritten;
264 
265 		dataBuffer += bytesWritten;
266 		dataBufferLen -= bytesWritten;
267 	}
268 
269 	messageHeader->msg_controllen -= dataBufferLen;
270 
271 	return B_OK;
272 }
273 
274 
275 static status_t
276 process_ancillary_data(net_socket* socket,
277 	net_buffer* buffer, msghdr* messageHeader)
278 {
279 	void *dataBuffer = messageHeader->msg_control;
280 	ssize_t bytesWritten;
281 
282 	if (dataBuffer == NULL) {
283 		messageHeader->msg_controllen = 0;
284 		return B_OK;
285 	}
286 
287 	if (socket->first_info->process_ancillary_data_no_container == NULL)
288 		return B_NOT_SUPPORTED;
289 
290 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
291 		socket->first_protocol, buffer, dataBuffer,
292 		messageHeader->msg_controllen);
293 	if (bytesWritten < 0)
294 		return bytesWritten;
295 	messageHeader->msg_controllen = bytesWritten;
296 
297 	return B_OK;
298 }
299 
300 
301 static ssize_t
302 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
303 	size_t length, int flags)
304 {
305 	iovec stackVec = { data, length };
306 	iovec* vecs = header ? header->msg_iov : &stackVec;
307 	int vecCount = header ? header->msg_iovlen : 1;
308 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
309 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
310 
311 	ancillary_data_container* ancillaryData = NULL;
312 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
313 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
314 		addressLen);
315 	if (bytesRead < 0)
316 		return bytesRead;
317 
318 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(ancillaryData,
319 		&delete_ancillary_data_container);
320 
321 	// process ancillary data
322 	if (header != NULL) {
323 		status_t status = process_ancillary_data(socket, ancillaryData, header);
324 		if (status != B_OK)
325 			return status;
326 
327 		header->msg_flags = 0;
328 	}
329 
330 	return bytesRead;
331 }
332 
333 
334 #if ENABLE_DEBUGGER_COMMANDS
335 
336 
337 static void
338 print_socket_line(net_socket_private* socket, const char* prefix)
339 {
340 	kprintf("%s%p %2d.%2d.%2d %6ld %p %p  %p%s\n", prefix, socket,
341 		socket->family, socket->type, socket->protocol, socket->owner,
342 		socket->first_protocol, socket->first_info, socket->parent,
343 		socket->parent != NULL ? socket->is_connected ? " (c)" : " (p)" : "");
344 }
345 
346 
347 static int
348 dump_socket(int argc, char** argv)
349 {
350 	if (argc < 2) {
351 		kprintf("usage: %s [address]\n", argv[0]);
352 		return 0;
353 	}
354 
355 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
356 
357 	kprintf("SOCKET %p\n", socket);
358 	kprintf("  family.type.protocol: %d.%d.%d\n",
359 		socket->family, socket->type, socket->protocol);
360 	WeakReference<net_socket_private> parent = socket->parent;
361 	kprintf("  parent:               %p (%p)\n", parent.Get(), socket->parent);
362 	kprintf("  first protocol:       %p\n", socket->first_protocol);
363 	kprintf("  first module_info:    %p\n", socket->first_info);
364 	kprintf("  options:              %x\n", socket->options);
365 	kprintf("  linger:               %d\n", socket->linger);
366 	kprintf("  bound to device:      %" B_PRIu32 "\n", socket->bound_to_device);
367 	kprintf("  owner:                %ld\n", socket->owner);
368 	kprintf("  max backlog:          %ld\n", socket->max_backlog);
369 	kprintf("  is connected:         %d\n", socket->is_connected);
370 	kprintf("  child_count:          %lu\n", socket->child_count);
371 
372 	if (socket->child_count == 0)
373 		return 0;
374 
375 	kprintf("    pending children:\n");
376 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
377 	while (net_socket_private* child = iterator.Next()) {
378 		print_socket_line(child, "      ");
379 	}
380 
381 	kprintf("    connected children:\n");
382 	iterator = socket->connected_children.GetIterator();
383 	while (net_socket_private* child = iterator.Next()) {
384 		print_socket_line(child, "      ");
385 	}
386 
387 	return 0;
388 }
389 
390 
391 static int
392 dump_sockets(int argc, char** argv)
393 {
394 	kprintf("address        kind  owner protocol   module_info parent\n");
395 
396 	SocketList::Iterator iterator = sSocketList.GetIterator();
397 	while (net_socket_private* socket = iterator.Next()) {
398 		print_socket_line(socket, "");
399 
400 		SocketList::Iterator childIterator
401 			= socket->pending_children.GetIterator();
402 		while (net_socket_private* child = childIterator.Next()) {
403 			print_socket_line(child, " ");
404 		}
405 
406 		childIterator = socket->connected_children.GetIterator();
407 		while (net_socket_private* child = childIterator.Next()) {
408 			print_socket_line(child, " ");
409 		}
410 	}
411 
412 	return 0;
413 }
414 
415 
416 #endif	// ENABLE_DEBUGGER_COMMANDS
417 
418 
419 //	#pragma mark -
420 
421 
422 status_t
423 socket_open(int family, int type, int protocol, net_socket** _socket)
424 {
425 	net_socket_private* socket;
426 	status_t status = create_socket(family, type, protocol, &socket);
427 	if (status != B_OK)
428 		return status;
429 
430 	status = socket->first_info->open(socket->first_protocol);
431 	if (status != B_OK) {
432 		delete socket;
433 		return status;
434 	}
435 
436 	socket->owner = team_get_current_team_id();
437 	socket->is_in_socket_list = true;
438 
439 	mutex_lock(&sSocketLock);
440 	sSocketList.Add(socket);
441 	mutex_unlock(&sSocketLock);
442 
443 	*_socket = socket;
444 	return B_OK;
445 }
446 
447 
448 status_t
449 socket_close(net_socket* _socket)
450 {
451 	net_socket_private* socket = (net_socket_private*)_socket;
452 	return socket->first_info->close(socket->first_protocol);
453 }
454 
455 
456 void
457 socket_free(net_socket* _socket)
458 {
459 	net_socket_private* socket = (net_socket_private*)_socket;
460 	socket->first_info->free(socket->first_protocol);
461 	socket->ReleaseReference();
462 }
463 
464 
465 status_t
466 socket_readv(net_socket* socket, const iovec* vecs, size_t vecCount,
467 	size_t* _length)
468 {
469 	return -1;
470 }
471 
472 
473 status_t
474 socket_writev(net_socket* socket, const iovec* vecs, size_t vecCount,
475 	size_t* _length)
476 {
477 	if (socket->peer.ss_len == 0)
478 		return ECONNRESET;
479 
480 	if (socket->address.ss_len == 0) {
481 		// try to bind first
482 		status_t status = socket_bind(socket, NULL, 0);
483 		if (status != B_OK)
484 			return status;
485 	}
486 
487 	// TODO: useful, maybe even computed header space!
488 	net_buffer* buffer = gNetBufferModule.create(256);
489 	if (buffer == NULL)
490 		return ENOBUFS;
491 
492 	// copy data into buffer
493 
494 	for (uint32 i = 0; i < vecCount; i++) {
495 		if (gNetBufferModule.append(buffer, vecs[i].iov_base,
496 				vecs[i].iov_len) < B_OK) {
497 			gNetBufferModule.free(buffer);
498 			return ENOBUFS;
499 		}
500 	}
501 
502 	memcpy(buffer->source, &socket->address, socket->address.ss_len);
503 	memcpy(buffer->destination, &socket->peer, socket->peer.ss_len);
504 	size_t size = buffer->size;
505 
506 	ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
507 		buffer);
508 	if (bytesWritten < B_OK) {
509 		if (buffer->size != size) {
510 			// this appears to be a partial write
511 			*_length = size - buffer->size;
512 		}
513 		gNetBufferModule.free(buffer);
514 		return bytesWritten;
515 	}
516 
517 	*_length = bytesWritten;
518 	return B_OK;
519 }
520 
521 
522 status_t
523 socket_control(net_socket* socket, int32 op, void* data, size_t length)
524 {
525 	switch (op) {
526 		case FIONBIO:
527 		{
528 			if (data == NULL)
529 				return B_BAD_VALUE;
530 
531 			int value;
532 			if (is_syscall()) {
533 				if (!IS_USER_ADDRESS(data)
534 					|| user_memcpy(&value, data, sizeof(int)) != B_OK) {
535 					return B_BAD_ADDRESS;
536 				}
537 			} else
538 				value = *(int*)data;
539 
540 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
541 				sizeof(int));
542 		}
543 
544 		case FIONREAD:
545 		{
546 			if (data == NULL)
547 				return B_BAD_VALUE;
548 
549 			ssize_t available = socket_read_avail(socket);
550 			if (available < B_OK)
551 				return available;
552 
553 			if (is_syscall()) {
554 				if (!IS_USER_ADDRESS(data)
555 					|| user_memcpy(data, &available, sizeof(ssize_t)) != B_OK) {
556 					return B_BAD_ADDRESS;
557 				}
558 			} else
559 				*(ssize_t *)data = available;
560 
561 			return B_OK;
562 		}
563 
564 		case B_SET_BLOCKING_IO:
565 		case B_SET_NONBLOCKING_IO:
566 		{
567 			int value = op == B_SET_NONBLOCKING_IO;
568 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
569 				sizeof(int));
570 		}
571 	}
572 
573 	return socket->first_info->control(socket->first_protocol,
574 		LEVEL_DRIVER_IOCTL, op, data, &length);
575 }
576 
577 
578 ssize_t
579 socket_read_avail(net_socket* socket)
580 {
581 	return socket->first_info->read_avail(socket->first_protocol);
582 }
583 
584 
585 ssize_t
586 socket_send_avail(net_socket* socket)
587 {
588 	return socket->first_info->send_avail(socket->first_protocol);
589 }
590 
591 
592 status_t
593 socket_send_data(net_socket* socket, net_buffer* buffer)
594 {
595 	return socket->first_info->send_data(socket->first_protocol,
596 		buffer);
597 }
598 
599 
600 status_t
601 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
602 	net_buffer** _buffer)
603 {
604 	status_t status = socket->first_info->read_data(socket->first_protocol,
605 		length, flags, _buffer);
606 	if (status != B_OK)
607 		return status;
608 
609 	if (*_buffer && length < (*_buffer)->size) {
610 		// discard any data behind the amount requested
611 		gNetBufferModule.trim(*_buffer, length);
612 	}
613 
614 	return status;
615 }
616 
617 
618 status_t
619 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
620 {
621 	MutexLocker locker(sSocketLock);
622 
623 	net_socket_private* socket = NULL;
624 	SocketList::Iterator iterator = sSocketList.GetIterator();
625 	uint32 cookie = *_cookie;
626 	uint32 count = 0;
627 
628 	while (true) {
629 		socket = iterator.Next();
630 		if (socket == NULL)
631 			return B_ENTRY_NOT_FOUND;
632 
633 		// TODO: also traverse the pending connections
634 		if (count == cookie)
635 			break;
636 
637 		if (family == -1 || family == socket->family)
638 			count++;
639 	}
640 
641 	*_cookie = count + 1;
642 
643 	stat->family = socket->family;
644 	stat->type = socket->type;
645 	stat->protocol = socket->protocol;
646 	stat->owner = socket->owner;
647 	stat->state[0] = '\0';
648 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
649 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
650 	stat->receive_queue_size = 0;
651 	stat->send_queue_size = 0;
652 
653 	// fill in protocol specific data (if supported by the protocol)
654 	size_t length = sizeof(net_stat);
655 	socket->first_info->control(socket->first_protocol, socket->protocol,
656 		NET_STAT_SOCKET, stat, &length);
657 
658 	return B_OK;
659 }
660 
661 
662 //	#pragma mark - connections
663 
664 
665 bool
666 socket_acquire(net_socket* _socket)
667 {
668 	net_socket_private* socket = (net_socket_private*)_socket;
669 
670 	// During destruction, the socket might still be accessible over its
671 	// endpoint protocol. We need to make sure the endpoint cannot acquire the
672 	// socket anymore -- while not obvious, the endpoint protocol is responsible
673 	// for the proper locking here.
674 	if (socket->CountReferences() == 0)
675 		return false;
676 
677 	socket->AcquireReference();
678 	return true;
679 }
680 
681 
682 bool
683 socket_release(net_socket* _socket)
684 {
685 	net_socket_private* socket = (net_socket_private*)_socket;
686 	return socket->ReleaseReference();
687 }
688 
689 
690 status_t
691 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
692 {
693 	net_socket_private* parent = (net_socket_private*)_parent;
694 
695 	TRACE("%s(%p)\n", __FUNCTION__, parent);
696 
697 	MutexLocker locker(parent->lock);
698 
699 	// We actually accept more pending connections to compensate for those
700 	// that never complete, and also make sure at least a single connection
701 	// can always be accepted
702 	if (parent->child_count > 3 * parent->max_backlog / 2)
703 		return ENOBUFS;
704 
705 	net_socket_private* socket;
706 	status_t status = create_socket(parent->family, parent->type,
707 		parent->protocol, &socket);
708 	if (status != B_OK)
709 		return status;
710 
711 	// inherit parent's properties
712 	socket->send = parent->send;
713 	socket->receive = parent->receive;
714 	socket->options = parent->options & ~SO_ACCEPTCONN;
715 	socket->linger = parent->linger;
716 	socket->owner = parent->owner;
717 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
718 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
719 
720 	// add to the parent's list of pending connections
721 	parent->pending_children.Add(socket);
722 	socket->parent = parent->GetWeakPointer();
723 	parent->child_count++;
724 
725 	*_socket = socket;
726 	return B_OK;
727 }
728 
729 
730 /*!	Dequeues a connected child from a parent socket.
731 	It also returns a reference with the child socket.
732 */
733 status_t
734 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
735 {
736 	net_socket_private* parent = (net_socket_private*)_parent;
737 
738 	mutex_lock(&parent->lock);
739 
740 	net_socket_private* socket = parent->connected_children.RemoveHead();
741 	if (socket != NULL) {
742 		socket->AcquireReference();
743 		socket->RemoveFromParent();
744 		parent->child_count--;
745 		*_socket = socket;
746 	}
747 
748 	mutex_unlock(&parent->lock);
749 
750 	if (socket == NULL)
751 		return B_ENTRY_NOT_FOUND;
752 
753 	return B_OK;
754 }
755 
756 
757 ssize_t
758 socket_count_connected(net_socket* _parent)
759 {
760 	net_socket_private* parent = (net_socket_private*)_parent;
761 
762 	MutexLocker _(parent->lock);
763 	return parent->connected_children.Count();
764 }
765 
766 
767 status_t
768 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
769 {
770 	net_socket_private* socket = (net_socket_private*)_socket;
771 
772 	// we enforce an upper limit of connections waiting to be accepted
773 	if (backlog > 256)
774 		backlog = 256;
775 
776 	MutexLocker _(socket->lock);
777 
778 	// first remove the pending connections, then the already connected
779 	// ones as needed
780 	net_socket_private* child;
781 	while (socket->child_count > backlog
782 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
783 		child->RemoveFromParent();
784 		socket->child_count--;
785 	}
786 	while (socket->child_count > backlog
787 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
788 		child->RemoveFromParent();
789 		socket->child_count--;
790 	}
791 
792 	socket->max_backlog = backlog;
793 	return B_OK;
794 }
795 
796 
797 /*!	Returns whether or not this socket has a parent. The parent might not be
798 	valid anymore, though.
799 */
800 bool
801 socket_has_parent(net_socket* _socket)
802 {
803 	net_socket_private* socket = (net_socket_private*)_socket;
804 	return socket->parent != NULL;
805 }
806 
807 
808 /*!	The socket has been connected. It will be moved to the connected queue
809 	of its parent socket.
810 */
811 status_t
812 socket_connected(net_socket* _socket)
813 {
814 	net_socket_private* socket = (net_socket_private*)_socket;
815 
816 	TRACE("socket_connected(%p)\n", socket);
817 
818 	WeakReference<net_socket_private> parent = socket->parent;
819 	if (parent.Get() == NULL)
820 		return B_BAD_VALUE;
821 
822 	MutexLocker _(parent->lock);
823 
824 	parent->pending_children.Remove(socket);
825 	parent->connected_children.Add(socket);
826 	socket->is_connected = true;
827 
828 	// notify parent
829 	if (parent->select_pool)
830 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
831 
832 	return B_OK;
833 }
834 
835 
836 /*!	The socket has been aborted. Steals the parent's reference, and releases
837 	it.
838 */
839 status_t
840 socket_aborted(net_socket* _socket)
841 {
842 	net_socket_private* socket = (net_socket_private*)_socket;
843 
844 	TRACE("socket_aborted(%p)\n", socket);
845 
846 	WeakReference<net_socket_private> parent = socket->parent;
847 	if (parent.Get() == NULL)
848 		return B_BAD_VALUE;
849 
850 	MutexLocker _(parent->lock);
851 
852 	if (socket->is_connected)
853 		parent->connected_children.Remove(socket);
854 	else
855 		parent->pending_children.Remove(socket);
856 
857 	parent->child_count--;
858 	socket->RemoveFromParent();
859 
860 	return B_OK;
861 }
862 
863 
864 //	#pragma mark - notifications
865 
866 
867 status_t
868 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
869 {
870 	net_socket_private* socket = (net_socket_private*)_socket;
871 
872 	mutex_lock(&socket->lock);
873 
874 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
875 		event);
876 
877 	mutex_unlock(&socket->lock);
878 
879 	if (status != B_OK)
880 		return status;
881 
882 	// check if the event is already present
883 	// TODO: add support for poll() types
884 
885 	switch (event) {
886 		case B_SELECT_READ:
887 		{
888 			ssize_t available = socket_read_avail(socket);
889 			if ((ssize_t)socket->receive.low_water_mark <= available
890 				|| available < B_OK)
891 				notify_select_event(sync, event);
892 			break;
893 		}
894 		case B_SELECT_WRITE:
895 		{
896 			ssize_t available = socket_send_avail(socket);
897 			if ((ssize_t)socket->send.low_water_mark <= available
898 				|| available < B_OK)
899 				notify_select_event(sync, event);
900 			break;
901 		}
902 		case B_SELECT_ERROR:
903 			// TODO: B_SELECT_ERROR condition!
904 			break;
905 	}
906 
907 	return B_OK;
908 }
909 
910 
911 status_t
912 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
913 {
914 	net_socket_private* socket = (net_socket_private*)_socket;
915 
916 	MutexLocker _(socket->lock);
917 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
918 }
919 
920 
921 status_t
922 socket_notify(net_socket* _socket, uint8 event, int32 value)
923 {
924 	net_socket_private* socket = (net_socket_private*)_socket;
925 	bool notify = true;
926 
927 	switch (event) {
928 		case B_SELECT_READ:
929 			if ((ssize_t)socket->receive.low_water_mark > value
930 				&& value >= B_OK)
931 				notify = false;
932 			break;
933 
934 		case B_SELECT_WRITE:
935 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
936 				notify = false;
937 			break;
938 
939 		case B_SELECT_ERROR:
940 			socket->error = value;
941 			break;
942 	}
943 
944 	MutexLocker _(socket->lock);
945 
946 	if (notify && socket->select_pool != NULL) {
947 		notify_select_event_pool(socket->select_pool, event);
948 
949 		if (event == B_SELECT_ERROR) {
950 			// always notify read/write on error
951 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
952 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
953 		}
954 	}
955 
956 	return B_OK;
957 }
958 
959 
960 //	#pragma mark - standard socket API
961 
962 
963 int
964 socket_accept(net_socket* socket, struct sockaddr* address,
965 	socklen_t* _addressLength, net_socket** _acceptedSocket)
966 {
967 	if ((socket->options & SO_ACCEPTCONN) == 0)
968 		return B_BAD_VALUE;
969 
970 	net_socket* accepted;
971 	status_t status = socket->first_info->accept(socket->first_protocol,
972 		&accepted);
973 	if (status != B_OK)
974 		return status;
975 
976 	if (address && *_addressLength > 0) {
977 		memcpy(address, &accepted->peer, min_c(*_addressLength,
978 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
979 		*_addressLength = accepted->peer.ss_len;
980 	}
981 
982 	*_acceptedSocket = accepted;
983 	return B_OK;
984 }
985 
986 
987 int
988 socket_bind(net_socket* socket, const struct sockaddr* address,
989 	socklen_t addressLength)
990 {
991 	sockaddr empty;
992 	if (address == NULL) {
993 		// special - try to bind to an empty address, like INADDR_ANY
994 		memset(&empty, 0, sizeof(sockaddr));
995 		empty.sa_len = sizeof(sockaddr);
996 		empty.sa_family = socket->family;
997 
998 		address = &empty;
999 		addressLength = sizeof(sockaddr);
1000 	}
1001 
1002 	if (socket->address.ss_len != 0) {
1003 		status_t status = socket->first_info->unbind(socket->first_protocol,
1004 			(sockaddr*)&socket->address);
1005 		if (status != B_OK)
1006 			return status;
1007 	}
1008 
1009 	memcpy(&socket->address, address, sizeof(sockaddr));
1010 	socket->address.ss_len = sizeof(sockaddr_storage);
1011 
1012 	status_t status = socket->first_info->bind(socket->first_protocol,
1013 		(sockaddr*)address);
1014 	if (status != B_OK) {
1015 		// clear address again, as binding failed
1016 		socket->address.ss_len = 0;
1017 	}
1018 
1019 	return status;
1020 }
1021 
1022 
1023 int
1024 socket_connect(net_socket* socket, const struct sockaddr* address,
1025 	socklen_t addressLength)
1026 {
1027 	if (address == NULL || addressLength == 0)
1028 		return ENETUNREACH;
1029 
1030 	if (socket->address.ss_len == 0) {
1031 		// try to bind first
1032 		status_t status = socket_bind(socket, NULL, 0);
1033 		if (status != B_OK)
1034 			return status;
1035 	}
1036 
1037 	return socket->first_info->connect(socket->first_protocol, address);
1038 }
1039 
1040 
1041 int
1042 socket_getpeername(net_socket* socket, struct sockaddr* address,
1043 	socklen_t* _addressLength)
1044 {
1045 	if (socket->peer.ss_len == 0)
1046 		return ENOTCONN;
1047 
1048 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
1049 	*_addressLength = socket->peer.ss_len;
1050 	return B_OK;
1051 }
1052 
1053 
1054 int
1055 socket_getsockname(net_socket* socket, struct sockaddr* address,
1056 	socklen_t* _addressLength)
1057 {
1058 	if (socket->address.ss_len == 0)
1059 		return ENOTCONN;
1060 
1061 	memcpy(address, &socket->address, min_c(*_addressLength,
1062 		socket->address.ss_len));
1063 	*_addressLength = socket->address.ss_len;
1064 	return B_OK;
1065 }
1066 
1067 
1068 status_t
1069 socket_get_option(net_socket* socket, int level, int option, void* value,
1070 	int* _length)
1071 {
1072 	if (level != SOL_SOCKET)
1073 		return ENOPROTOOPT;
1074 
1075 	switch (option) {
1076 		case SO_SNDBUF:
1077 		{
1078 			uint32* size = (uint32*)value;
1079 			*size = socket->send.buffer_size;
1080 			*_length = sizeof(uint32);
1081 			return B_OK;
1082 		}
1083 
1084 		case SO_RCVBUF:
1085 		{
1086 			uint32* size = (uint32*)value;
1087 			*size = socket->receive.buffer_size;
1088 			*_length = sizeof(uint32);
1089 			return B_OK;
1090 		}
1091 
1092 		case SO_SNDLOWAT:
1093 		{
1094 			uint32* size = (uint32*)value;
1095 			*size = socket->send.low_water_mark;
1096 			*_length = sizeof(uint32);
1097 			return B_OK;
1098 		}
1099 
1100 		case SO_RCVLOWAT:
1101 		{
1102 			uint32* size = (uint32*)value;
1103 			*size = socket->receive.low_water_mark;
1104 			*_length = sizeof(uint32);
1105 			return B_OK;
1106 		}
1107 
1108 		case SO_RCVTIMEO:
1109 		case SO_SNDTIMEO:
1110 		{
1111 			if (*_length < (int)sizeof(struct timeval))
1112 				return B_BAD_VALUE;
1113 
1114 			bigtime_t timeout;
1115 			if (option == SO_SNDTIMEO)
1116 				timeout = socket->send.timeout;
1117 			else
1118 				timeout = socket->receive.timeout;
1119 			if (timeout == B_INFINITE_TIMEOUT)
1120 				timeout = 0;
1121 
1122 			struct timeval* timeval = (struct timeval*)value;
1123 			timeval->tv_sec = timeout / 1000000LL;
1124 			timeval->tv_usec = timeout % 1000000LL;
1125 
1126 			*_length = sizeof(struct timeval);
1127 			return B_OK;
1128 		}
1129 
1130 		case SO_NONBLOCK:
1131 		{
1132 			int32* _set = (int32*)value;
1133 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1134 			*_length = sizeof(int32);
1135 			return B_OK;
1136 		}
1137 
1138 		case SO_ACCEPTCONN:
1139 		case SO_BROADCAST:
1140 		case SO_DEBUG:
1141 		case SO_DONTROUTE:
1142 		case SO_KEEPALIVE:
1143 		case SO_OOBINLINE:
1144 		case SO_REUSEADDR:
1145 		case SO_REUSEPORT:
1146 		case SO_USELOOPBACK:
1147 		{
1148 			int32* _set = (int32*)value;
1149 			*_set = (socket->options & option) != 0;
1150 			*_length = sizeof(int32);
1151 			return B_OK;
1152 		}
1153 
1154 		case SO_TYPE:
1155 		{
1156 			int32* _set = (int32*)value;
1157 			*_set = socket->type;
1158 			*_length = sizeof(int32);
1159 			return B_OK;
1160 		}
1161 
1162 		case SO_ERROR:
1163 		{
1164 			int32* _set = (int32*)value;
1165 			*_set = socket->error;
1166 			*_length = sizeof(int32);
1167 
1168 			socket->error = B_OK;
1169 				// clear error upon retrieval
1170 			return B_OK;
1171 		}
1172 
1173 		default:
1174 			break;
1175 	}
1176 
1177 	dprintf("socket_getsockopt: unknown option %d\n", option);
1178 	return ENOPROTOOPT;
1179 }
1180 
1181 
1182 int
1183 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1184 	int* _length)
1185 {
1186 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1187 		level, option, value, _length);
1188 }
1189 
1190 
1191 int
1192 socket_listen(net_socket* socket, int backlog)
1193 {
1194 	status_t status = socket->first_info->listen(socket->first_protocol,
1195 		backlog);
1196 	if (status == B_OK)
1197 		socket->options |= SO_ACCEPTCONN;
1198 
1199 	return status;
1200 }
1201 
1202 
1203 ssize_t
1204 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1205 	int flags)
1206 {
1207 	// If the protocol sports read_data_no_buffer() we use it.
1208 	if (socket->first_info->read_data_no_buffer != NULL)
1209 		return socket_receive_no_buffer(socket, header, data, length, flags);
1210 
1211 	size_t totalLength = length;
1212 	net_buffer* buffer;
1213 	int i;
1214 
1215 	// the convention to this function is that have header been
1216 	// present, { data, length } would have been iovec[0] and is
1217 	// always considered like that
1218 
1219 	if (header) {
1220 		// calculate the length considering all of the extra buffers
1221 		for (i = 1; i < header->msg_iovlen; i++)
1222 			totalLength += header->msg_iov[i].iov_len;
1223 	}
1224 
1225 	status_t status = socket->first_info->read_data(
1226 		socket->first_protocol, totalLength, flags, &buffer);
1227 	if (status != B_OK)
1228 		return status;
1229 
1230 	// process ancillary data
1231 	if (header != NULL) {
1232 		if (buffer != NULL && header->msg_control != NULL) {
1233 			ancillary_data_container* container
1234 				= gNetBufferModule.get_ancillary_data(buffer);
1235 			if (container != NULL)
1236 				status = process_ancillary_data(socket, container, header);
1237 			else
1238 				status = process_ancillary_data(socket, buffer, header);
1239 			if (status != B_OK) {
1240 				gNetBufferModule.free(buffer);
1241 				return status;
1242 			}
1243 		} else
1244 			header->msg_controllen = 0;
1245 	}
1246 
1247 	// TODO: - returning a NULL buffer when received 0 bytes
1248 	//         may not make much sense as we still need the address
1249 	//       - gNetBufferModule.read() uses memcpy() instead of user_memcpy
1250 
1251 	size_t nameLen = 0;
1252 
1253 	if (header) {
1254 		// TODO: - consider the control buffer options
1255 		nameLen = header->msg_namelen;
1256 		header->msg_namelen = 0;
1257 		header->msg_flags = 0;
1258 	}
1259 
1260 	if (buffer == NULL)
1261 		return 0;
1262 
1263 	size_t bytesReceived = buffer->size, bytesCopied = 0;
1264 
1265 	length = min_c(bytesReceived, length);
1266 	if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
1267 		gNetBufferModule.free(buffer);
1268 		return ENOBUFS;
1269 	}
1270 
1271 	// if first copy was a success, proceed to following
1272 	// copies as required
1273 	bytesCopied += length;
1274 
1275 	if (header) {
1276 		// we only start considering at iovec[1]
1277 		// as { data, length } is iovec[0]
1278 		for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1279 			iovec& vec = header->msg_iov[i];
1280 			size_t toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1281 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1282 					toRead) < B_OK) {
1283 				break;
1284 			}
1285 
1286 			bytesCopied += toRead;
1287 		}
1288 
1289 		if (header->msg_name != NULL) {
1290 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1291 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1292 		}
1293 	}
1294 
1295 	gNetBufferModule.free(buffer);
1296 
1297 	if (bytesCopied < bytesReceived) {
1298 		if (header)
1299 			header->msg_flags = MSG_TRUNC;
1300 
1301 		if (flags & MSG_TRUNC)
1302 			return bytesReceived;
1303 	}
1304 
1305 	return bytesCopied;
1306 }
1307 
1308 
1309 ssize_t
1310 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1311 	int flags)
1312 {
1313 	const sockaddr* address = NULL;
1314 	socklen_t addressLength = 0;
1315 	size_t bytesLeft = length;
1316 
1317 	if (length > SSIZE_MAX)
1318 		return B_BAD_VALUE;
1319 
1320 	ancillary_data_container* ancillaryData = NULL;
1321 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(NULL,
1322 		&delete_ancillary_data_container);
1323 
1324 	if (header != NULL) {
1325 		address = (const sockaddr*)header->msg_name;
1326 		addressLength = header->msg_namelen;
1327 
1328 		// get the ancillary data
1329 		if (header->msg_control != NULL) {
1330 			ancillaryData = create_ancillary_data_container();
1331 			if (ancillaryData == NULL)
1332 				return B_NO_MEMORY;
1333 			ancillaryDataDeleter.SetTo(ancillaryData);
1334 
1335 			status_t status = add_ancillary_data(socket, ancillaryData,
1336 				(cmsghdr*)header->msg_control, header->msg_controllen);
1337 			if (status != B_OK)
1338 				return status;
1339 		}
1340 	}
1341 
1342 	if (addressLength == 0)
1343 		address = NULL;
1344 	else if (address == NULL)
1345 		return B_BAD_VALUE;
1346 
1347 	if (socket->peer.ss_len != 0) {
1348 		if (address != NULL)
1349 			return EISCONN;
1350 
1351 		// socket is connected, we use that address
1352 		address = (struct sockaddr*)&socket->peer;
1353 		addressLength = socket->peer.ss_len;
1354 	}
1355 
1356 	if (address == NULL || addressLength == 0) {
1357 		// don't know where to send to:
1358 		return EDESTADDRREQ;
1359 	}
1360 
1361 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1362 		&& bytesLeft > socket->send.buffer_size)
1363 		return EMSGSIZE;
1364 
1365 	if (socket->address.ss_len == 0) {
1366 		// try to bind first
1367 		status_t status = socket_bind(socket, NULL, 0);
1368 		if (status != B_OK)
1369 			return status;
1370 	}
1371 
1372 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1373 	if (socket->first_info->send_data_no_buffer != NULL) {
1374 		iovec stackVec = { (void*)data, length };
1375 		iovec* vecs = header ? header->msg_iov : &stackVec;
1376 		int vecCount = header ? header->msg_iovlen : 1;
1377 
1378 		ssize_t written = socket->first_info->send_data_no_buffer(
1379 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1380 			addressLength);
1381 		if (written > 0)
1382 			ancillaryDataDeleter.Detach();
1383 		return written;
1384 	}
1385 
1386 	// By convention, if a header is given, the (data, length) equals the first
1387 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1388 	// the size of the remaining ones.
1389 	if (header != NULL) {
1390 		if (header->msg_iovlen <= 1)
1391 			header = NULL;
1392 		else {
1393 // TODO: The iovecs have already been copied to kernel space. Simplify!
1394 			bytesLeft += compute_user_iovec_length(header->msg_iov + 1,
1395 				header->msg_iovlen - 1);
1396 		}
1397 	}
1398 
1399 	ssize_t bytesSent = 0;
1400 	size_t vecOffset = 0;
1401 	uint32 vecIndex = 0;
1402 
1403 	while (bytesLeft > 0) {
1404 		// TODO: useful, maybe even computed header space!
1405 		net_buffer* buffer = gNetBufferModule.create(256);
1406 		if (buffer == NULL)
1407 			return ENOBUFS;
1408 
1409 		while (buffer->size < socket->send.buffer_size
1410 			&& buffer->size < bytesLeft) {
1411 			if (vecIndex > 0 && vecOffset == 0) {
1412 				// retrieve next iovec buffer from header
1413 				iovec vec;
1414 				if (user_memcpy(&vec, header->msg_iov + vecIndex, sizeof(iovec))
1415 						< B_OK) {
1416 					gNetBufferModule.free(buffer);
1417 					return B_BAD_ADDRESS;
1418 				}
1419 
1420 				data = vec.iov_base;
1421 				length = vec.iov_len;
1422 			}
1423 
1424 			size_t bytes = length;
1425 			if (buffer->size + bytes > socket->send.buffer_size)
1426 				bytes = socket->send.buffer_size - buffer->size;
1427 
1428 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1429 				gNetBufferModule.free(buffer);
1430 				return ENOBUFS;
1431 			}
1432 
1433 			if (bytes != length) {
1434 				// partial send
1435 				vecOffset = bytes;
1436 				length -= vecOffset;
1437 				data = (uint8*)data + vecOffset;
1438 			} else if (header != NULL) {
1439 				// proceed with next buffer, if any
1440 				vecOffset = 0;
1441 				vecIndex++;
1442 
1443 				if (vecIndex >= (uint32)header->msg_iovlen)
1444 					break;
1445 			}
1446 		}
1447 
1448 		// attach ancillary data to the first buffer
1449 		status_t status = B_OK;
1450 		if (ancillaryData != NULL) {
1451 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1452 			ancillaryDataDeleter.Detach();
1453 			ancillaryData = NULL;
1454 		}
1455 
1456 		size_t bufferSize = buffer->size;
1457 		buffer->flags = flags;
1458 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1459 		memcpy(buffer->destination, address, addressLength);
1460 		buffer->destination->sa_len = addressLength;
1461 
1462 		if (status == B_OK) {
1463 			status = socket->first_info->send_data(socket->first_protocol,
1464 				buffer);
1465 		}
1466 		if (status != B_OK) {
1467 			size_t sizeAfterSend = buffer->size;
1468 			gNetBufferModule.free(buffer);
1469 
1470 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1471 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1472 				// this appears to be a partial write
1473 				return bytesSent + (bufferSize - sizeAfterSend);
1474 			}
1475 			return status;
1476 		}
1477 
1478 		bytesLeft -= bufferSize;
1479 		bytesSent += bufferSize;
1480 	}
1481 
1482 	return bytesSent;
1483 }
1484 
1485 
1486 status_t
1487 socket_set_option(net_socket* socket, int level, int option, const void* value,
1488 	int length)
1489 {
1490 	if (level != SOL_SOCKET)
1491 		return ENOPROTOOPT;
1492 
1493 	TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1494 
1495 	switch (option) {
1496 		// TODO: implement other options!
1497 		case SO_LINGER:
1498 		{
1499 			if (length < (int)sizeof(struct linger))
1500 				return B_BAD_VALUE;
1501 
1502 			struct linger* linger = (struct linger*)value;
1503 			if (linger->l_onoff) {
1504 				socket->options |= SO_LINGER;
1505 				socket->linger = linger->l_linger;
1506 			} else {
1507 				socket->options &= ~SO_LINGER;
1508 				socket->linger = 0;
1509 			}
1510 			return B_OK;
1511 		}
1512 
1513 		case SO_SNDBUF:
1514 			if (length != sizeof(uint32))
1515 				return B_BAD_VALUE;
1516 
1517 			socket->send.buffer_size = *(const uint32*)value;
1518 			return B_OK;
1519 
1520 		case SO_RCVBUF:
1521 			if (length != sizeof(uint32))
1522 				return B_BAD_VALUE;
1523 
1524 			socket->receive.buffer_size = *(const uint32*)value;
1525 			return B_OK;
1526 
1527 		case SO_SNDLOWAT:
1528 			if (length != sizeof(uint32))
1529 				return B_BAD_VALUE;
1530 
1531 			socket->send.low_water_mark = *(const uint32*)value;
1532 			return B_OK;
1533 
1534 		case SO_RCVLOWAT:
1535 			if (length != sizeof(uint32))
1536 				return B_BAD_VALUE;
1537 
1538 			socket->receive.low_water_mark = *(const uint32*)value;
1539 			return B_OK;
1540 
1541 		case SO_RCVTIMEO:
1542 		case SO_SNDTIMEO:
1543 		{
1544 			if (length != sizeof(struct timeval))
1545 				return B_BAD_VALUE;
1546 
1547 			const struct timeval* timeval = (const struct timeval*)value;
1548 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1549 			if (timeout == 0)
1550 				timeout = B_INFINITE_TIMEOUT;
1551 
1552 			if (option == SO_SNDTIMEO)
1553 				socket->send.timeout = timeout;
1554 			else
1555 				socket->receive.timeout = timeout;
1556 			return B_OK;
1557 		}
1558 
1559 		case SO_NONBLOCK:
1560 			if (length != sizeof(int32))
1561 				return B_BAD_VALUE;
1562 
1563 			if (*(const int32*)value) {
1564 				socket->send.timeout = 0;
1565 				socket->receive.timeout = 0;
1566 			} else {
1567 				socket->send.timeout = B_INFINITE_TIMEOUT;
1568 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1569 			}
1570 			return B_OK;
1571 
1572 		case SO_BROADCAST:
1573 		case SO_DEBUG:
1574 		case SO_DONTROUTE:
1575 		case SO_KEEPALIVE:
1576 		case SO_OOBINLINE:
1577 		case SO_REUSEADDR:
1578 		case SO_REUSEPORT:
1579 		case SO_USELOOPBACK:
1580 			if (length != sizeof(int32))
1581 				return B_BAD_VALUE;
1582 
1583 			if (*(const int32*)value)
1584 				socket->options |= option;
1585 			else
1586 				socket->options &= ~option;
1587 			return B_OK;
1588 
1589 		case SO_BINDTODEVICE:
1590 		{
1591 			if (length != sizeof(uint32))
1592 				return B_BAD_VALUE;
1593 
1594 			// TODO: we might want to check if the device exists at all
1595 			// (although it doesn't really harm when we don't)
1596 			socket->bound_to_device = *(const uint32*)value;
1597 			return B_OK;
1598 		}
1599 
1600 		default:
1601 			break;
1602 	}
1603 
1604 	dprintf("socket_setsockopt: unknown option %d\n", option);
1605 	return ENOPROTOOPT;
1606 }
1607 
1608 
1609 int
1610 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1611 	int length)
1612 {
1613 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1614 		level, option, value, length);
1615 }
1616 
1617 
1618 int
1619 socket_shutdown(net_socket* socket, int direction)
1620 {
1621 	return socket->first_info->shutdown(socket->first_protocol, direction);
1622 }
1623 
1624 
1625 status_t
1626 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1627 {
1628 	sockets[0] = NULL;
1629 	sockets[1] = NULL;
1630 
1631 	// create sockets
1632 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1633 	if (error != B_OK)
1634 		return error;
1635 
1636 	if (error == B_OK)
1637 		error = socket_open(family, type, protocol, &sockets[1]);
1638 
1639 	// bind one
1640 	if (error == B_OK)
1641 		error = socket_bind(sockets[0], NULL, 0);
1642 
1643 	// start listening
1644 	if (error == B_OK)
1645 		error = socket_listen(sockets[0], 1);
1646 
1647 	// connect them
1648 	if (error == B_OK) {
1649 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1650 			sockets[0]->address.ss_len);
1651 	}
1652 
1653 	// accept a socket
1654 	net_socket* acceptedSocket = NULL;
1655 	if (error == B_OK)
1656 		error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1657 
1658 	if (error == B_OK) {
1659 		// everything worked: close the listener socket
1660 		socket_close(sockets[0]);
1661 		socket_free(sockets[0]);
1662 		sockets[0] = acceptedSocket;
1663 	} else {
1664 		// close sockets on error
1665 		for (int i = 0; i < 2; i++) {
1666 			if (sockets[i] != NULL) {
1667 				socket_close(sockets[i]);
1668 				socket_free(sockets[i]);
1669 				sockets[i] = NULL;
1670 			}
1671 		}
1672 	}
1673 
1674 	return error;
1675 }
1676 
1677 
1678 //	#pragma mark -
1679 
1680 
1681 static status_t
1682 socket_std_ops(int32 op, ...)
1683 {
1684 	switch (op) {
1685 		case B_MODULE_INIT:
1686 		{
1687 			new (&sSocketList) SocketList;
1688 			mutex_init(&sSocketLock, "socket list");
1689 
1690 #if ENABLE_DEBUGGER_COMMANDS
1691 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1692 			add_debugger_command("socket", dump_socket, "dumps a socket");
1693 #endif
1694 			return B_OK;
1695 		}
1696 		case B_MODULE_UNINIT:
1697 			ASSERT(sSocketList.IsEmpty());
1698 			mutex_destroy(&sSocketLock);
1699 
1700 #if ENABLE_DEBUGGER_COMMANDS
1701 			remove_debugger_command("socket", dump_socket);
1702 			remove_debugger_command("sockets", dump_sockets);
1703 #endif
1704 			return B_OK;
1705 
1706 		default:
1707 			return B_ERROR;
1708 	}
1709 }
1710 
1711 
/*!	The socket module descriptor: publishes the socket functions of this
	file under NET_SOCKET_MODULE_NAME, with socket_std_ops() as the module's
	standard operations hook.
	The initializer is positional, so the order of the entries has to follow
	the member order of net_socket_module_info (declared elsewhere).
*/
net_socket_module_info gNetSocketModule = {
	{
		// module name, a zero value (presumably the module flags), and the
		// standard operations hook
		NET_SOCKET_MODULE_NAME,
		0,
		socket_std_ops
	},
	// socket life cycle
	socket_open,
	socket_close,
	socket_free,

	// vectored I/O and control
	socket_readv,
	socket_writev,
	socket_control,

	socket_read_avail,
	socket_send_avail,

	socket_send_data,
	socket_receive_data,

	// socket level options
	socket_get_option,
	socket_set_option,

	socket_get_next_stat,

	// connections
	socket_acquire,
	socket_release,
	socket_spawn_pending,
	socket_dequeue_connected,
	socket_count_connected,
	socket_set_max_backlog,
	socket_has_parent,
	socket_connected,
	socket_aborted,

	// notifications
	socket_request_notification,
	socket_cancel_notification,
	socket_notify,

	// standard socket API
	socket_accept,
	socket_bind,
	socket_connect,
	socket_getpeername,
	socket_getsockname,
	socket_getsockopt,
	socket_listen,
	socket_receive,
	socket_send,
	socket_setsockopt,
	socket_shutdown,
	socket_socketpair
};
1767 
1768