xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 7cea5bf07ffaec7e25508f3b81a2e5bd989e1b34)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
// Define TRACE_SOCKET to send the TRACE() output of this file to dprintf();
// without it, TRACE() expands to an empty statement.
//#define TRACE_SOCKET
#ifdef TRACE_SOCKET
#	define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
#else
#	define TRACE(x...) ;
#endif


// Forward declaration, needed to name the list type before the struct itself
// is defined.
struct net_socket_private;
typedef DoublyLinkedList<net_socket_private> SocketList;
50 
51 struct net_socket_private : net_socket,
52 		DoublyLinkedListLinkImpl<net_socket_private>,
53 		BWeakReferenceable {
54 	net_socket_private();
55 	~net_socket_private();
56 
57 	void RemoveFromParent();
58 
59 	BWeakReference<net_socket_private> parent;
60 	team_id						owner;
61 	uint32						max_backlog;
62 	uint32						child_count;
63 	SocketList					pending_children;
64 	SocketList					connected_children;
65 
66 	struct select_sync_pool*	select_pool;
67 	mutex						lock;
68 
69 	bool						is_connected;
70 	bool						is_in_socket_list;
71 };
72 
73 
// Prototypes for functions that are called before their definition further
// down in this file.
int socket_bind(net_socket* socket, const struct sockaddr* address,
	socklen_t addressLength);
int socket_setsockopt(net_socket* socket, int level, int option,
	const void* value, int length);
ssize_t socket_read_avail(net_socket* socket);

// Global list of sockets; sockets are added by socket_open() and
// net_socket_private::RemoveFromParent(), and removed by the destructor.
// sSocketLock is held around every modification and while iterating in
// socket_get_next_stat().
static SocketList sSocketList;
static mutex sSocketLock;
82 
83 
84 net_socket_private::net_socket_private()
85 	:
86 	owner(-1),
87 	max_backlog(0),
88 	child_count(0),
89 	select_pool(NULL),
90 	is_connected(false),
91 	is_in_socket_list(false)
92 {
93 	first_protocol = NULL;
94 	first_info = NULL;
95 	options = 0;
96 	linger = 0;
97 	bound_to_device = 0;
98 	error = 0;
99 
100 	address.ss_len = 0;
101 	peer.ss_len = 0;
102 
103 	mutex_init(&lock, "socket");
104 
105 	// set defaults (may be overridden by the protocols)
106 	send.buffer_size = 65535;
107 	send.low_water_mark = 1;
108 	send.timeout = B_INFINITE_TIMEOUT;
109 	receive.buffer_size = 65535;
110 	receive.low_water_mark = 1;
111 	receive.timeout = B_INFINITE_TIMEOUT;
112 }
113 
114 
115 net_socket_private::~net_socket_private()
116 {
117 	TRACE("delete net_socket %p\n", this);
118 
119 	if (parent != NULL)
120 		panic("socket still has a parent!");
121 
122 	if (is_in_socket_list) {
123 		MutexLocker _(sSocketLock);
124 		sSocketList.Remove(this);
125 	}
126 
127 	mutex_lock(&lock);
128 
129 	// also delete all children of this socket
130 	while (net_socket_private* child = pending_children.RemoveHead()) {
131 		child->RemoveFromParent();
132 	}
133 	while (net_socket_private* child = connected_children.RemoveHead()) {
134 		child->RemoveFromParent();
135 	}
136 
137 	mutex_unlock(&lock);
138 
139 	put_domain_protocols(this);
140 
141 	mutex_destroy(&lock);
142 }
143 
144 
145 void
146 net_socket_private::RemoveFromParent()
147 {
148 	ASSERT(!is_in_socket_list && parent != NULL);
149 
150 	parent = NULL;
151 
152 	mutex_lock(&sSocketLock);
153 	sSocketList.Add(this);
154 	mutex_unlock(&sSocketLock);
155 
156 	is_in_socket_list = true;
157 
158 	ReleaseReference();
159 }
160 
161 
162 //	#pragma mark -
163 
164 
165 static size_t
166 compute_user_iovec_length(iovec* userVec, uint32 count)
167 {
168 	size_t length = 0;
169 
170 	for (uint32 i = 0; i < count; i++) {
171 		iovec vec;
172 		if (user_memcpy(&vec, userVec + i, sizeof(iovec)) < B_OK)
173 			return 0;
174 
175 		length += vec.iov_len;
176 	}
177 
178 	return length;
179 }
180 
181 
182 static status_t
183 create_socket(int family, int type, int protocol, net_socket_private** _socket)
184 {
185 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
186 	if (socket == NULL)
187 		return B_NO_MEMORY;
188 	status_t status = socket->InitCheck();
189 	if (status != B_OK) {
190 		delete socket;
191 		return status;
192 	}
193 
194 	socket->family = family;
195 	socket->type = type;
196 	socket->protocol = protocol;
197 
198 	status = get_domain_protocols(socket);
199 	if (status != B_OK) {
200 		delete socket;
201 		return status;
202 	}
203 
204 	TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
205 		socket->type, socket->protocol);
206 
207 #ifdef TRACE_SOCKET
208 	net_protocol* current = socket->first_protocol;
209 	for (int i = 0; current != NULL; current = current->next, i++)
210 		TRACE("  [%d] %p  %s\n", i, current, current->module->info.name);
211 #endif
212 
213 	*_socket = socket;
214 	return B_OK;
215 }
216 
217 
218 static status_t
219 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
220 	void* data, size_t dataLen)
221 {
222 	cmsghdr* header = (cmsghdr*)data;
223 
224 	if (dataLen == 0)
225 		return B_OK;
226 
227 	if (socket->first_info->add_ancillary_data == NULL)
228 		return B_NOT_SUPPORTED;
229 
230 	while (true) {
231 		if (header->cmsg_len < CMSG_LEN(0) || header->cmsg_len > dataLen)
232 			return B_BAD_VALUE;
233 
234 		status_t status = socket->first_info->add_ancillary_data(
235 			socket->first_protocol, container, header);
236 		if (status != B_OK)
237 			return status;
238 
239 		if (dataLen <= _ALIGN(header->cmsg_len))
240 			break;
241 		dataLen -= _ALIGN(header->cmsg_len);
242 		header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
243 	}
244 
245 	return B_OK;
246 }
247 
248 
249 static status_t
250 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
251 	msghdr* messageHeader)
252 {
253 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
254 	int dataBufferLen = messageHeader->msg_controllen;
255 
256 	if (container == NULL || dataBuffer == NULL) {
257 		messageHeader->msg_controllen = 0;
258 		return B_OK;
259 	}
260 
261 	ancillary_data_header header;
262 	void* data = NULL;
263 
264 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
265 		if (socket->first_info->process_ancillary_data == NULL)
266 			return B_NOT_SUPPORTED;
267 
268 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
269 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
270 		if (bytesWritten < 0)
271 			return bytesWritten;
272 
273 		dataBuffer += bytesWritten;
274 		dataBufferLen -= bytesWritten;
275 	}
276 
277 	messageHeader->msg_controllen -= dataBufferLen;
278 
279 	return B_OK;
280 }
281 
282 
283 static status_t
284 process_ancillary_data(net_socket* socket,
285 	net_buffer* buffer, msghdr* messageHeader)
286 {
287 	void *dataBuffer = messageHeader->msg_control;
288 	ssize_t bytesWritten;
289 
290 	if (dataBuffer == NULL) {
291 		messageHeader->msg_controllen = 0;
292 		return B_OK;
293 	}
294 
295 	if (socket->first_info->process_ancillary_data_no_container == NULL)
296 		return B_NOT_SUPPORTED;
297 
298 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
299 		socket->first_protocol, buffer, dataBuffer,
300 		messageHeader->msg_controllen);
301 	if (bytesWritten < 0)
302 		return bytesWritten;
303 	messageHeader->msg_controllen = bytesWritten;
304 
305 	return B_OK;
306 }
307 
308 
309 static ssize_t
310 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
311 	size_t length, int flags)
312 {
313 	iovec stackVec = { data, length };
314 	iovec* vecs = header ? header->msg_iov : &stackVec;
315 	int vecCount = header ? header->msg_iovlen : 1;
316 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
317 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
318 
319 	ancillary_data_container* ancillaryData = NULL;
320 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
321 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
322 		addressLen);
323 	if (bytesRead < 0)
324 		return bytesRead;
325 
326 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(ancillaryData,
327 		&delete_ancillary_data_container);
328 
329 	// process ancillary data
330 	if (header != NULL) {
331 		status_t status = process_ancillary_data(socket, ancillaryData, header);
332 		if (status != B_OK)
333 			return status;
334 
335 		header->msg_flags = 0;
336 	}
337 
338 	return bytesRead;
339 }
340 
341 
342 #if ENABLE_DEBUGGER_COMMANDS
343 
344 
345 static void
346 print_socket_line(net_socket_private* socket, const char* prefix)
347 {
348 	BReference<net_socket_private> parent;
349 	if (socket->parent.PrivatePointer() != NULL)
350 		parent = socket->parent.GetReference();
351 	kprintf("%s%p %2d.%2d.%2d %6" B_PRId32 " %p %p  %p%s\n", prefix, socket,
352 		socket->family, socket->type, socket->protocol, socket->owner,
353 		socket->first_protocol, socket->first_info, parent.Get(),
354 		parent.Get() != NULL ? socket->is_connected ? " (c)" : " (p)" : "");
355 }
356 
357 
358 static int
359 dump_socket(int argc, char** argv)
360 {
361 	if (argc < 2) {
362 		kprintf("usage: %s [address]\n", argv[0]);
363 		return 0;
364 	}
365 
366 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
367 
368 	kprintf("SOCKET %p\n", socket);
369 	kprintf("  family.type.protocol: %d.%d.%d\n",
370 		socket->family, socket->type, socket->protocol);
371 	BReference<net_socket_private> parent;
372 	if (socket->parent.PrivatePointer() != NULL)
373 		parent = socket->parent.GetReference();
374 	kprintf("  parent:               %p\n", parent.Get());
375 	kprintf("  first protocol:       %p\n", socket->first_protocol);
376 	kprintf("  first module_info:    %p\n", socket->first_info);
377 	kprintf("  options:              %x\n", socket->options);
378 	kprintf("  linger:               %d\n", socket->linger);
379 	kprintf("  bound to device:      %" B_PRIu32 "\n", socket->bound_to_device);
380 	kprintf("  owner:                %" B_PRId32 "\n", socket->owner);
381 	kprintf("  max backlog:          %" B_PRId32 "\n", socket->max_backlog);
382 	kprintf("  is connected:         %d\n", socket->is_connected);
383 	kprintf("  child_count:          %" B_PRIu32 "\n", socket->child_count);
384 
385 	if (socket->child_count == 0)
386 		return 0;
387 
388 	kprintf("    pending children:\n");
389 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
390 	while (net_socket_private* child = iterator.Next()) {
391 		print_socket_line(child, "      ");
392 	}
393 
394 	kprintf("    connected children:\n");
395 	iterator = socket->connected_children.GetIterator();
396 	while (net_socket_private* child = iterator.Next()) {
397 		print_socket_line(child, "      ");
398 	}
399 
400 	return 0;
401 }
402 
403 
404 static int
405 dump_sockets(int argc, char** argv)
406 {
407 	kprintf("address        kind  owner protocol   module_info parent\n");
408 
409 	SocketList::Iterator iterator = sSocketList.GetIterator();
410 	while (net_socket_private* socket = iterator.Next()) {
411 		print_socket_line(socket, "");
412 
413 		SocketList::Iterator childIterator
414 			= socket->pending_children.GetIterator();
415 		while (net_socket_private* child = childIterator.Next()) {
416 			print_socket_line(child, " ");
417 		}
418 
419 		childIterator = socket->connected_children.GetIterator();
420 		while (net_socket_private* child = childIterator.Next()) {
421 			print_socket_line(child, " ");
422 		}
423 	}
424 
425 	return 0;
426 }
427 
428 
429 #endif	// ENABLE_DEBUGGER_COMMANDS
430 
431 
432 //	#pragma mark -
433 
434 
435 status_t
436 socket_open(int family, int type, int protocol, net_socket** _socket)
437 {
438 	net_socket_private* socket;
439 	status_t status = create_socket(family, type, protocol, &socket);
440 	if (status != B_OK)
441 		return status;
442 
443 	status = socket->first_info->open(socket->first_protocol);
444 	if (status != B_OK) {
445 		delete socket;
446 		return status;
447 	}
448 
449 	socket->owner = team_get_current_team_id();
450 	socket->is_in_socket_list = true;
451 
452 	mutex_lock(&sSocketLock);
453 	sSocketList.Add(socket);
454 	mutex_unlock(&sSocketLock);
455 
456 	*_socket = socket;
457 	return B_OK;
458 }
459 
460 
461 status_t
462 socket_close(net_socket* _socket)
463 {
464 	net_socket_private* socket = (net_socket_private*)_socket;
465 	return socket->first_info->close(socket->first_protocol);
466 }
467 
468 
469 void
470 socket_free(net_socket* _socket)
471 {
472 	net_socket_private* socket = (net_socket_private*)_socket;
473 	socket->first_info->free(socket->first_protocol);
474 	socket->ReleaseReference();
475 }
476 
477 
478 status_t
479 socket_readv(net_socket* socket, const iovec* vecs, size_t vecCount,
480 	size_t* _length)
481 {
482 	return -1;
483 }
484 
485 
486 status_t
487 socket_writev(net_socket* socket, const iovec* vecs, size_t vecCount,
488 	size_t* _length)
489 {
490 	if (socket->peer.ss_len == 0)
491 		return ECONNRESET;
492 
493 	if (socket->address.ss_len == 0) {
494 		// try to bind first
495 		status_t status = socket_bind(socket, NULL, 0);
496 		if (status != B_OK)
497 			return status;
498 	}
499 
500 	// TODO: useful, maybe even computed header space!
501 	net_buffer* buffer = gNetBufferModule.create(256);
502 	if (buffer == NULL)
503 		return ENOBUFS;
504 
505 	// copy data into buffer
506 
507 	for (uint32 i = 0; i < vecCount; i++) {
508 		if (gNetBufferModule.append(buffer, vecs[i].iov_base,
509 				vecs[i].iov_len) < B_OK) {
510 			gNetBufferModule.free(buffer);
511 			return ENOBUFS;
512 		}
513 	}
514 
515 	memcpy(buffer->source, &socket->address, socket->address.ss_len);
516 	memcpy(buffer->destination, &socket->peer, socket->peer.ss_len);
517 	size_t size = buffer->size;
518 
519 	ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
520 		buffer);
521 	if (bytesWritten < B_OK) {
522 		if (buffer->size != size) {
523 			// this appears to be a partial write
524 			*_length = size - buffer->size;
525 		}
526 		gNetBufferModule.free(buffer);
527 		return bytesWritten;
528 	}
529 
530 	*_length = bytesWritten;
531 	return B_OK;
532 }
533 
534 
535 status_t
536 socket_control(net_socket* socket, uint32 op, void* data, size_t length)
537 {
538 	switch (op) {
539 		case FIONBIO:
540 		{
541 			if (data == NULL)
542 				return B_BAD_VALUE;
543 
544 			int value;
545 			if (is_syscall()) {
546 				if (!IS_USER_ADDRESS(data)
547 					|| user_memcpy(&value, data, sizeof(int)) != B_OK) {
548 					return B_BAD_ADDRESS;
549 				}
550 			} else
551 				value = *(int*)data;
552 
553 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
554 				sizeof(int));
555 		}
556 
557 		case FIONREAD:
558 		{
559 			if (data == NULL)
560 				return B_BAD_VALUE;
561 
562 			int available = (int)socket_read_avail(socket);
563 			if (available < 0)
564 				return available;
565 
566 			if (is_syscall()) {
567 				if (!IS_USER_ADDRESS(data)
568 					|| user_memcpy(data, &available, sizeof(available))
569 						!= B_OK) {
570 					return B_BAD_ADDRESS;
571 				}
572 			} else
573 				*(int*)data = available;
574 
575 			return B_OK;
576 		}
577 
578 		case B_SET_BLOCKING_IO:
579 		case B_SET_NONBLOCKING_IO:
580 		{
581 			int value = op == B_SET_NONBLOCKING_IO;
582 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
583 				sizeof(int));
584 		}
585 	}
586 
587 	return socket->first_info->control(socket->first_protocol,
588 		LEVEL_DRIVER_IOCTL, op, data, &length);
589 }
590 
591 
592 ssize_t
593 socket_read_avail(net_socket* socket)
594 {
595 	return socket->first_info->read_avail(socket->first_protocol);
596 }
597 
598 
599 ssize_t
600 socket_send_avail(net_socket* socket)
601 {
602 	return socket->first_info->send_avail(socket->first_protocol);
603 }
604 
605 
606 status_t
607 socket_send_data(net_socket* socket, net_buffer* buffer)
608 {
609 	return socket->first_info->send_data(socket->first_protocol,
610 		buffer);
611 }
612 
613 
614 status_t
615 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
616 	net_buffer** _buffer)
617 {
618 	status_t status = socket->first_info->read_data(socket->first_protocol,
619 		length, flags, _buffer);
620 	if (status != B_OK)
621 		return status;
622 
623 	if (*_buffer && length < (*_buffer)->size) {
624 		// discard any data behind the amount requested
625 		gNetBufferModule.trim(*_buffer, length);
626 	}
627 
628 	return status;
629 }
630 
631 
632 status_t
633 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
634 {
635 	MutexLocker locker(sSocketLock);
636 
637 	net_socket_private* socket = NULL;
638 	SocketList::Iterator iterator = sSocketList.GetIterator();
639 	uint32 cookie = *_cookie;
640 	uint32 count = 0;
641 
642 	while (true) {
643 		socket = iterator.Next();
644 		if (socket == NULL)
645 			return B_ENTRY_NOT_FOUND;
646 
647 		// TODO: also traverse the pending connections
648 		if (count == cookie)
649 			break;
650 
651 		if (family == -1 || family == socket->family)
652 			count++;
653 	}
654 
655 	*_cookie = count + 1;
656 
657 	stat->family = socket->family;
658 	stat->type = socket->type;
659 	stat->protocol = socket->protocol;
660 	stat->owner = socket->owner;
661 	stat->state[0] = '\0';
662 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
663 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
664 	stat->receive_queue_size = 0;
665 	stat->send_queue_size = 0;
666 
667 	// fill in protocol specific data (if supported by the protocol)
668 	size_t length = sizeof(net_stat);
669 	socket->first_info->control(socket->first_protocol, socket->protocol,
670 		NET_STAT_SOCKET, stat, &length);
671 
672 	return B_OK;
673 }
674 
675 
676 //	#pragma mark - connections
677 
678 
679 bool
680 socket_acquire(net_socket* _socket)
681 {
682 	net_socket_private* socket = (net_socket_private*)_socket;
683 
684 	// During destruction, the socket might still be accessible over its
685 	// endpoint protocol. We need to make sure the endpoint cannot acquire the
686 	// socket anymore -- while not obvious, the endpoint protocol is responsible
687 	// for the proper locking here.
688 	if (socket->CountReferences() == 0)
689 		return false;
690 
691 	socket->AcquireReference();
692 	return true;
693 }
694 
695 
696 bool
697 socket_release(net_socket* _socket)
698 {
699 	net_socket_private* socket = (net_socket_private*)_socket;
700 	return socket->ReleaseReference();
701 }
702 
703 
704 status_t
705 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
706 {
707 	net_socket_private* parent = (net_socket_private*)_parent;
708 
709 	TRACE("%s(%p)\n", __FUNCTION__, parent);
710 
711 	MutexLocker locker(parent->lock);
712 
713 	// We actually accept more pending connections to compensate for those
714 	// that never complete, and also make sure at least a single connection
715 	// can always be accepted
716 	if (parent->child_count > 3 * parent->max_backlog / 2)
717 		return ENOBUFS;
718 
719 	net_socket_private* socket;
720 	status_t status = create_socket(parent->family, parent->type,
721 		parent->protocol, &socket);
722 	if (status != B_OK)
723 		return status;
724 
725 	// inherit parent's properties
726 	socket->send = parent->send;
727 	socket->receive = parent->receive;
728 	socket->options = parent->options & ~SO_ACCEPTCONN;
729 	socket->linger = parent->linger;
730 	socket->owner = parent->owner;
731 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
732 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
733 
734 	// add to the parent's list of pending connections
735 	parent->pending_children.Add(socket);
736 	socket->parent = parent;
737 	parent->child_count++;
738 
739 	*_socket = socket;
740 	return B_OK;
741 }
742 
743 
744 /*!	Dequeues a connected child from a parent socket.
745 	It also returns a reference with the child socket.
746 */
747 status_t
748 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
749 {
750 	net_socket_private* parent = (net_socket_private*)_parent;
751 
752 	mutex_lock(&parent->lock);
753 
754 	net_socket_private* socket = parent->connected_children.RemoveHead();
755 	if (socket != NULL) {
756 		socket->AcquireReference();
757 		socket->RemoveFromParent();
758 		parent->child_count--;
759 		*_socket = socket;
760 	}
761 
762 	mutex_unlock(&parent->lock);
763 
764 	if (socket == NULL)
765 		return B_ENTRY_NOT_FOUND;
766 
767 	return B_OK;
768 }
769 
770 
771 ssize_t
772 socket_count_connected(net_socket* _parent)
773 {
774 	net_socket_private* parent = (net_socket_private*)_parent;
775 
776 	MutexLocker _(parent->lock);
777 	return parent->connected_children.Count();
778 }
779 
780 
781 status_t
782 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
783 {
784 	net_socket_private* socket = (net_socket_private*)_socket;
785 
786 	// we enforce an upper limit of connections waiting to be accepted
787 	if (backlog > 256)
788 		backlog = 256;
789 
790 	MutexLocker _(socket->lock);
791 
792 	// first remove the pending connections, then the already connected
793 	// ones as needed
794 	net_socket_private* child;
795 	while (socket->child_count > backlog
796 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
797 		child->RemoveFromParent();
798 		socket->child_count--;
799 	}
800 	while (socket->child_count > backlog
801 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
802 		child->RemoveFromParent();
803 		socket->child_count--;
804 	}
805 
806 	socket->max_backlog = backlog;
807 	return B_OK;
808 }
809 
810 
811 /*!	Returns whether or not this socket has a parent. The parent might not be
812 	valid anymore, though.
813 */
814 bool
815 socket_has_parent(net_socket* _socket)
816 {
817 	net_socket_private* socket = (net_socket_private*)_socket;
818 	return socket->parent != NULL;
819 }
820 
821 
822 /*!	The socket has been connected. It will be moved to the connected queue
823 	of its parent socket.
824 */
825 status_t
826 socket_connected(net_socket* _socket)
827 {
828 	net_socket_private* socket = (net_socket_private*)_socket;
829 
830 	TRACE("socket_connected(%p)\n", socket);
831 
832 	if (socket->parent == NULL) {
833 		socket->is_connected = true;
834 		return B_OK;
835 	}
836 
837 	BReference<net_socket_private> parent = socket->parent.GetReference();
838 	if (parent.Get() == NULL)
839 		return B_BAD_VALUE;
840 
841 	MutexLocker _(parent->lock);
842 
843 	parent->pending_children.Remove(socket);
844 	parent->connected_children.Add(socket);
845 	socket->is_connected = true;
846 
847 	// notify parent
848 	if (parent->select_pool)
849 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
850 
851 	return B_OK;
852 }
853 
854 
855 /*!	The socket has been aborted. Steals the parent's reference, and releases
856 	it.
857 */
858 status_t
859 socket_aborted(net_socket* _socket)
860 {
861 	net_socket_private* socket = (net_socket_private*)_socket;
862 
863 	TRACE("socket_aborted(%p)\n", socket);
864 
865 	BReference<net_socket_private> parent = socket->parent.GetReference();
866 	if (parent.Get() == NULL)
867 		return B_BAD_VALUE;
868 
869 	MutexLocker _(parent->lock);
870 
871 	if (socket->is_connected)
872 		parent->connected_children.Remove(socket);
873 	else
874 		parent->pending_children.Remove(socket);
875 
876 	parent->child_count--;
877 	socket->RemoveFromParent();
878 
879 	return B_OK;
880 }
881 
882 
883 //	#pragma mark - notifications
884 
885 
886 status_t
887 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
888 {
889 	net_socket_private* socket = (net_socket_private*)_socket;
890 
891 	mutex_lock(&socket->lock);
892 
893 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
894 		event);
895 
896 	mutex_unlock(&socket->lock);
897 
898 	if (status != B_OK)
899 		return status;
900 
901 	// check if the event is already present
902 	// TODO: add support for poll() types
903 
904 	switch (event) {
905 		case B_SELECT_READ:
906 		{
907 			ssize_t available = socket_read_avail(socket);
908 			if ((ssize_t)socket->receive.low_water_mark <= available
909 				|| available < B_OK)
910 				notify_select_event(sync, event);
911 			break;
912 		}
913 		case B_SELECT_WRITE:
914 		{
915 			ssize_t available = socket_send_avail(socket);
916 			if ((ssize_t)socket->send.low_water_mark <= available
917 				|| available < B_OK)
918 				notify_select_event(sync, event);
919 			break;
920 		}
921 		case B_SELECT_ERROR:
922 			if (socket->error != B_OK)
923 				notify_select_event(sync, event);
924 			break;
925 	}
926 
927 	return B_OK;
928 }
929 
930 
931 status_t
932 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
933 {
934 	net_socket_private* socket = (net_socket_private*)_socket;
935 
936 	MutexLocker _(socket->lock);
937 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
938 }
939 
940 
941 status_t
942 socket_notify(net_socket* _socket, uint8 event, int32 value)
943 {
944 	net_socket_private* socket = (net_socket_private*)_socket;
945 	bool notify = true;
946 
947 	switch (event) {
948 		case B_SELECT_READ:
949 			if ((ssize_t)socket->receive.low_water_mark > value
950 				&& value >= B_OK)
951 				notify = false;
952 			break;
953 
954 		case B_SELECT_WRITE:
955 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
956 				notify = false;
957 			break;
958 
959 		case B_SELECT_ERROR:
960 			socket->error = value;
961 			break;
962 	}
963 
964 	MutexLocker _(socket->lock);
965 
966 	if (notify && socket->select_pool != NULL) {
967 		notify_select_event_pool(socket->select_pool, event);
968 
969 		if (event == B_SELECT_ERROR) {
970 			// always notify read/write on error
971 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
972 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
973 		}
974 	}
975 
976 	return B_OK;
977 }
978 
979 
980 //	#pragma mark - standard socket API
981 
982 
983 int
984 socket_accept(net_socket* socket, struct sockaddr* address,
985 	socklen_t* _addressLength, net_socket** _acceptedSocket)
986 {
987 	if ((socket->options & SO_ACCEPTCONN) == 0)
988 		return B_BAD_VALUE;
989 
990 	net_socket* accepted;
991 	status_t status = socket->first_info->accept(socket->first_protocol,
992 		&accepted);
993 	if (status != B_OK)
994 		return status;
995 
996 	if (address && *_addressLength > 0) {
997 		memcpy(address, &accepted->peer, min_c(*_addressLength,
998 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
999 		*_addressLength = accepted->peer.ss_len;
1000 	}
1001 
1002 	*_acceptedSocket = accepted;
1003 	return B_OK;
1004 }
1005 
1006 
1007 int
1008 socket_bind(net_socket* socket, const struct sockaddr* address,
1009 	socklen_t addressLength)
1010 {
1011 	sockaddr empty;
1012 	if (address == NULL) {
1013 		// special - try to bind to an empty address, like INADDR_ANY
1014 		memset(&empty, 0, sizeof(sockaddr));
1015 		empty.sa_len = sizeof(sockaddr);
1016 		empty.sa_family = socket->family;
1017 
1018 		address = &empty;
1019 		addressLength = sizeof(sockaddr);
1020 	}
1021 
1022 	if (socket->address.ss_len != 0) {
1023 		status_t status = socket->first_info->unbind(socket->first_protocol,
1024 			(sockaddr*)&socket->address);
1025 		if (status != B_OK)
1026 			return status;
1027 	}
1028 
1029 	memcpy(&socket->address, address, sizeof(sockaddr));
1030 	socket->address.ss_len = sizeof(sockaddr_storage);
1031 
1032 	status_t status = socket->first_info->bind(socket->first_protocol,
1033 		(sockaddr*)address);
1034 	if (status != B_OK) {
1035 		// clear address again, as binding failed
1036 		socket->address.ss_len = 0;
1037 	}
1038 
1039 	return status;
1040 }
1041 
1042 
1043 int
1044 socket_connect(net_socket* socket, const struct sockaddr* address,
1045 	socklen_t addressLength)
1046 {
1047 	if (address == NULL || addressLength == 0)
1048 		return ENETUNREACH;
1049 
1050 	if (socket->address.ss_len == 0) {
1051 		// try to bind first
1052 		status_t status = socket_bind(socket, NULL, 0);
1053 		if (status != B_OK)
1054 			return status;
1055 	}
1056 
1057 	return socket->first_info->connect(socket->first_protocol, address);
1058 }
1059 
1060 
1061 int
1062 socket_getpeername(net_socket* _socket, struct sockaddr* address,
1063 	socklen_t* _addressLength)
1064 {
1065 	net_socket_private* socket = (net_socket_private*)_socket;
1066 	if (!socket->is_connected || socket->peer.ss_len == 0)
1067 		return ENOTCONN;
1068 
1069 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
1070 	*_addressLength = socket->peer.ss_len;
1071 	return B_OK;
1072 }
1073 
1074 
1075 int
1076 socket_getsockname(net_socket* socket, struct sockaddr* address,
1077 	socklen_t* _addressLength)
1078 {
1079 	if (socket->address.ss_len == 0) {
1080 		struct sockaddr buffer;
1081 		memset(&buffer, 0, sizeof(buffer));
1082 		buffer.sa_family = socket->family;
1083 
1084 		memcpy(address, &buffer, min_c(*_addressLength, sizeof(buffer)));
1085 		*_addressLength = sizeof(buffer);
1086 		return B_OK;
1087 	}
1088 
1089 	memcpy(address, &socket->address, min_c(*_addressLength,
1090 		socket->address.ss_len));
1091 	*_addressLength = socket->address.ss_len;
1092 	return B_OK;
1093 }
1094 
1095 
1096 status_t
1097 socket_get_option(net_socket* socket, int level, int option, void* value,
1098 	int* _length)
1099 {
1100 	if (level != SOL_SOCKET)
1101 		return ENOPROTOOPT;
1102 
1103 	switch (option) {
1104 		case SO_SNDBUF:
1105 		{
1106 			uint32* size = (uint32*)value;
1107 			*size = socket->send.buffer_size;
1108 			*_length = sizeof(uint32);
1109 			return B_OK;
1110 		}
1111 
1112 		case SO_RCVBUF:
1113 		{
1114 			uint32* size = (uint32*)value;
1115 			*size = socket->receive.buffer_size;
1116 			*_length = sizeof(uint32);
1117 			return B_OK;
1118 		}
1119 
1120 		case SO_SNDLOWAT:
1121 		{
1122 			uint32* size = (uint32*)value;
1123 			*size = socket->send.low_water_mark;
1124 			*_length = sizeof(uint32);
1125 			return B_OK;
1126 		}
1127 
1128 		case SO_RCVLOWAT:
1129 		{
1130 			uint32* size = (uint32*)value;
1131 			*size = socket->receive.low_water_mark;
1132 			*_length = sizeof(uint32);
1133 			return B_OK;
1134 		}
1135 
1136 		case SO_RCVTIMEO:
1137 		case SO_SNDTIMEO:
1138 		{
1139 			if (*_length < (int)sizeof(struct timeval))
1140 				return B_BAD_VALUE;
1141 
1142 			bigtime_t timeout;
1143 			if (option == SO_SNDTIMEO)
1144 				timeout = socket->send.timeout;
1145 			else
1146 				timeout = socket->receive.timeout;
1147 			if (timeout == B_INFINITE_TIMEOUT)
1148 				timeout = 0;
1149 
1150 			struct timeval* timeval = (struct timeval*)value;
1151 			timeval->tv_sec = timeout / 1000000LL;
1152 			timeval->tv_usec = timeout % 1000000LL;
1153 
1154 			*_length = sizeof(struct timeval);
1155 			return B_OK;
1156 		}
1157 
1158 		case SO_NONBLOCK:
1159 		{
1160 			int32* _set = (int32*)value;
1161 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1162 			*_length = sizeof(int32);
1163 			return B_OK;
1164 		}
1165 
1166 		case SO_ACCEPTCONN:
1167 		case SO_BROADCAST:
1168 		case SO_DEBUG:
1169 		case SO_DONTROUTE:
1170 		case SO_KEEPALIVE:
1171 		case SO_OOBINLINE:
1172 		case SO_REUSEADDR:
1173 		case SO_REUSEPORT:
1174 		case SO_USELOOPBACK:
1175 		{
1176 			int32* _set = (int32*)value;
1177 			*_set = (socket->options & option) != 0;
1178 			*_length = sizeof(int32);
1179 			return B_OK;
1180 		}
1181 
1182 		case SO_TYPE:
1183 		{
1184 			int32* _set = (int32*)value;
1185 			*_set = socket->type;
1186 			*_length = sizeof(int32);
1187 			return B_OK;
1188 		}
1189 
1190 		case SO_ERROR:
1191 		{
1192 			int32* _set = (int32*)value;
1193 			*_set = socket->error;
1194 			*_length = sizeof(int32);
1195 
1196 			socket->error = B_OK;
1197 				// clear error upon retrieval
1198 			return B_OK;
1199 		}
1200 
1201 		default:
1202 			break;
1203 	}
1204 
1205 	dprintf("socket_getsockopt: unknown option %d\n", option);
1206 	return ENOPROTOOPT;
1207 }
1208 
1209 
1210 int
1211 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1212 	int* _length)
1213 {
1214 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1215 		level, option, value, _length);
1216 }
1217 
1218 
1219 int
1220 socket_listen(net_socket* socket, int backlog)
1221 {
1222 	status_t status = socket->first_info->listen(socket->first_protocol,
1223 		backlog);
1224 	if (status == B_OK)
1225 		socket->options |= SO_ACCEPTCONN;
1226 
1227 	return status;
1228 }
1229 
1230 
1231 ssize_t
1232 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1233 	int flags)
1234 {
1235 	// If the protocol sports read_data_no_buffer() we use it.
1236 	if (socket->first_info->read_data_no_buffer != NULL)
1237 		return socket_receive_no_buffer(socket, header, data, length, flags);
1238 
1239 	size_t totalLength = length;
1240 	net_buffer* buffer;
1241 	int i;
1242 
1243 	// the convention to this function is that have header been
1244 	// present, { data, length } would have been iovec[0] and is
1245 	// always considered like that
1246 
1247 	if (header) {
1248 		// calculate the length considering all of the extra buffers
1249 		for (i = 1; i < header->msg_iovlen; i++)
1250 			totalLength += header->msg_iov[i].iov_len;
1251 	}
1252 
1253 	status_t status = socket->first_info->read_data(
1254 		socket->first_protocol, totalLength, flags, &buffer);
1255 	if (status != B_OK)
1256 		return status;
1257 
1258 	// process ancillary data
1259 	if (header != NULL) {
1260 		if (buffer != NULL && header->msg_control != NULL) {
1261 			ancillary_data_container* container
1262 				= gNetBufferModule.get_ancillary_data(buffer);
1263 			if (container != NULL)
1264 				status = process_ancillary_data(socket, container, header);
1265 			else
1266 				status = process_ancillary_data(socket, buffer, header);
1267 			if (status != B_OK) {
1268 				gNetBufferModule.free(buffer);
1269 				return status;
1270 			}
1271 		} else
1272 			header->msg_controllen = 0;
1273 	}
1274 
1275 	// TODO: - returning a NULL buffer when received 0 bytes
1276 	//         may not make much sense as we still need the address
1277 	//       - gNetBufferModule.read() uses memcpy() instead of user_memcpy
1278 
1279 	size_t nameLen = 0;
1280 
1281 	if (header) {
1282 		// TODO: - consider the control buffer options
1283 		nameLen = header->msg_namelen;
1284 		header->msg_namelen = 0;
1285 		header->msg_flags = 0;
1286 	}
1287 
1288 	if (buffer == NULL)
1289 		return 0;
1290 
1291 	size_t bytesReceived = buffer->size, bytesCopied = 0;
1292 
1293 	length = min_c(bytesReceived, length);
1294 	if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
1295 		gNetBufferModule.free(buffer);
1296 		return ENOBUFS;
1297 	}
1298 
1299 	// if first copy was a success, proceed to following
1300 	// copies as required
1301 	bytesCopied += length;
1302 
1303 	if (header) {
1304 		// we only start considering at iovec[1]
1305 		// as { data, length } is iovec[0]
1306 		for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1307 			iovec& vec = header->msg_iov[i];
1308 			size_t toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1309 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1310 					toRead) < B_OK) {
1311 				break;
1312 			}
1313 
1314 			bytesCopied += toRead;
1315 		}
1316 
1317 		if (header->msg_name != NULL) {
1318 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1319 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1320 		}
1321 	}
1322 
1323 	gNetBufferModule.free(buffer);
1324 
1325 	if (bytesCopied < bytesReceived) {
1326 		if (header)
1327 			header->msg_flags = MSG_TRUNC;
1328 
1329 		if (flags & MSG_TRUNC)
1330 			return bytesReceived;
1331 	}
1332 
1333 	return bytesCopied;
1334 }
1335 
1336 
1337 ssize_t
1338 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1339 	int flags)
1340 {
1341 	const sockaddr* address = NULL;
1342 	socklen_t addressLength = 0;
1343 	size_t bytesLeft = length;
1344 
1345 	if (length > SSIZE_MAX)
1346 		return B_BAD_VALUE;
1347 
1348 	ancillary_data_container* ancillaryData = NULL;
1349 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(NULL,
1350 		&delete_ancillary_data_container);
1351 
1352 	if (header != NULL) {
1353 		address = (const sockaddr*)header->msg_name;
1354 		addressLength = header->msg_namelen;
1355 
1356 		// get the ancillary data
1357 		if (header->msg_control != NULL) {
1358 			ancillaryData = create_ancillary_data_container();
1359 			if (ancillaryData == NULL)
1360 				return B_NO_MEMORY;
1361 			ancillaryDataDeleter.SetTo(ancillaryData);
1362 
1363 			status_t status = add_ancillary_data(socket, ancillaryData,
1364 				(cmsghdr*)header->msg_control, header->msg_controllen);
1365 			if (status != B_OK)
1366 				return status;
1367 		}
1368 	}
1369 
1370 	if (addressLength == 0)
1371 		address = NULL;
1372 	else if (address == NULL)
1373 		return B_BAD_VALUE;
1374 
1375 	if (socket->peer.ss_len != 0) {
1376 		if (address != NULL)
1377 			return EISCONN;
1378 
1379 		// socket is connected, we use that address
1380 		address = (struct sockaddr*)&socket->peer;
1381 		addressLength = socket->peer.ss_len;
1382 	}
1383 
1384 	if (address == NULL || addressLength == 0) {
1385 		// don't know where to send to:
1386 		return EDESTADDRREQ;
1387 	}
1388 
1389 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1390 		&& bytesLeft > socket->send.buffer_size)
1391 		return EMSGSIZE;
1392 
1393 	if (socket->address.ss_len == 0) {
1394 		// try to bind first
1395 		status_t status = socket_bind(socket, NULL, 0);
1396 		if (status != B_OK)
1397 			return status;
1398 	}
1399 
1400 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1401 	if (socket->first_info->send_data_no_buffer != NULL) {
1402 		iovec stackVec = { (void*)data, length };
1403 		iovec* vecs = header ? header->msg_iov : &stackVec;
1404 		int vecCount = header ? header->msg_iovlen : 1;
1405 
1406 		ssize_t written = socket->first_info->send_data_no_buffer(
1407 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1408 			addressLength);
1409 		if (written > 0)
1410 			ancillaryDataDeleter.Detach();
1411 		return written;
1412 	}
1413 
1414 	// By convention, if a header is given, the (data, length) equals the first
1415 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1416 	// the size of the remaining ones.
1417 	if (header != NULL) {
1418 		if (header->msg_iovlen <= 1)
1419 			header = NULL;
1420 		else {
1421 // TODO: The iovecs have already been copied to kernel space. Simplify!
1422 			bytesLeft += compute_user_iovec_length(header->msg_iov + 1,
1423 				header->msg_iovlen - 1);
1424 		}
1425 	}
1426 
1427 	ssize_t bytesSent = 0;
1428 	size_t vecOffset = 0;
1429 	uint32 vecIndex = 0;
1430 
1431 	while (bytesLeft > 0) {
1432 		// TODO: useful, maybe even computed header space!
1433 		net_buffer* buffer = gNetBufferModule.create(256);
1434 		if (buffer == NULL)
1435 			return ENOBUFS;
1436 
1437 		while (buffer->size < socket->send.buffer_size
1438 			&& buffer->size < bytesLeft) {
1439 			if (vecIndex > 0 && vecOffset == 0) {
1440 				// retrieve next iovec buffer from header
1441 				iovec vec;
1442 				if (user_memcpy(&vec, header->msg_iov + vecIndex, sizeof(iovec))
1443 						< B_OK) {
1444 					gNetBufferModule.free(buffer);
1445 					return B_BAD_ADDRESS;
1446 				}
1447 
1448 				data = vec.iov_base;
1449 				length = vec.iov_len;
1450 			}
1451 
1452 			size_t bytes = length;
1453 			if (buffer->size + bytes > socket->send.buffer_size)
1454 				bytes = socket->send.buffer_size - buffer->size;
1455 
1456 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1457 				gNetBufferModule.free(buffer);
1458 				return ENOBUFS;
1459 			}
1460 
1461 			if (bytes != length) {
1462 				// partial send
1463 				vecOffset = bytes;
1464 				length -= vecOffset;
1465 				data = (uint8*)data + vecOffset;
1466 			} else if (header != NULL) {
1467 				// proceed with next buffer, if any
1468 				vecOffset = 0;
1469 				vecIndex++;
1470 
1471 				if (vecIndex >= (uint32)header->msg_iovlen)
1472 					break;
1473 			}
1474 		}
1475 
1476 		// attach ancillary data to the first buffer
1477 		status_t status;
1478 		if (ancillaryData != NULL) {
1479 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1480 			ancillaryDataDeleter.Detach();
1481 			ancillaryData = NULL;
1482 		}
1483 
1484 		size_t bufferSize = buffer->size;
1485 		buffer->flags = flags;
1486 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1487 		memcpy(buffer->destination, address, addressLength);
1488 		buffer->destination->sa_len = addressLength;
1489 
1490 		status = socket->first_info->send_data(socket->first_protocol, buffer);
1491 		if (status != B_OK) {
1492 			size_t sizeAfterSend = buffer->size;
1493 			gNetBufferModule.free(buffer);
1494 
1495 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1496 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1497 				// this appears to be a partial write
1498 				return bytesSent + (bufferSize - sizeAfterSend);
1499 			}
1500 			return status;
1501 		}
1502 
1503 		bytesLeft -= bufferSize;
1504 		bytesSent += bufferSize;
1505 	}
1506 
1507 	return bytesSent;
1508 }
1509 
1510 
1511 status_t
1512 socket_set_option(net_socket* socket, int level, int option, const void* value,
1513 	int length)
1514 {
1515 	if (level != SOL_SOCKET)
1516 		return ENOPROTOOPT;
1517 
1518 	TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1519 
1520 	switch (option) {
1521 		// TODO: implement other options!
1522 		case SO_LINGER:
1523 		{
1524 			if (length < (int)sizeof(struct linger))
1525 				return B_BAD_VALUE;
1526 
1527 			struct linger* linger = (struct linger*)value;
1528 			if (linger->l_onoff) {
1529 				socket->options |= SO_LINGER;
1530 				socket->linger = linger->l_linger;
1531 			} else {
1532 				socket->options &= ~SO_LINGER;
1533 				socket->linger = 0;
1534 			}
1535 			return B_OK;
1536 		}
1537 
1538 		case SO_SNDBUF:
1539 			if (length != sizeof(uint32))
1540 				return B_BAD_VALUE;
1541 
1542 			socket->send.buffer_size = *(const uint32*)value;
1543 			return B_OK;
1544 
1545 		case SO_RCVBUF:
1546 			if (length != sizeof(uint32))
1547 				return B_BAD_VALUE;
1548 
1549 			socket->receive.buffer_size = *(const uint32*)value;
1550 			return B_OK;
1551 
1552 		case SO_SNDLOWAT:
1553 			if (length != sizeof(uint32))
1554 				return B_BAD_VALUE;
1555 
1556 			socket->send.low_water_mark = *(const uint32*)value;
1557 			return B_OK;
1558 
1559 		case SO_RCVLOWAT:
1560 			if (length != sizeof(uint32))
1561 				return B_BAD_VALUE;
1562 
1563 			socket->receive.low_water_mark = *(const uint32*)value;
1564 			return B_OK;
1565 
1566 		case SO_RCVTIMEO:
1567 		case SO_SNDTIMEO:
1568 		{
1569 			if (length != sizeof(struct timeval))
1570 				return B_BAD_VALUE;
1571 
1572 			const struct timeval* timeval = (const struct timeval*)value;
1573 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1574 			if (timeout == 0)
1575 				timeout = B_INFINITE_TIMEOUT;
1576 
1577 			if (option == SO_SNDTIMEO)
1578 				socket->send.timeout = timeout;
1579 			else
1580 				socket->receive.timeout = timeout;
1581 			return B_OK;
1582 		}
1583 
1584 		case SO_NONBLOCK:
1585 			if (length != sizeof(int32))
1586 				return B_BAD_VALUE;
1587 
1588 			if (*(const int32*)value) {
1589 				socket->send.timeout = 0;
1590 				socket->receive.timeout = 0;
1591 			} else {
1592 				socket->send.timeout = B_INFINITE_TIMEOUT;
1593 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1594 			}
1595 			return B_OK;
1596 
1597 		case SO_BROADCAST:
1598 		case SO_DEBUG:
1599 		case SO_DONTROUTE:
1600 		case SO_KEEPALIVE:
1601 		case SO_OOBINLINE:
1602 		case SO_REUSEADDR:
1603 		case SO_REUSEPORT:
1604 		case SO_USELOOPBACK:
1605 			if (length != sizeof(int32))
1606 				return B_BAD_VALUE;
1607 
1608 			if (*(const int32*)value)
1609 				socket->options |= option;
1610 			else
1611 				socket->options &= ~option;
1612 			return B_OK;
1613 
1614 		case SO_BINDTODEVICE:
1615 		{
1616 			if (length != sizeof(uint32))
1617 				return B_BAD_VALUE;
1618 
1619 			// TODO: we might want to check if the device exists at all
1620 			// (although it doesn't really harm when we don't)
1621 			socket->bound_to_device = *(const uint32*)value;
1622 			return B_OK;
1623 		}
1624 
1625 		default:
1626 			break;
1627 	}
1628 
1629 	dprintf("socket_setsockopt: unknown option %d\n", option);
1630 	return ENOPROTOOPT;
1631 }
1632 
1633 
1634 int
1635 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1636 	int length)
1637 {
1638 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1639 		level, option, value, length);
1640 }
1641 
1642 
1643 int
1644 socket_shutdown(net_socket* socket, int direction)
1645 {
1646 	return socket->first_info->shutdown(socket->first_protocol, direction);
1647 }
1648 
1649 
1650 status_t
1651 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1652 {
1653 	sockets[0] = NULL;
1654 	sockets[1] = NULL;
1655 
1656 	// create sockets
1657 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1658 	if (error != B_OK)
1659 		return error;
1660 
1661 	error = socket_open(family, type, protocol, &sockets[1]);
1662 
1663 	// bind one
1664 	if (error == B_OK)
1665 		error = socket_bind(sockets[0], NULL, 0);
1666 
1667 	// start listening
1668 	if (error == B_OK)
1669 		error = socket_listen(sockets[0], 1);
1670 
1671 	// connect them
1672 	if (error == B_OK) {
1673 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1674 			sockets[0]->address.ss_len);
1675 	}
1676 
1677 	// accept a socket
1678 	net_socket* acceptedSocket = NULL;
1679 	if (error == B_OK)
1680 		error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1681 
1682 	if (error == B_OK) {
1683 		// everything worked: close the listener socket
1684 		socket_close(sockets[0]);
1685 		socket_free(sockets[0]);
1686 		sockets[0] = acceptedSocket;
1687 	} else {
1688 		// close sockets on error
1689 		for (int i = 0; i < 2; i++) {
1690 			if (sockets[i] != NULL) {
1691 				socket_close(sockets[i]);
1692 				socket_free(sockets[i]);
1693 				sockets[i] = NULL;
1694 			}
1695 		}
1696 	}
1697 
1698 	return error;
1699 }
1700 
1701 
1702 //	#pragma mark -
1703 
1704 
1705 static status_t
1706 socket_std_ops(int32 op, ...)
1707 {
1708 	switch (op) {
1709 		case B_MODULE_INIT:
1710 		{
1711 			new (&sSocketList) SocketList;
1712 			mutex_init(&sSocketLock, "socket list");
1713 
1714 #if ENABLE_DEBUGGER_COMMANDS
1715 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1716 			add_debugger_command("socket", dump_socket, "dumps a socket");
1717 #endif
1718 			return B_OK;
1719 		}
1720 		case B_MODULE_UNINIT:
1721 			ASSERT(sSocketList.IsEmpty());
1722 			mutex_destroy(&sSocketLock);
1723 
1724 #if ENABLE_DEBUGGER_COMMANDS
1725 			remove_debugger_command("socket", dump_socket);
1726 			remove_debugger_command("sockets", dump_sockets);
1727 #endif
1728 			return B_OK;
1729 
1730 		default:
1731 			return B_ERROR;
1732 	}
1733 }
1734 
1735 
1736 net_socket_module_info gNetSocketModule = {
1737 	{
1738 		NET_SOCKET_MODULE_NAME,
1739 		0,
1740 		socket_std_ops
1741 	},
1742 	socket_open,
1743 	socket_close,
1744 	socket_free,
1745 
1746 	socket_readv,
1747 	socket_writev,
1748 	socket_control,
1749 
1750 	socket_read_avail,
1751 	socket_send_avail,
1752 
1753 	socket_send_data,
1754 	socket_receive_data,
1755 
1756 	socket_get_option,
1757 	socket_set_option,
1758 
1759 	socket_get_next_stat,
1760 
1761 	// connections
1762 	socket_acquire,
1763 	socket_release,
1764 	socket_spawn_pending,
1765 	socket_dequeue_connected,
1766 	socket_count_connected,
1767 	socket_set_max_backlog,
1768 	socket_has_parent,
1769 	socket_connected,
1770 	socket_aborted,
1771 
1772 	// notifications
1773 	socket_request_notification,
1774 	socket_cancel_notification,
1775 	socket_notify,
1776 
1777 	// standard socket API
1778 	socket_accept,
1779 	socket_bind,
1780 	socket_connect,
1781 	socket_getpeername,
1782 	socket_getsockname,
1783 	socket_getsockopt,
1784 	socket_listen,
1785 	socket_receive,
1786 	socket_send,
1787 	socket_setsockopt,
1788 	socket_shutdown,
1789 	socket_socketpair
1790 };
1791 
1792