xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 97dfeb96704e5dbc5bec32ad7b21379d0125e031)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
//#define TRACE_SOCKET
	// Uncomment the line above to enable TRACE() output in this file.
#ifdef TRACE_SOCKET
	// Tracing enabled: forward to dprintf(), with STACK_DEBUG_PREFIX placed
	// directly in front of the format string.
#	define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
#else
	// Tracing disabled: TRACE() expands to an empty statement.
#	define TRACE(x...) ;
#endif
46 
47 
struct net_socket_private;
typedef DoublyLinkedList<net_socket_private> SocketList;

// Stack-internal extension of net_socket. It can be linked into a SocketList
// (the global sSocketList or one of a parent's child queues) and is
// reference counted via BWeakReferenceable.
struct net_socket_private : net_socket,
		DoublyLinkedListLinkImpl<net_socket_private>,
		BWeakReferenceable {
	net_socket_private();
	~net_socket_private();

	void RemoveFromParent();

	BWeakReference<net_socket_private> parent;
		// weak reference to the listening socket this one was spawned from;
		// NULL once detached via RemoveFromParent()
	team_id						owner;
		// -1 until set by socket_open(), or inherited from the parent in
		// socket_spawn_pending()
	uint32						max_backlog;
		// set by socket_set_max_backlog(), which caps it at 256
	uint32						child_count;
		// number of sockets in pending_children + connected_children
	SocketList					pending_children;
		// children created by socket_spawn_pending() that are not yet
		// connected
	SocketList					connected_children;
		// children moved here by socket_connected(), waiting to be dequeued

	struct select_sync_pool*	select_pool;
		// select() registrations, see socket_request_notification()
	mutex						lock;
		// held by the functions in this file while touching the child
		// queues, child_count, and select_pool

	bool						is_connected;
		// set by socket_connected()
	bool						is_in_socket_list;
		// whether this socket has been added to sSocketList
};
72 
73 
// Forward declarations of functions that are used in this file before the
// point where they are defined.
int socket_bind(net_socket* socket, const struct sockaddr* address,
	socklen_t addressLength);
int socket_setsockopt(net_socket* socket, int level, int option,
	const void* value, int length);
ssize_t socket_read_avail(net_socket* socket);

// List of the sockets that are not queued on a parent socket (added by
// socket_open() and net_socket_private::RemoveFromParent()); accessed with
// sSocketLock held.
static SocketList sSocketList;
static mutex sSocketLock;
82 
83 
84 net_socket_private::net_socket_private()
85 	:
86 	owner(-1),
87 	max_backlog(0),
88 	child_count(0),
89 	select_pool(NULL),
90 	is_connected(false),
91 	is_in_socket_list(false)
92 {
93 	first_protocol = NULL;
94 	first_info = NULL;
95 	options = 0;
96 	linger = 0;
97 	bound_to_device = 0;
98 	error = 0;
99 
100 	address.ss_len = 0;
101 	peer.ss_len = 0;
102 
103 	mutex_init(&lock, "socket");
104 
105 	// set defaults (may be overridden by the protocols)
106 	send.buffer_size = 65535;
107 	send.low_water_mark = 1;
108 	send.timeout = B_INFINITE_TIMEOUT;
109 	receive.buffer_size = 65535;
110 	receive.low_water_mark = 1;
111 	receive.timeout = B_INFINITE_TIMEOUT;
112 }
113 
114 
115 net_socket_private::~net_socket_private()
116 {
117 	TRACE("delete net_socket %p\n", this);
118 
119 	if (parent != NULL)
120 		panic("socket still has a parent!");
121 
122 	if (is_in_socket_list) {
123 		MutexLocker _(sSocketLock);
124 		sSocketList.Remove(this);
125 	}
126 
127 	mutex_lock(&lock);
128 
129 	// also delete all children of this socket
130 	while (net_socket_private* child = pending_children.RemoveHead()) {
131 		child->RemoveFromParent();
132 	}
133 	while (net_socket_private* child = connected_children.RemoveHead()) {
134 		child->RemoveFromParent();
135 	}
136 
137 	mutex_unlock(&lock);
138 
139 	put_domain_protocols(this);
140 
141 	mutex_destroy(&lock);
142 }
143 
144 
145 void
146 net_socket_private::RemoveFromParent()
147 {
148 	ASSERT(!is_in_socket_list && parent != NULL);
149 
150 	parent = NULL;
151 
152 	mutex_lock(&sSocketLock);
153 	sSocketList.Add(this);
154 	mutex_unlock(&sSocketLock);
155 
156 	is_in_socket_list = true;
157 
158 	ReleaseReference();
159 }
160 
161 
162 //	#pragma mark -
163 
164 
165 static size_t
166 compute_user_iovec_length(iovec* userVec, uint32 count)
167 {
168 	size_t length = 0;
169 
170 	for (uint32 i = 0; i < count; i++) {
171 		iovec vec;
172 		if (user_memcpy(&vec, userVec + i, sizeof(iovec)) < B_OK)
173 			return 0;
174 
175 		length += vec.iov_len;
176 	}
177 
178 	return length;
179 }
180 
181 
182 static status_t
183 create_socket(int family, int type, int protocol, net_socket_private** _socket)
184 {
185 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
186 	if (socket == NULL)
187 		return B_NO_MEMORY;
188 	status_t status = socket->InitCheck();
189 	if (status != B_OK) {
190 		delete socket;
191 		return status;
192 	}
193 
194 	socket->family = family;
195 	socket->type = type;
196 	socket->protocol = protocol;
197 
198 	status = get_domain_protocols(socket);
199 	if (status != B_OK) {
200 		delete socket;
201 		return status;
202 	}
203 
204 	TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
205 		socket->type, socket->protocol);
206 
207 #ifdef TRACE_SOCKET
208 	net_protocol* current = socket->first_protocol;
209 	for (int i = 0; current != NULL; current = current->next, i++)
210 		TRACE("  [%d] %p  %s\n", i, current, current->module->info.name);
211 #endif
212 
213 	*_socket = socket;
214 	return B_OK;
215 }
216 
217 
218 static status_t
219 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
220 	void* data, size_t dataLen)
221 {
222 	cmsghdr* header = (cmsghdr*)data;
223 
224 	while (dataLen > 0) {
225 		if (header->cmsg_len < sizeof(cmsghdr) || header->cmsg_len > dataLen)
226 			return B_BAD_VALUE;
227 
228 		if (socket->first_info->add_ancillary_data == NULL)
229 			return B_NOT_SUPPORTED;
230 
231 		status_t status = socket->first_info->add_ancillary_data(
232 			socket->first_protocol, container, header);
233 		if (status != B_OK)
234 			return status;
235 
236 		dataLen -= _ALIGN(header->cmsg_len);
237 		header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
238 	}
239 
240 	return B_OK;
241 }
242 
243 
244 static status_t
245 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
246 	msghdr* messageHeader)
247 {
248 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
249 	int dataBufferLen = messageHeader->msg_controllen;
250 
251 	if (container == NULL || dataBuffer == NULL) {
252 		messageHeader->msg_controllen = 0;
253 		return B_OK;
254 	}
255 
256 	ancillary_data_header header;
257 	void* data = NULL;
258 
259 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
260 		if (socket->first_info->process_ancillary_data == NULL)
261 			return B_NOT_SUPPORTED;
262 
263 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
264 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
265 		if (bytesWritten < 0)
266 			return bytesWritten;
267 
268 		dataBuffer += bytesWritten;
269 		dataBufferLen -= bytesWritten;
270 	}
271 
272 	messageHeader->msg_controllen -= dataBufferLen;
273 
274 	return B_OK;
275 }
276 
277 
278 static status_t
279 process_ancillary_data(net_socket* socket,
280 	net_buffer* buffer, msghdr* messageHeader)
281 {
282 	void *dataBuffer = messageHeader->msg_control;
283 	ssize_t bytesWritten;
284 
285 	if (dataBuffer == NULL) {
286 		messageHeader->msg_controllen = 0;
287 		return B_OK;
288 	}
289 
290 	if (socket->first_info->process_ancillary_data_no_container == NULL)
291 		return B_NOT_SUPPORTED;
292 
293 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
294 		socket->first_protocol, buffer, dataBuffer,
295 		messageHeader->msg_controllen);
296 	if (bytesWritten < 0)
297 		return bytesWritten;
298 	messageHeader->msg_controllen = bytesWritten;
299 
300 	return B_OK;
301 }
302 
303 
304 static ssize_t
305 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
306 	size_t length, int flags)
307 {
308 	iovec stackVec = { data, length };
309 	iovec* vecs = header ? header->msg_iov : &stackVec;
310 	int vecCount = header ? header->msg_iovlen : 1;
311 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
312 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
313 
314 	ancillary_data_container* ancillaryData = NULL;
315 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
316 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
317 		addressLen);
318 	if (bytesRead < 0)
319 		return bytesRead;
320 
321 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(ancillaryData,
322 		&delete_ancillary_data_container);
323 
324 	// process ancillary data
325 	if (header != NULL) {
326 		status_t status = process_ancillary_data(socket, ancillaryData, header);
327 		if (status != B_OK)
328 			return status;
329 
330 		header->msg_flags = 0;
331 	}
332 
333 	return bytesRead;
334 }
335 
336 
337 #if ENABLE_DEBUGGER_COMMANDS
338 
339 
340 static void
341 print_socket_line(net_socket_private* socket, const char* prefix)
342 {
343 	BReference<net_socket_private> parent = socket->parent.GetReference();
344 	kprintf("%s%p %2d.%2d.%2d %6" B_PRId32 " %p %p  %p%s\n", prefix, socket,
345 		socket->family, socket->type, socket->protocol, socket->owner,
346 		socket->first_protocol, socket->first_info, parent.Get(),
347 		parent.Get() != NULL ? socket->is_connected ? " (c)" : " (p)" : "");
348 }
349 
350 
351 static int
352 dump_socket(int argc, char** argv)
353 {
354 	if (argc < 2) {
355 		kprintf("usage: %s [address]\n", argv[0]);
356 		return 0;
357 	}
358 
359 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
360 
361 	kprintf("SOCKET %p\n", socket);
362 	kprintf("  family.type.protocol: %d.%d.%d\n",
363 		socket->family, socket->type, socket->protocol);
364 	BReference<net_socket_private> parent = socket->parent.GetReference();
365 	kprintf("  parent:               %p\n", parent.Get());
366 	kprintf("  first protocol:       %p\n", socket->first_protocol);
367 	kprintf("  first module_info:    %p\n", socket->first_info);
368 	kprintf("  options:              %x\n", socket->options);
369 	kprintf("  linger:               %d\n", socket->linger);
370 	kprintf("  bound to device:      %" B_PRIu32 "\n", socket->bound_to_device);
371 	kprintf("  owner:                %" B_PRId32 "\n", socket->owner);
372 	kprintf("  max backlog:          %" B_PRId32 "\n", socket->max_backlog);
373 	kprintf("  is connected:         %d\n", socket->is_connected);
374 	kprintf("  child_count:          %" B_PRIu32 "\n", socket->child_count);
375 
376 	if (socket->child_count == 0)
377 		return 0;
378 
379 	kprintf("    pending children:\n");
380 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
381 	while (net_socket_private* child = iterator.Next()) {
382 		print_socket_line(child, "      ");
383 	}
384 
385 	kprintf("    connected children:\n");
386 	iterator = socket->connected_children.GetIterator();
387 	while (net_socket_private* child = iterator.Next()) {
388 		print_socket_line(child, "      ");
389 	}
390 
391 	return 0;
392 }
393 
394 
395 static int
396 dump_sockets(int argc, char** argv)
397 {
398 	kprintf("address        kind  owner protocol   module_info parent\n");
399 
400 	SocketList::Iterator iterator = sSocketList.GetIterator();
401 	while (net_socket_private* socket = iterator.Next()) {
402 		print_socket_line(socket, "");
403 
404 		SocketList::Iterator childIterator
405 			= socket->pending_children.GetIterator();
406 		while (net_socket_private* child = childIterator.Next()) {
407 			print_socket_line(child, " ");
408 		}
409 
410 		childIterator = socket->connected_children.GetIterator();
411 		while (net_socket_private* child = childIterator.Next()) {
412 			print_socket_line(child, " ");
413 		}
414 	}
415 
416 	return 0;
417 }
418 
419 
420 #endif	// ENABLE_DEBUGGER_COMMANDS
421 
422 
423 //	#pragma mark -
424 
425 
426 status_t
427 socket_open(int family, int type, int protocol, net_socket** _socket)
428 {
429 	net_socket_private* socket;
430 	status_t status = create_socket(family, type, protocol, &socket);
431 	if (status != B_OK)
432 		return status;
433 
434 	status = socket->first_info->open(socket->first_protocol);
435 	if (status != B_OK) {
436 		delete socket;
437 		return status;
438 	}
439 
440 	socket->owner = team_get_current_team_id();
441 	socket->is_in_socket_list = true;
442 
443 	mutex_lock(&sSocketLock);
444 	sSocketList.Add(socket);
445 	mutex_unlock(&sSocketLock);
446 
447 	*_socket = socket;
448 	return B_OK;
449 }
450 
451 
452 status_t
453 socket_close(net_socket* _socket)
454 {
455 	net_socket_private* socket = (net_socket_private*)_socket;
456 	return socket->first_info->close(socket->first_protocol);
457 }
458 
459 
460 void
461 socket_free(net_socket* _socket)
462 {
463 	net_socket_private* socket = (net_socket_private*)_socket;
464 	socket->first_info->free(socket->first_protocol);
465 	socket->ReleaseReference();
466 }
467 
468 
/*!	Stub: does not read anything and ignores all of its arguments.
	\return Always -1.
*/
status_t
socket_readv(net_socket* socket, const iovec* vecs, size_t vecCount,
	size_t* _length)
{
	return -1;
}
475 
476 
477 status_t
478 socket_writev(net_socket* socket, const iovec* vecs, size_t vecCount,
479 	size_t* _length)
480 {
481 	if (socket->peer.ss_len == 0)
482 		return ECONNRESET;
483 
484 	if (socket->address.ss_len == 0) {
485 		// try to bind first
486 		status_t status = socket_bind(socket, NULL, 0);
487 		if (status != B_OK)
488 			return status;
489 	}
490 
491 	// TODO: useful, maybe even computed header space!
492 	net_buffer* buffer = gNetBufferModule.create(256);
493 	if (buffer == NULL)
494 		return ENOBUFS;
495 
496 	// copy data into buffer
497 
498 	for (uint32 i = 0; i < vecCount; i++) {
499 		if (gNetBufferModule.append(buffer, vecs[i].iov_base,
500 				vecs[i].iov_len) < B_OK) {
501 			gNetBufferModule.free(buffer);
502 			return ENOBUFS;
503 		}
504 	}
505 
506 	memcpy(buffer->source, &socket->address, socket->address.ss_len);
507 	memcpy(buffer->destination, &socket->peer, socket->peer.ss_len);
508 	size_t size = buffer->size;
509 
510 	ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
511 		buffer);
512 	if (bytesWritten < B_OK) {
513 		if (buffer->size != size) {
514 			// this appears to be a partial write
515 			*_length = size - buffer->size;
516 		}
517 		gNetBufferModule.free(buffer);
518 		return bytesWritten;
519 	}
520 
521 	*_length = bytesWritten;
522 	return B_OK;
523 }
524 
525 
526 status_t
527 socket_control(net_socket* socket, int32 op, void* data, size_t length)
528 {
529 	switch (op) {
530 		case FIONBIO:
531 		{
532 			if (data == NULL)
533 				return B_BAD_VALUE;
534 
535 			int value;
536 			if (is_syscall()) {
537 				if (!IS_USER_ADDRESS(data)
538 					|| user_memcpy(&value, data, sizeof(int)) != B_OK) {
539 					return B_BAD_ADDRESS;
540 				}
541 			} else
542 				value = *(int*)data;
543 
544 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
545 				sizeof(int));
546 		}
547 
548 		case FIONREAD:
549 		{
550 			if (data == NULL)
551 				return B_BAD_VALUE;
552 
553 			int available = (int)socket_read_avail(socket);
554 			if (available < 0)
555 				return available;
556 
557 			if (is_syscall()) {
558 				if (!IS_USER_ADDRESS(data)
559 					|| user_memcpy(data, &available, sizeof(available))
560 						!= B_OK) {
561 					return B_BAD_ADDRESS;
562 				}
563 			} else
564 				*(int*)data = available;
565 
566 			return B_OK;
567 		}
568 
569 		case B_SET_BLOCKING_IO:
570 		case B_SET_NONBLOCKING_IO:
571 		{
572 			int value = op == B_SET_NONBLOCKING_IO;
573 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
574 				sizeof(int));
575 		}
576 	}
577 
578 	return socket->first_info->control(socket->first_protocol,
579 		LEVEL_DRIVER_IOCTL, op, data, &length);
580 }
581 
582 
583 ssize_t
584 socket_read_avail(net_socket* socket)
585 {
586 	return socket->first_info->read_avail(socket->first_protocol);
587 }
588 
589 
590 ssize_t
591 socket_send_avail(net_socket* socket)
592 {
593 	return socket->first_info->send_avail(socket->first_protocol);
594 }
595 
596 
597 status_t
598 socket_send_data(net_socket* socket, net_buffer* buffer)
599 {
600 	return socket->first_info->send_data(socket->first_protocol,
601 		buffer);
602 }
603 
604 
605 status_t
606 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
607 	net_buffer** _buffer)
608 {
609 	status_t status = socket->first_info->read_data(socket->first_protocol,
610 		length, flags, _buffer);
611 	if (status != B_OK)
612 		return status;
613 
614 	if (*_buffer && length < (*_buffer)->size) {
615 		// discard any data behind the amount requested
616 		gNetBufferModule.trim(*_buffer, length);
617 	}
618 
619 	return status;
620 }
621 
622 
623 status_t
624 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
625 {
626 	MutexLocker locker(sSocketLock);
627 
628 	net_socket_private* socket = NULL;
629 	SocketList::Iterator iterator = sSocketList.GetIterator();
630 	uint32 cookie = *_cookie;
631 	uint32 count = 0;
632 
633 	while (true) {
634 		socket = iterator.Next();
635 		if (socket == NULL)
636 			return B_ENTRY_NOT_FOUND;
637 
638 		// TODO: also traverse the pending connections
639 		if (count == cookie)
640 			break;
641 
642 		if (family == -1 || family == socket->family)
643 			count++;
644 	}
645 
646 	*_cookie = count + 1;
647 
648 	stat->family = socket->family;
649 	stat->type = socket->type;
650 	stat->protocol = socket->protocol;
651 	stat->owner = socket->owner;
652 	stat->state[0] = '\0';
653 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
654 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
655 	stat->receive_queue_size = 0;
656 	stat->send_queue_size = 0;
657 
658 	// fill in protocol specific data (if supported by the protocol)
659 	size_t length = sizeof(net_stat);
660 	socket->first_info->control(socket->first_protocol, socket->protocol,
661 		NET_STAT_SOCKET, stat, &length);
662 
663 	return B_OK;
664 }
665 
666 
667 //	#pragma mark - connections
668 
669 
670 bool
671 socket_acquire(net_socket* _socket)
672 {
673 	net_socket_private* socket = (net_socket_private*)_socket;
674 
675 	// During destruction, the socket might still be accessible over its
676 	// endpoint protocol. We need to make sure the endpoint cannot acquire the
677 	// socket anymore -- while not obvious, the endpoint protocol is responsible
678 	// for the proper locking here.
679 	if (socket->CountReferences() == 0)
680 		return false;
681 
682 	socket->AcquireReference();
683 	return true;
684 }
685 
686 
687 bool
688 socket_release(net_socket* _socket)
689 {
690 	net_socket_private* socket = (net_socket_private*)_socket;
691 	return socket->ReleaseReference();
692 }
693 
694 
695 status_t
696 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
697 {
698 	net_socket_private* parent = (net_socket_private*)_parent;
699 
700 	TRACE("%s(%p)\n", __FUNCTION__, parent);
701 
702 	MutexLocker locker(parent->lock);
703 
704 	// We actually accept more pending connections to compensate for those
705 	// that never complete, and also make sure at least a single connection
706 	// can always be accepted
707 	if (parent->child_count > 3 * parent->max_backlog / 2)
708 		return ENOBUFS;
709 
710 	net_socket_private* socket;
711 	status_t status = create_socket(parent->family, parent->type,
712 		parent->protocol, &socket);
713 	if (status != B_OK)
714 		return status;
715 
716 	// inherit parent's properties
717 	socket->send = parent->send;
718 	socket->receive = parent->receive;
719 	socket->options = parent->options & ~SO_ACCEPTCONN;
720 	socket->linger = parent->linger;
721 	socket->owner = parent->owner;
722 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
723 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
724 
725 	// add to the parent's list of pending connections
726 	parent->pending_children.Add(socket);
727 	socket->parent = parent;
728 	parent->child_count++;
729 
730 	*_socket = socket;
731 	return B_OK;
732 }
733 
734 
735 /*!	Dequeues a connected child from a parent socket.
736 	It also returns a reference with the child socket.
737 */
738 status_t
739 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
740 {
741 	net_socket_private* parent = (net_socket_private*)_parent;
742 
743 	mutex_lock(&parent->lock);
744 
745 	net_socket_private* socket = parent->connected_children.RemoveHead();
746 	if (socket != NULL) {
747 		socket->AcquireReference();
748 		socket->RemoveFromParent();
749 		parent->child_count--;
750 		*_socket = socket;
751 	}
752 
753 	mutex_unlock(&parent->lock);
754 
755 	if (socket == NULL)
756 		return B_ENTRY_NOT_FOUND;
757 
758 	return B_OK;
759 }
760 
761 
762 ssize_t
763 socket_count_connected(net_socket* _parent)
764 {
765 	net_socket_private* parent = (net_socket_private*)_parent;
766 
767 	MutexLocker _(parent->lock);
768 	return parent->connected_children.Count();
769 }
770 
771 
772 status_t
773 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
774 {
775 	net_socket_private* socket = (net_socket_private*)_socket;
776 
777 	// we enforce an upper limit of connections waiting to be accepted
778 	if (backlog > 256)
779 		backlog = 256;
780 
781 	MutexLocker _(socket->lock);
782 
783 	// first remove the pending connections, then the already connected
784 	// ones as needed
785 	net_socket_private* child;
786 	while (socket->child_count > backlog
787 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
788 		child->RemoveFromParent();
789 		socket->child_count--;
790 	}
791 	while (socket->child_count > backlog
792 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
793 		child->RemoveFromParent();
794 		socket->child_count--;
795 	}
796 
797 	socket->max_backlog = backlog;
798 	return B_OK;
799 }
800 
801 
802 /*!	Returns whether or not this socket has a parent. The parent might not be
803 	valid anymore, though.
804 */
805 bool
806 socket_has_parent(net_socket* _socket)
807 {
808 	net_socket_private* socket = (net_socket_private*)_socket;
809 	return socket->parent != NULL;
810 }
811 
812 
813 /*!	The socket has been connected. It will be moved to the connected queue
814 	of its parent socket.
815 */
816 status_t
817 socket_connected(net_socket* _socket)
818 {
819 	net_socket_private* socket = (net_socket_private*)_socket;
820 
821 	TRACE("socket_connected(%p)\n", socket);
822 
823 	BReference<net_socket_private> parent = socket->parent.GetReference();
824 	if (parent.Get() == NULL)
825 		return B_BAD_VALUE;
826 
827 	MutexLocker _(parent->lock);
828 
829 	parent->pending_children.Remove(socket);
830 	parent->connected_children.Add(socket);
831 	socket->is_connected = true;
832 
833 	// notify parent
834 	if (parent->select_pool)
835 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
836 
837 	return B_OK;
838 }
839 
840 
841 /*!	The socket has been aborted. Steals the parent's reference, and releases
842 	it.
843 */
844 status_t
845 socket_aborted(net_socket* _socket)
846 {
847 	net_socket_private* socket = (net_socket_private*)_socket;
848 
849 	TRACE("socket_aborted(%p)\n", socket);
850 
851 	BReference<net_socket_private> parent = socket->parent.GetReference();
852 	if (parent.Get() == NULL)
853 		return B_BAD_VALUE;
854 
855 	MutexLocker _(parent->lock);
856 
857 	if (socket->is_connected)
858 		parent->connected_children.Remove(socket);
859 	else
860 		parent->pending_children.Remove(socket);
861 
862 	parent->child_count--;
863 	socket->RemoveFromParent();
864 
865 	return B_OK;
866 }
867 
868 
869 //	#pragma mark - notifications
870 
871 
872 status_t
873 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
874 {
875 	net_socket_private* socket = (net_socket_private*)_socket;
876 
877 	mutex_lock(&socket->lock);
878 
879 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
880 		event);
881 
882 	mutex_unlock(&socket->lock);
883 
884 	if (status != B_OK)
885 		return status;
886 
887 	// check if the event is already present
888 	// TODO: add support for poll() types
889 
890 	switch (event) {
891 		case B_SELECT_READ:
892 		{
893 			ssize_t available = socket_read_avail(socket);
894 			if ((ssize_t)socket->receive.low_water_mark <= available
895 				|| available < B_OK)
896 				notify_select_event(sync, event);
897 			break;
898 		}
899 		case B_SELECT_WRITE:
900 		{
901 			ssize_t available = socket_send_avail(socket);
902 			if ((ssize_t)socket->send.low_water_mark <= available
903 				|| available < B_OK)
904 				notify_select_event(sync, event);
905 			break;
906 		}
907 		case B_SELECT_ERROR:
908 			if (socket->error != B_OK)
909 				notify_select_event(sync, event);
910 			break;
911 	}
912 
913 	return B_OK;
914 }
915 
916 
917 status_t
918 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
919 {
920 	net_socket_private* socket = (net_socket_private*)_socket;
921 
922 	MutexLocker _(socket->lock);
923 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
924 }
925 
926 
927 status_t
928 socket_notify(net_socket* _socket, uint8 event, int32 value)
929 {
930 	net_socket_private* socket = (net_socket_private*)_socket;
931 	bool notify = true;
932 
933 	switch (event) {
934 		case B_SELECT_READ:
935 			if ((ssize_t)socket->receive.low_water_mark > value
936 				&& value >= B_OK)
937 				notify = false;
938 			break;
939 
940 		case B_SELECT_WRITE:
941 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
942 				notify = false;
943 			break;
944 
945 		case B_SELECT_ERROR:
946 			socket->error = value;
947 			break;
948 	}
949 
950 	MutexLocker _(socket->lock);
951 
952 	if (notify && socket->select_pool != NULL) {
953 		notify_select_event_pool(socket->select_pool, event);
954 
955 		if (event == B_SELECT_ERROR) {
956 			// always notify read/write on error
957 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
958 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
959 		}
960 	}
961 
962 	return B_OK;
963 }
964 
965 
966 //	#pragma mark - standard socket API
967 
968 
969 int
970 socket_accept(net_socket* socket, struct sockaddr* address,
971 	socklen_t* _addressLength, net_socket** _acceptedSocket)
972 {
973 	if ((socket->options & SO_ACCEPTCONN) == 0)
974 		return B_BAD_VALUE;
975 
976 	net_socket* accepted;
977 	status_t status = socket->first_info->accept(socket->first_protocol,
978 		&accepted);
979 	if (status != B_OK)
980 		return status;
981 
982 	if (address && *_addressLength > 0) {
983 		memcpy(address, &accepted->peer, min_c(*_addressLength,
984 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
985 		*_addressLength = accepted->peer.ss_len;
986 	}
987 
988 	*_acceptedSocket = accepted;
989 	return B_OK;
990 }
991 
992 
993 int
994 socket_bind(net_socket* socket, const struct sockaddr* address,
995 	socklen_t addressLength)
996 {
997 	sockaddr empty;
998 	if (address == NULL) {
999 		// special - try to bind to an empty address, like INADDR_ANY
1000 		memset(&empty, 0, sizeof(sockaddr));
1001 		empty.sa_len = sizeof(sockaddr);
1002 		empty.sa_family = socket->family;
1003 
1004 		address = &empty;
1005 		addressLength = sizeof(sockaddr);
1006 	}
1007 
1008 	if (socket->address.ss_len != 0) {
1009 		status_t status = socket->first_info->unbind(socket->first_protocol,
1010 			(sockaddr*)&socket->address);
1011 		if (status != B_OK)
1012 			return status;
1013 	}
1014 
1015 	memcpy(&socket->address, address, sizeof(sockaddr));
1016 	socket->address.ss_len = sizeof(sockaddr_storage);
1017 
1018 	status_t status = socket->first_info->bind(socket->first_protocol,
1019 		(sockaddr*)address);
1020 	if (status != B_OK) {
1021 		// clear address again, as binding failed
1022 		socket->address.ss_len = 0;
1023 	}
1024 
1025 	return status;
1026 }
1027 
1028 
1029 int
1030 socket_connect(net_socket* socket, const struct sockaddr* address,
1031 	socklen_t addressLength)
1032 {
1033 	if (address == NULL || addressLength == 0)
1034 		return ENETUNREACH;
1035 
1036 	if (socket->address.ss_len == 0) {
1037 		// try to bind first
1038 		status_t status = socket_bind(socket, NULL, 0);
1039 		if (status != B_OK)
1040 			return status;
1041 	}
1042 
1043 	return socket->first_info->connect(socket->first_protocol, address);
1044 }
1045 
1046 
1047 int
1048 socket_getpeername(net_socket* socket, struct sockaddr* address,
1049 	socklen_t* _addressLength)
1050 {
1051 	if (socket->peer.ss_len == 0)
1052 		return ENOTCONN;
1053 
1054 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
1055 	*_addressLength = socket->peer.ss_len;
1056 	return B_OK;
1057 }
1058 
1059 
1060 int
1061 socket_getsockname(net_socket* socket, struct sockaddr* address,
1062 	socklen_t* _addressLength)
1063 {
1064 	if (socket->address.ss_len == 0)
1065 		return ENOTCONN;
1066 
1067 	memcpy(address, &socket->address, min_c(*_addressLength,
1068 		socket->address.ss_len));
1069 	*_addressLength = socket->address.ss_len;
1070 	return B_OK;
1071 }
1072 
1073 
1074 status_t
1075 socket_get_option(net_socket* socket, int level, int option, void* value,
1076 	int* _length)
1077 {
1078 	if (level != SOL_SOCKET)
1079 		return ENOPROTOOPT;
1080 
1081 	switch (option) {
1082 		case SO_SNDBUF:
1083 		{
1084 			uint32* size = (uint32*)value;
1085 			*size = socket->send.buffer_size;
1086 			*_length = sizeof(uint32);
1087 			return B_OK;
1088 		}
1089 
1090 		case SO_RCVBUF:
1091 		{
1092 			uint32* size = (uint32*)value;
1093 			*size = socket->receive.buffer_size;
1094 			*_length = sizeof(uint32);
1095 			return B_OK;
1096 		}
1097 
1098 		case SO_SNDLOWAT:
1099 		{
1100 			uint32* size = (uint32*)value;
1101 			*size = socket->send.low_water_mark;
1102 			*_length = sizeof(uint32);
1103 			return B_OK;
1104 		}
1105 
1106 		case SO_RCVLOWAT:
1107 		{
1108 			uint32* size = (uint32*)value;
1109 			*size = socket->receive.low_water_mark;
1110 			*_length = sizeof(uint32);
1111 			return B_OK;
1112 		}
1113 
1114 		case SO_RCVTIMEO:
1115 		case SO_SNDTIMEO:
1116 		{
1117 			if (*_length < (int)sizeof(struct timeval))
1118 				return B_BAD_VALUE;
1119 
1120 			bigtime_t timeout;
1121 			if (option == SO_SNDTIMEO)
1122 				timeout = socket->send.timeout;
1123 			else
1124 				timeout = socket->receive.timeout;
1125 			if (timeout == B_INFINITE_TIMEOUT)
1126 				timeout = 0;
1127 
1128 			struct timeval* timeval = (struct timeval*)value;
1129 			timeval->tv_sec = timeout / 1000000LL;
1130 			timeval->tv_usec = timeout % 1000000LL;
1131 
1132 			*_length = sizeof(struct timeval);
1133 			return B_OK;
1134 		}
1135 
1136 		case SO_NONBLOCK:
1137 		{
1138 			int32* _set = (int32*)value;
1139 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1140 			*_length = sizeof(int32);
1141 			return B_OK;
1142 		}
1143 
1144 		case SO_ACCEPTCONN:
1145 		case SO_BROADCAST:
1146 		case SO_DEBUG:
1147 		case SO_DONTROUTE:
1148 		case SO_KEEPALIVE:
1149 		case SO_OOBINLINE:
1150 		case SO_REUSEADDR:
1151 		case SO_REUSEPORT:
1152 		case SO_USELOOPBACK:
1153 		{
1154 			int32* _set = (int32*)value;
1155 			*_set = (socket->options & option) != 0;
1156 			*_length = sizeof(int32);
1157 			return B_OK;
1158 		}
1159 
1160 		case SO_TYPE:
1161 		{
1162 			int32* _set = (int32*)value;
1163 			*_set = socket->type;
1164 			*_length = sizeof(int32);
1165 			return B_OK;
1166 		}
1167 
1168 		case SO_ERROR:
1169 		{
1170 			int32* _set = (int32*)value;
1171 			*_set = socket->error;
1172 			*_length = sizeof(int32);
1173 
1174 			socket->error = B_OK;
1175 				// clear error upon retrieval
1176 			return B_OK;
1177 		}
1178 
1179 		default:
1180 			break;
1181 	}
1182 
1183 	dprintf("socket_getsockopt: unknown option %d\n", option);
1184 	return ENOPROTOOPT;
1185 }
1186 
1187 
1188 int
1189 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1190 	int* _length)
1191 {
1192 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1193 		level, option, value, _length);
1194 }
1195 
1196 
1197 int
1198 socket_listen(net_socket* socket, int backlog)
1199 {
1200 	status_t status = socket->first_info->listen(socket->first_protocol,
1201 		backlog);
1202 	if (status == B_OK)
1203 		socket->options |= SO_ACCEPTCONN;
1204 
1205 	return status;
1206 }
1207 
1208 
1209 ssize_t
1210 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1211 	int flags)
1212 {
1213 	// If the protocol sports read_data_no_buffer() we use it.
1214 	if (socket->first_info->read_data_no_buffer != NULL)
1215 		return socket_receive_no_buffer(socket, header, data, length, flags);
1216 
1217 	size_t totalLength = length;
1218 	net_buffer* buffer;
1219 	int i;
1220 
1221 	// the convention to this function is that have header been
1222 	// present, { data, length } would have been iovec[0] and is
1223 	// always considered like that
1224 
1225 	if (header) {
1226 		// calculate the length considering all of the extra buffers
1227 		for (i = 1; i < header->msg_iovlen; i++)
1228 			totalLength += header->msg_iov[i].iov_len;
1229 	}
1230 
1231 	status_t status = socket->first_info->read_data(
1232 		socket->first_protocol, totalLength, flags, &buffer);
1233 	if (status != B_OK)
1234 		return status;
1235 
1236 	// process ancillary data
1237 	if (header != NULL) {
1238 		if (buffer != NULL && header->msg_control != NULL) {
1239 			ancillary_data_container* container
1240 				= gNetBufferModule.get_ancillary_data(buffer);
1241 			if (container != NULL)
1242 				status = process_ancillary_data(socket, container, header);
1243 			else
1244 				status = process_ancillary_data(socket, buffer, header);
1245 			if (status != B_OK) {
1246 				gNetBufferModule.free(buffer);
1247 				return status;
1248 			}
1249 		} else
1250 			header->msg_controllen = 0;
1251 	}
1252 
1253 	// TODO: - returning a NULL buffer when received 0 bytes
1254 	//         may not make much sense as we still need the address
1255 	//       - gNetBufferModule.read() uses memcpy() instead of user_memcpy
1256 
1257 	size_t nameLen = 0;
1258 
1259 	if (header) {
1260 		// TODO: - consider the control buffer options
1261 		nameLen = header->msg_namelen;
1262 		header->msg_namelen = 0;
1263 		header->msg_flags = 0;
1264 	}
1265 
1266 	if (buffer == NULL)
1267 		return 0;
1268 
1269 	size_t bytesReceived = buffer->size, bytesCopied = 0;
1270 
1271 	length = min_c(bytesReceived, length);
1272 	if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
1273 		gNetBufferModule.free(buffer);
1274 		return ENOBUFS;
1275 	}
1276 
1277 	// if first copy was a success, proceed to following
1278 	// copies as required
1279 	bytesCopied += length;
1280 
1281 	if (header) {
1282 		// we only start considering at iovec[1]
1283 		// as { data, length } is iovec[0]
1284 		for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1285 			iovec& vec = header->msg_iov[i];
1286 			size_t toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1287 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1288 					toRead) < B_OK) {
1289 				break;
1290 			}
1291 
1292 			bytesCopied += toRead;
1293 		}
1294 
1295 		if (header->msg_name != NULL) {
1296 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1297 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1298 		}
1299 	}
1300 
1301 	gNetBufferModule.free(buffer);
1302 
1303 	if (bytesCopied < bytesReceived) {
1304 		if (header)
1305 			header->msg_flags = MSG_TRUNC;
1306 
1307 		if (flags & MSG_TRUNC)
1308 			return bytesReceived;
1309 	}
1310 
1311 	return bytesCopied;
1312 }
1313 
1314 
1315 ssize_t
1316 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1317 	int flags)
1318 {
1319 	const sockaddr* address = NULL;
1320 	socklen_t addressLength = 0;
1321 	size_t bytesLeft = length;
1322 
1323 	if (length > SSIZE_MAX)
1324 		return B_BAD_VALUE;
1325 
1326 	ancillary_data_container* ancillaryData = NULL;
1327 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(NULL,
1328 		&delete_ancillary_data_container);
1329 
1330 	if (header != NULL) {
1331 		address = (const sockaddr*)header->msg_name;
1332 		addressLength = header->msg_namelen;
1333 
1334 		// get the ancillary data
1335 		if (header->msg_control != NULL) {
1336 			ancillaryData = create_ancillary_data_container();
1337 			if (ancillaryData == NULL)
1338 				return B_NO_MEMORY;
1339 			ancillaryDataDeleter.SetTo(ancillaryData);
1340 
1341 			status_t status = add_ancillary_data(socket, ancillaryData,
1342 				(cmsghdr*)header->msg_control, header->msg_controllen);
1343 			if (status != B_OK)
1344 				return status;
1345 		}
1346 	}
1347 
1348 	if (addressLength == 0)
1349 		address = NULL;
1350 	else if (address == NULL)
1351 		return B_BAD_VALUE;
1352 
1353 	if (socket->peer.ss_len != 0) {
1354 		if (address != NULL)
1355 			return EISCONN;
1356 
1357 		// socket is connected, we use that address
1358 		address = (struct sockaddr*)&socket->peer;
1359 		addressLength = socket->peer.ss_len;
1360 	}
1361 
1362 	if (address == NULL || addressLength == 0) {
1363 		// don't know where to send to:
1364 		return EDESTADDRREQ;
1365 	}
1366 
1367 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1368 		&& bytesLeft > socket->send.buffer_size)
1369 		return EMSGSIZE;
1370 
1371 	if (socket->address.ss_len == 0) {
1372 		// try to bind first
1373 		status_t status = socket_bind(socket, NULL, 0);
1374 		if (status != B_OK)
1375 			return status;
1376 	}
1377 
1378 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1379 	if (socket->first_info->send_data_no_buffer != NULL) {
1380 		iovec stackVec = { (void*)data, length };
1381 		iovec* vecs = header ? header->msg_iov : &stackVec;
1382 		int vecCount = header ? header->msg_iovlen : 1;
1383 
1384 		ssize_t written = socket->first_info->send_data_no_buffer(
1385 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1386 			addressLength);
1387 		if (written > 0)
1388 			ancillaryDataDeleter.Detach();
1389 		return written;
1390 	}
1391 
1392 	// By convention, if a header is given, the (data, length) equals the first
1393 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1394 	// the size of the remaining ones.
1395 	if (header != NULL) {
1396 		if (header->msg_iovlen <= 1)
1397 			header = NULL;
1398 		else {
1399 // TODO: The iovecs have already been copied to kernel space. Simplify!
1400 			bytesLeft += compute_user_iovec_length(header->msg_iov + 1,
1401 				header->msg_iovlen - 1);
1402 		}
1403 	}
1404 
1405 	ssize_t bytesSent = 0;
1406 	size_t vecOffset = 0;
1407 	uint32 vecIndex = 0;
1408 
1409 	while (bytesLeft > 0) {
1410 		// TODO: useful, maybe even computed header space!
1411 		net_buffer* buffer = gNetBufferModule.create(256);
1412 		if (buffer == NULL)
1413 			return ENOBUFS;
1414 
1415 		while (buffer->size < socket->send.buffer_size
1416 			&& buffer->size < bytesLeft) {
1417 			if (vecIndex > 0 && vecOffset == 0) {
1418 				// retrieve next iovec buffer from header
1419 				iovec vec;
1420 				if (user_memcpy(&vec, header->msg_iov + vecIndex, sizeof(iovec))
1421 						< B_OK) {
1422 					gNetBufferModule.free(buffer);
1423 					return B_BAD_ADDRESS;
1424 				}
1425 
1426 				data = vec.iov_base;
1427 				length = vec.iov_len;
1428 			}
1429 
1430 			size_t bytes = length;
1431 			if (buffer->size + bytes > socket->send.buffer_size)
1432 				bytes = socket->send.buffer_size - buffer->size;
1433 
1434 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1435 				gNetBufferModule.free(buffer);
1436 				return ENOBUFS;
1437 			}
1438 
1439 			if (bytes != length) {
1440 				// partial send
1441 				vecOffset = bytes;
1442 				length -= vecOffset;
1443 				data = (uint8*)data + vecOffset;
1444 			} else if (header != NULL) {
1445 				// proceed with next buffer, if any
1446 				vecOffset = 0;
1447 				vecIndex++;
1448 
1449 				if (vecIndex >= (uint32)header->msg_iovlen)
1450 					break;
1451 			}
1452 		}
1453 
1454 		// attach ancillary data to the first buffer
1455 		status_t status = B_OK;
1456 		if (ancillaryData != NULL) {
1457 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1458 			ancillaryDataDeleter.Detach();
1459 			ancillaryData = NULL;
1460 		}
1461 
1462 		size_t bufferSize = buffer->size;
1463 		buffer->flags = flags;
1464 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1465 		memcpy(buffer->destination, address, addressLength);
1466 		buffer->destination->sa_len = addressLength;
1467 
1468 		if (status == B_OK) {
1469 			status = socket->first_info->send_data(socket->first_protocol,
1470 				buffer);
1471 		}
1472 		if (status != B_OK) {
1473 			size_t sizeAfterSend = buffer->size;
1474 			gNetBufferModule.free(buffer);
1475 
1476 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1477 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1478 				// this appears to be a partial write
1479 				return bytesSent + (bufferSize - sizeAfterSend);
1480 			}
1481 			return status;
1482 		}
1483 
1484 		bytesLeft -= bufferSize;
1485 		bytesSent += bufferSize;
1486 	}
1487 
1488 	return bytesSent;
1489 }
1490 
1491 
1492 status_t
1493 socket_set_option(net_socket* socket, int level, int option, const void* value,
1494 	int length)
1495 {
1496 	if (level != SOL_SOCKET)
1497 		return ENOPROTOOPT;
1498 
1499 	TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1500 
1501 	switch (option) {
1502 		// TODO: implement other options!
1503 		case SO_LINGER:
1504 		{
1505 			if (length < (int)sizeof(struct linger))
1506 				return B_BAD_VALUE;
1507 
1508 			struct linger* linger = (struct linger*)value;
1509 			if (linger->l_onoff) {
1510 				socket->options |= SO_LINGER;
1511 				socket->linger = linger->l_linger;
1512 			} else {
1513 				socket->options &= ~SO_LINGER;
1514 				socket->linger = 0;
1515 			}
1516 			return B_OK;
1517 		}
1518 
1519 		case SO_SNDBUF:
1520 			if (length != sizeof(uint32))
1521 				return B_BAD_VALUE;
1522 
1523 			socket->send.buffer_size = *(const uint32*)value;
1524 			return B_OK;
1525 
1526 		case SO_RCVBUF:
1527 			if (length != sizeof(uint32))
1528 				return B_BAD_VALUE;
1529 
1530 			socket->receive.buffer_size = *(const uint32*)value;
1531 			return B_OK;
1532 
1533 		case SO_SNDLOWAT:
1534 			if (length != sizeof(uint32))
1535 				return B_BAD_VALUE;
1536 
1537 			socket->send.low_water_mark = *(const uint32*)value;
1538 			return B_OK;
1539 
1540 		case SO_RCVLOWAT:
1541 			if (length != sizeof(uint32))
1542 				return B_BAD_VALUE;
1543 
1544 			socket->receive.low_water_mark = *(const uint32*)value;
1545 			return B_OK;
1546 
1547 		case SO_RCVTIMEO:
1548 		case SO_SNDTIMEO:
1549 		{
1550 			if (length != sizeof(struct timeval))
1551 				return B_BAD_VALUE;
1552 
1553 			const struct timeval* timeval = (const struct timeval*)value;
1554 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1555 			if (timeout == 0)
1556 				timeout = B_INFINITE_TIMEOUT;
1557 
1558 			if (option == SO_SNDTIMEO)
1559 				socket->send.timeout = timeout;
1560 			else
1561 				socket->receive.timeout = timeout;
1562 			return B_OK;
1563 		}
1564 
1565 		case SO_NONBLOCK:
1566 			if (length != sizeof(int32))
1567 				return B_BAD_VALUE;
1568 
1569 			if (*(const int32*)value) {
1570 				socket->send.timeout = 0;
1571 				socket->receive.timeout = 0;
1572 			} else {
1573 				socket->send.timeout = B_INFINITE_TIMEOUT;
1574 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1575 			}
1576 			return B_OK;
1577 
1578 		case SO_BROADCAST:
1579 		case SO_DEBUG:
1580 		case SO_DONTROUTE:
1581 		case SO_KEEPALIVE:
1582 		case SO_OOBINLINE:
1583 		case SO_REUSEADDR:
1584 		case SO_REUSEPORT:
1585 		case SO_USELOOPBACK:
1586 			if (length != sizeof(int32))
1587 				return B_BAD_VALUE;
1588 
1589 			if (*(const int32*)value)
1590 				socket->options |= option;
1591 			else
1592 				socket->options &= ~option;
1593 			return B_OK;
1594 
1595 		case SO_BINDTODEVICE:
1596 		{
1597 			if (length != sizeof(uint32))
1598 				return B_BAD_VALUE;
1599 
1600 			// TODO: we might want to check if the device exists at all
1601 			// (although it doesn't really harm when we don't)
1602 			socket->bound_to_device = *(const uint32*)value;
1603 			return B_OK;
1604 		}
1605 
1606 		default:
1607 			break;
1608 	}
1609 
1610 	dprintf("socket_setsockopt: unknown option %d\n", option);
1611 	return ENOPROTOOPT;
1612 }
1613 
1614 
1615 int
1616 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1617 	int length)
1618 {
1619 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1620 		level, option, value, length);
1621 }
1622 
1623 
1624 int
1625 socket_shutdown(net_socket* socket, int direction)
1626 {
1627 	return socket->first_info->shutdown(socket->first_protocol, direction);
1628 }
1629 
1630 
1631 status_t
1632 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1633 {
1634 	sockets[0] = NULL;
1635 	sockets[1] = NULL;
1636 
1637 	// create sockets
1638 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1639 	if (error != B_OK)
1640 		return error;
1641 
1642 	if (error == B_OK)
1643 		error = socket_open(family, type, protocol, &sockets[1]);
1644 
1645 	// bind one
1646 	if (error == B_OK)
1647 		error = socket_bind(sockets[0], NULL, 0);
1648 
1649 	// start listening
1650 	if (error == B_OK)
1651 		error = socket_listen(sockets[0], 1);
1652 
1653 	// connect them
1654 	if (error == B_OK) {
1655 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1656 			sockets[0]->address.ss_len);
1657 	}
1658 
1659 	// accept a socket
1660 	net_socket* acceptedSocket = NULL;
1661 	if (error == B_OK)
1662 		error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1663 
1664 	if (error == B_OK) {
1665 		// everything worked: close the listener socket
1666 		socket_close(sockets[0]);
1667 		socket_free(sockets[0]);
1668 		sockets[0] = acceptedSocket;
1669 	} else {
1670 		// close sockets on error
1671 		for (int i = 0; i < 2; i++) {
1672 			if (sockets[i] != NULL) {
1673 				socket_close(sockets[i]);
1674 				socket_free(sockets[i]);
1675 				sockets[i] = NULL;
1676 			}
1677 		}
1678 	}
1679 
1680 	return error;
1681 }
1682 
1683 
1684 //	#pragma mark -
1685 
1686 
1687 static status_t
1688 socket_std_ops(int32 op, ...)
1689 {
1690 	switch (op) {
1691 		case B_MODULE_INIT:
1692 		{
1693 			new (&sSocketList) SocketList;
1694 			mutex_init(&sSocketLock, "socket list");
1695 
1696 #if ENABLE_DEBUGGER_COMMANDS
1697 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1698 			add_debugger_command("socket", dump_socket, "dumps a socket");
1699 #endif
1700 			return B_OK;
1701 		}
1702 		case B_MODULE_UNINIT:
1703 			ASSERT(sSocketList.IsEmpty());
1704 			mutex_destroy(&sSocketLock);
1705 
1706 #if ENABLE_DEBUGGER_COMMANDS
1707 			remove_debugger_command("socket", dump_socket);
1708 			remove_debugger_command("sockets", dump_sockets);
1709 #endif
1710 			return B_OK;
1711 
1712 		default:
1713 			return B_ERROR;
1714 	}
1715 }
1716 
1717 
// The function table of the socket module, published under
// NET_SOCKET_MODULE_NAME. The initializer is positional: every entry has to
// appear in exactly the order in which net_socket_module_info declares its
// members (the declaration is not part of this file), so entries must never
// be reordered, added, or removed here alone.
net_socket_module_info gNetSocketModule = {
	// module descriptor: name, a zero value (presumably the module flags --
	// verify against the module_info declaration), and the init/uninit hook
	{
		NET_SOCKET_MODULE_NAME,
		0,
		socket_std_ops
	},
	// socket life cycle
	socket_open,
	socket_close,
	socket_free,

	// vectored I/O and control requests
	socket_readv,
	socket_writev,
	socket_control,

	// available data/space queries
	socket_read_avail,
	socket_send_avail,

	// buffer based data transfer
	socket_send_data,
	socket_receive_data,

	// generic (SOL_SOCKET) option handling
	socket_get_option,
	socket_set_option,

	// socket iteration for statistics
	socket_get_next_stat,

	// connections
	socket_acquire,
	socket_release,
	socket_spawn_pending,
	socket_dequeue_connected,
	socket_count_connected,
	socket_set_max_backlog,
	socket_has_parent,
	socket_connected,
	socket_aborted,

	// notifications
	socket_request_notification,
	socket_cancel_notification,
	socket_notify,

	// standard socket API
	socket_accept,
	socket_bind,
	socket_connect,
	socket_getpeername,
	socket_getsockname,
	socket_getsockopt,
	socket_listen,
	socket_receive,
	socket_send,
	socket_setsockopt,
	socket_shutdown,
	socket_socketpair
};
1773 
1774