xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 344ded80d400028c8f561b4b876257b94c12db4a)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
// Uncomment the following define to route TRACE() output to dprintf(),
// prefixed with STACK_DEBUG_PREFIX. When it is left undefined, TRACE()
// expands to an empty statement.
//#define TRACE_SOCKET
#ifdef TRACE_SOCKET
#	define TRACE(x...) dprintf(STACK_DEBUG_PREFIX x)
#else
#	define TRACE(x...) ;
#endif
46 
47 
48 struct net_socket_private;
49 typedef DoublyLinkedList<net_socket_private> SocketList;
50 
/*!	The stack-internal representation of a socket. It extends the public
	net_socket with the bookkeeping needed for listening sockets (child
	lists), select() support, and membership in the global socket list.
*/
struct net_socket_private : net_socket,
		DoublyLinkedListLinkImpl<net_socket_private>,
		BWeakReferenceable {
	net_socket_private();
	~net_socket_private();

	// Detaches this socket from its parent, adds it to the global socket
	// list, and releases one reference to it.
	void RemoveFromParent();

	// The listening socket that spawned this one (weak reference), if any.
	BWeakReference<net_socket_private> parent;
	// -1 until set by socket_open() (the current team), or inherited from
	// the parent in socket_spawn_pending().
	team_id						owner;
	// Set by socket_set_max_backlog(), which caps it at 256.
	uint32						max_backlog;
	// Number of children currently queued in the two lists below.
	uint32						child_count;
	// Children created by socket_spawn_pending() that are not yet connected.
	SocketList					pending_children;
	// Children moved here by socket_connected(); dequeued by
	// socket_dequeue_connected().
	SocketList					connected_children;

	// select()/poll() listeners; only accessed with "lock" held.
	struct select_sync_pool*	select_pool;
	// Protects the child lists, child_count, and select_pool.
	mutex						lock;

	// Set by socket_connected().
	bool						is_connected;
	// Whether this socket is (to be) linked into sSocketList.
	bool						is_in_socket_list;
};
72 
73 
// Forward declarations of functions that are used before their definition.
int socket_bind(net_socket* socket, const struct sockaddr* address,
	socklen_t addressLength);
int socket_setsockopt(net_socket* socket, int level, int option,
	const void* value, int length);
ssize_t socket_read_avail(net_socket* socket);

// List of all sockets that are not queued as a child of another socket;
// every access in this file happens with sSocketLock held, except for the
// kernel debugger commands.
static SocketList sSocketList;
static mutex sSocketLock;
82 
83 
84 net_socket_private::net_socket_private()
85 	:
86 	owner(-1),
87 	max_backlog(0),
88 	child_count(0),
89 	select_pool(NULL),
90 	is_connected(false),
91 	is_in_socket_list(false)
92 {
93 	first_protocol = NULL;
94 	first_info = NULL;
95 	options = 0;
96 	linger = 0;
97 	bound_to_device = 0;
98 	error = 0;
99 
100 	address.ss_len = 0;
101 	peer.ss_len = 0;
102 
103 	mutex_init(&lock, "socket");
104 
105 	// set defaults (may be overridden by the protocols)
106 	send.buffer_size = 65535;
107 	send.low_water_mark = 1;
108 	send.timeout = B_INFINITE_TIMEOUT;
109 	receive.buffer_size = 65535;
110 	receive.low_water_mark = 1;
111 	receive.timeout = B_INFINITE_TIMEOUT;
112 }
113 
114 
115 net_socket_private::~net_socket_private()
116 {
117 	TRACE("delete net_socket %p\n", this);
118 
119 	if (parent != NULL)
120 		panic("socket still has a parent!");
121 
122 	if (is_in_socket_list) {
123 		MutexLocker _(sSocketLock);
124 		sSocketList.Remove(this);
125 	}
126 
127 	mutex_lock(&lock);
128 
129 	// also delete all children of this socket
130 	while (net_socket_private* child = pending_children.RemoveHead()) {
131 		child->RemoveFromParent();
132 	}
133 	while (net_socket_private* child = connected_children.RemoveHead()) {
134 		child->RemoveFromParent();
135 	}
136 
137 	mutex_unlock(&lock);
138 
139 	put_domain_protocols(this);
140 
141 	mutex_destroy(&lock);
142 }
143 
144 
145 void
146 net_socket_private::RemoveFromParent()
147 {
148 	ASSERT(!is_in_socket_list && parent != NULL);
149 
150 	parent = NULL;
151 
152 	mutex_lock(&sSocketLock);
153 	sSocketList.Add(this);
154 	mutex_unlock(&sSocketLock);
155 
156 	is_in_socket_list = true;
157 
158 	ReleaseReference();
159 }
160 
161 
162 //	#pragma mark -
163 
164 
165 static status_t
166 create_socket(int family, int type, int protocol, net_socket_private** _socket)
167 {
168 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
169 	if (socket == NULL)
170 		return B_NO_MEMORY;
171 	status_t status = socket->InitCheck();
172 	if (status != B_OK) {
173 		delete socket;
174 		return status;
175 	}
176 
177 	socket->family = family;
178 	socket->type = type;
179 	socket->protocol = protocol;
180 
181 	status = get_domain_protocols(socket);
182 	if (status != B_OK) {
183 		delete socket;
184 		return status;
185 	}
186 
187 	TRACE("create net_socket %p (%u.%u.%u):\n", socket, socket->family,
188 		socket->type, socket->protocol);
189 
190 #ifdef TRACE_SOCKET
191 	net_protocol* current = socket->first_protocol;
192 	for (int i = 0; current != NULL; current = current->next, i++)
193 		TRACE("  [%d] %p  %s\n", i, current, current->module->info.name);
194 #endif
195 
196 	*_socket = socket;
197 	return B_OK;
198 }
199 
200 
201 static status_t
202 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
203 	void* data, size_t dataLen)
204 {
205 	cmsghdr* header = (cmsghdr*)data;
206 
207 	if (dataLen == 0)
208 		return B_OK;
209 
210 	if (socket->first_info->add_ancillary_data == NULL)
211 		return B_NOT_SUPPORTED;
212 
213 	while (true) {
214 		if (header->cmsg_len < CMSG_LEN(0) || header->cmsg_len > dataLen)
215 			return B_BAD_VALUE;
216 
217 		status_t status = socket->first_info->add_ancillary_data(
218 			socket->first_protocol, container, header);
219 		if (status != B_OK)
220 			return status;
221 
222 		const size_t alignedLength = CMSG_ALIGN(header->cmsg_len);
223 		if (dataLen <= alignedLength)
224 			break;
225 
226 		dataLen -= alignedLength;
227 		header = (cmsghdr*)((uint8*)header + alignedLength);
228 	}
229 
230 	return B_OK;
231 }
232 
233 
234 static status_t
235 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
236 	msghdr* messageHeader)
237 {
238 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
239 	int dataBufferLen = messageHeader->msg_controllen;
240 
241 	if (container == NULL || dataBuffer == NULL) {
242 		messageHeader->msg_controllen = 0;
243 		return B_OK;
244 	}
245 
246 	if (socket->first_info->process_ancillary_data == NULL)
247 		return B_NOT_SUPPORTED;
248 
249 	ssize_t bytesWritten = socket->first_info->process_ancillary_data(
250 		socket->first_protocol, container, dataBuffer, dataBufferLen);
251 	if (bytesWritten < 0)
252 		return bytesWritten;
253 
254 	messageHeader->msg_controllen = bytesWritten;
255 	return B_OK;
256 }
257 
258 
259 static status_t
260 process_ancillary_data(net_socket* socket,
261 	net_buffer* buffer, msghdr* messageHeader)
262 {
263 	void *dataBuffer = messageHeader->msg_control;
264 	ssize_t bytesWritten;
265 
266 	if (dataBuffer == NULL) {
267 		messageHeader->msg_controllen = 0;
268 		return B_OK;
269 	}
270 
271 	if (socket->first_info->process_ancillary_data_no_container == NULL)
272 		return B_NOT_SUPPORTED;
273 
274 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
275 		socket->first_protocol, buffer, dataBuffer,
276 		messageHeader->msg_controllen);
277 	if (bytesWritten < 0)
278 		return bytesWritten;
279 	messageHeader->msg_controllen = bytesWritten;
280 
281 	return B_OK;
282 }
283 
284 
285 static ssize_t
286 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
287 	size_t length, int flags)
288 {
289 	iovec stackVec = { data, length };
290 	iovec* vecs = header ? header->msg_iov : &stackVec;
291 	int vecCount = header ? header->msg_iovlen : 1;
292 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
293 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
294 
295 	ancillary_data_container* ancillaryData = NULL;
296 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
297 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
298 		addressLen, flags);
299 	if (bytesRead < 0)
300 		return bytesRead;
301 
302 	CObjectDeleter<
303 		ancillary_data_container, void, delete_ancillary_data_container>
304 		ancillaryDataDeleter(ancillaryData);
305 
306 	// process ancillary data
307 	if (header != NULL) {
308 		status_t status = process_ancillary_data(socket, ancillaryData, header);
309 		if (status != B_OK)
310 			return status;
311 
312 		header->msg_flags = 0;
313 	}
314 
315 	return bytesRead;
316 }
317 
318 
319 #if ENABLE_DEBUGGER_COMMANDS
320 
321 
322 static void
323 print_socket_line(net_socket_private* socket, const char* prefix)
324 {
325 	BReference<net_socket_private> parent = socket->parent.GetReference();
326 	kprintf("%s%p %2d.%2d.%2d %6" B_PRId32 " %p %p  %p%s\n", prefix, socket,
327 		socket->family, socket->type, socket->protocol, socket->owner,
328 		socket->first_protocol, socket->first_info, parent.Get(),
329 		parent.IsSet() ? socket->is_connected ? " (c)" : " (p)" : "");
330 }
331 
332 
333 static int
334 dump_socket(int argc, char** argv)
335 {
336 	if (argc < 2) {
337 		kprintf("usage: %s [address]\n", argv[0]);
338 		return 0;
339 	}
340 
341 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
342 
343 	kprintf("SOCKET %p\n", socket);
344 	kprintf("  family.type.protocol: %d.%d.%d\n",
345 		socket->family, socket->type, socket->protocol);
346 	BReference<net_socket_private> parent = socket->parent.GetReference();
347 	kprintf("  parent:               %p\n", parent.Get());
348 	kprintf("  first protocol:       %p\n", socket->first_protocol);
349 	kprintf("  first module_info:    %p\n", socket->first_info);
350 	kprintf("  options:              %x\n", socket->options);
351 	kprintf("  linger:               %d\n", socket->linger);
352 	kprintf("  bound to device:      %" B_PRIu32 "\n", socket->bound_to_device);
353 	kprintf("  owner:                %" B_PRId32 "\n", socket->owner);
354 	kprintf("  max backlog:          %" B_PRId32 "\n", socket->max_backlog);
355 	kprintf("  is connected:         %d\n", socket->is_connected);
356 	kprintf("  child_count:          %" B_PRIu32 "\n", socket->child_count);
357 
358 	if (socket->child_count == 0)
359 		return 0;
360 
361 	kprintf("    pending children:\n");
362 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
363 	while (net_socket_private* child = iterator.Next()) {
364 		print_socket_line(child, "      ");
365 	}
366 
367 	kprintf("    connected children:\n");
368 	iterator = socket->connected_children.GetIterator();
369 	while (net_socket_private* child = iterator.Next()) {
370 		print_socket_line(child, "      ");
371 	}
372 
373 	return 0;
374 }
375 
376 
377 static int
378 dump_sockets(int argc, char** argv)
379 {
380 	kprintf("address        kind  owner protocol   module_info parent\n");
381 
382 	SocketList::Iterator iterator = sSocketList.GetIterator();
383 	while (net_socket_private* socket = iterator.Next()) {
384 		print_socket_line(socket, "");
385 
386 		SocketList::Iterator childIterator
387 			= socket->pending_children.GetIterator();
388 		while (net_socket_private* child = childIterator.Next()) {
389 			print_socket_line(child, " ");
390 		}
391 
392 		childIterator = socket->connected_children.GetIterator();
393 		while (net_socket_private* child = childIterator.Next()) {
394 			print_socket_line(child, " ");
395 		}
396 	}
397 
398 	return 0;
399 }
400 
401 
402 #endif	// ENABLE_DEBUGGER_COMMANDS
403 
404 
405 //	#pragma mark -
406 
407 
408 status_t
409 socket_open(int family, int type, int protocol, net_socket** _socket)
410 {
411 	net_socket_private* socket;
412 	status_t status = create_socket(family, type, protocol, &socket);
413 	if (status != B_OK)
414 		return status;
415 
416 	status = socket->first_info->open(socket->first_protocol);
417 	if (status != B_OK) {
418 		delete socket;
419 		return status;
420 	}
421 
422 	socket->owner = team_get_current_team_id();
423 	socket->is_in_socket_list = true;
424 
425 	mutex_lock(&sSocketLock);
426 	sSocketList.Add(socket);
427 	mutex_unlock(&sSocketLock);
428 
429 	*_socket = socket;
430 	return B_OK;
431 }
432 
433 
434 status_t
435 socket_close(net_socket* _socket)
436 {
437 	net_socket_private* socket = (net_socket_private*)_socket;
438 	return socket->first_info->close(socket->first_protocol);
439 }
440 
441 
442 void
443 socket_free(net_socket* _socket)
444 {
445 	net_socket_private* socket = (net_socket_private*)_socket;
446 	socket->first_info->free(socket->first_protocol);
447 	socket->ReleaseReference();
448 }
449 
450 
451 status_t
452 socket_control(net_socket* socket, uint32 op, void* data, size_t length)
453 {
454 	switch (op) {
455 		case FIONREAD:
456 		{
457 			if (data == NULL || (socket->options & SO_ACCEPTCONN) != 0)
458 				return B_BAD_VALUE;
459 
460 			int available = (int)socket_read_avail(socket);
461 			if (available < 0)
462 				available = 0;
463 
464 			if (is_syscall()) {
465 				if (!IS_USER_ADDRESS(data)
466 					|| user_memcpy(data, &available, sizeof(available))
467 						!= B_OK) {
468 					return B_BAD_ADDRESS;
469 				}
470 			} else
471 				*(int*)data = available;
472 
473 			return B_OK;
474 		}
475 
476 		case B_SET_BLOCKING_IO:
477 		case B_SET_NONBLOCKING_IO:
478 		{
479 			int value = op == B_SET_NONBLOCKING_IO;
480 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
481 				sizeof(int));
482 		}
483 	}
484 
485 	return socket->first_info->control(socket->first_protocol,
486 		LEVEL_DRIVER_IOCTL, op, data, &length);
487 }
488 
489 
490 ssize_t
491 socket_read_avail(net_socket* socket)
492 {
493 	return socket->first_info->read_avail(socket->first_protocol);
494 }
495 
496 
497 ssize_t
498 socket_send_avail(net_socket* socket)
499 {
500 	return socket->first_info->send_avail(socket->first_protocol);
501 }
502 
503 
504 status_t
505 socket_send_data(net_socket* socket, net_buffer* buffer)
506 {
507 	return socket->first_info->send_data(socket->first_protocol,
508 		buffer);
509 }
510 
511 
512 status_t
513 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
514 	net_buffer** _buffer)
515 {
516 	status_t status = socket->first_info->read_data(socket->first_protocol,
517 		length, flags, _buffer);
518 	if (status != B_OK)
519 		return status;
520 
521 	if (*_buffer && length < (*_buffer)->size) {
522 		// discard any data behind the amount requested
523 		gNetBufferModule.trim(*_buffer, length);
524 	}
525 
526 	return status;
527 }
528 
529 
530 status_t
531 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
532 {
533 	MutexLocker locker(sSocketLock);
534 
535 	net_socket_private* socket = NULL;
536 	SocketList::Iterator iterator = sSocketList.GetIterator();
537 	uint32 cookie = *_cookie;
538 	uint32 count = 0;
539 
540 	while (true) {
541 		socket = iterator.Next();
542 		if (socket == NULL)
543 			return B_ENTRY_NOT_FOUND;
544 
545 		// TODO: also traverse the pending connections
546 		if (count == cookie)
547 			break;
548 
549 		if (family == -1 || family == socket->family)
550 			count++;
551 	}
552 
553 	*_cookie = count + 1;
554 
555 	stat->family = socket->family;
556 	stat->type = socket->type;
557 	stat->protocol = socket->protocol;
558 	stat->owner = socket->owner;
559 	stat->state[0] = '\0';
560 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
561 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
562 	stat->receive_queue_size = 0;
563 	stat->send_queue_size = 0;
564 
565 	// fill in protocol specific data (if supported by the protocol)
566 	size_t length = sizeof(net_stat);
567 	socket->first_info->control(socket->first_protocol, socket->protocol,
568 		NET_STAT_SOCKET, stat, &length);
569 
570 	return B_OK;
571 }
572 
573 
574 //	#pragma mark - connections
575 
576 
577 bool
578 socket_acquire(net_socket* _socket)
579 {
580 	net_socket_private* socket = (net_socket_private*)_socket;
581 
582 	// During destruction, the socket might still be accessible over its
583 	// endpoint protocol. We need to make sure the endpoint cannot acquire the
584 	// socket anymore -- while not obvious, the endpoint protocol is responsible
585 	// for the proper locking here.
586 	if (socket->CountReferences() == 0)
587 		return false;
588 
589 	socket->AcquireReference();
590 	return true;
591 }
592 
593 
594 bool
595 socket_release(net_socket* _socket)
596 {
597 	net_socket_private* socket = (net_socket_private*)_socket;
598 	return socket->ReleaseReference();
599 }
600 
601 
602 status_t
603 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
604 {
605 	net_socket_private* parent = (net_socket_private*)_parent;
606 
607 	TRACE("%s(%p)\n", __FUNCTION__, parent);
608 
609 	MutexLocker locker(parent->lock);
610 
611 	// We actually accept more pending connections to compensate for those
612 	// that never complete, and also make sure at least a single connection
613 	// can always be accepted
614 	if (parent->child_count > 3 * parent->max_backlog / 2)
615 		return ENOBUFS;
616 
617 	net_socket_private* socket;
618 	status_t status = create_socket(parent->family, parent->type,
619 		parent->protocol, &socket);
620 	if (status != B_OK)
621 		return status;
622 
623 	// inherit parent's properties
624 	socket->send = parent->send;
625 	socket->receive = parent->receive;
626 	socket->options = parent->options & (SO_KEEPALIVE | SO_DONTROUTE | SO_LINGER | SO_OOBINLINE);
627 	socket->linger = parent->linger;
628 	socket->owner = parent->owner;
629 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
630 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
631 
632 	// add to the parent's list of pending connections
633 	parent->pending_children.Add(socket);
634 	socket->parent = parent;
635 	parent->child_count++;
636 
637 	*_socket = socket;
638 	return B_OK;
639 }
640 
641 
642 /*!	Dequeues a connected child from a parent socket.
643 	It also returns a reference with the child socket.
644 */
645 status_t
646 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
647 {
648 	net_socket_private* parent = (net_socket_private*)_parent;
649 
650 	mutex_lock(&parent->lock);
651 
652 	net_socket_private* socket = parent->connected_children.RemoveHead();
653 	if (socket != NULL) {
654 		socket->AcquireReference();
655 		socket->RemoveFromParent();
656 		parent->child_count--;
657 		*_socket = socket;
658 	}
659 
660 	mutex_unlock(&parent->lock);
661 
662 	if (socket == NULL)
663 		return B_ENTRY_NOT_FOUND;
664 
665 	return B_OK;
666 }
667 
668 
669 ssize_t
670 socket_count_connected(net_socket* _parent)
671 {
672 	net_socket_private* parent = (net_socket_private*)_parent;
673 
674 	MutexLocker _(parent->lock);
675 	return parent->connected_children.Count();
676 }
677 
678 
679 status_t
680 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
681 {
682 	net_socket_private* socket = (net_socket_private*)_socket;
683 
684 	// we enforce an upper limit of connections waiting to be accepted
685 	if (backlog > 256)
686 		backlog = 256;
687 
688 	MutexLocker _(socket->lock);
689 
690 	// first remove the pending connections, then the already connected
691 	// ones as needed
692 	net_socket_private* child;
693 	while (socket->child_count > backlog
694 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
695 		child->RemoveFromParent();
696 		socket->child_count--;
697 	}
698 	while (socket->child_count > backlog
699 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
700 		child->RemoveFromParent();
701 		socket->child_count--;
702 	}
703 
704 	socket->max_backlog = backlog;
705 	return B_OK;
706 }
707 
708 
709 /*!	Returns whether or not this socket has a parent. The parent might not be
710 	valid anymore, though.
711 */
712 bool
713 socket_has_parent(net_socket* _socket)
714 {
715 	net_socket_private* socket = (net_socket_private*)_socket;
716 	return socket->parent != NULL;
717 }
718 
719 
720 /*!	The socket has been connected. It will be moved to the connected queue
721 	of its parent socket.
722 */
723 status_t
724 socket_connected(net_socket* _socket)
725 {
726 	net_socket_private* socket = (net_socket_private*)_socket;
727 
728 	TRACE("socket_connected(%p)\n", socket);
729 
730 	if (socket->parent == NULL) {
731 		socket->is_connected = true;
732 		return B_OK;
733 	}
734 
735 	BReference<net_socket_private> parent = socket->parent.GetReference();
736 	if (!parent.IsSet())
737 		return B_BAD_VALUE;
738 
739 	MutexLocker _(parent->lock);
740 
741 	parent->pending_children.Remove(socket);
742 	parent->connected_children.Add(socket);
743 	socket->is_connected = true;
744 
745 	// notify parent
746 	if (parent->select_pool)
747 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
748 
749 	return B_OK;
750 }
751 
752 
753 /*!	The socket has been aborted. Steals the parent's reference, and releases
754 	it.
755 */
756 status_t
757 socket_aborted(net_socket* _socket)
758 {
759 	net_socket_private* socket = (net_socket_private*)_socket;
760 
761 	TRACE("socket_aborted(%p)\n", socket);
762 
763 	BReference<net_socket_private> parent = socket->parent.GetReference();
764 	if (!parent.IsSet())
765 		return B_BAD_VALUE;
766 
767 	MutexLocker _(parent->lock);
768 
769 	if (socket->is_connected)
770 		parent->connected_children.Remove(socket);
771 	else
772 		parent->pending_children.Remove(socket);
773 
774 	parent->child_count--;
775 	socket->RemoveFromParent();
776 
777 	return B_OK;
778 }
779 
780 
781 //	#pragma mark - notifications
782 
783 
784 status_t
785 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
786 {
787 	net_socket_private* socket = (net_socket_private*)_socket;
788 
789 	mutex_lock(&socket->lock);
790 
791 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
792 		event);
793 
794 	mutex_unlock(&socket->lock);
795 
796 	if (status != B_OK)
797 		return status;
798 
799 	// check if the event is already present
800 	// TODO: add support for poll() types
801 
802 	switch (event) {
803 		case B_SELECT_READ:
804 		{
805 			ssize_t available = socket_read_avail(socket);
806 			if ((ssize_t)socket->receive.low_water_mark <= available
807 				|| available < B_OK)
808 				notify_select_event(sync, event);
809 			break;
810 		}
811 		case B_SELECT_WRITE:
812 		{
813 			if ((socket->options & SO_ACCEPTCONN) != 0)
814 				break;
815 
816 			ssize_t available = socket_send_avail(socket);
817 			if ((ssize_t)socket->send.low_water_mark <= available
818 				|| available < B_OK)
819 				notify_select_event(sync, event);
820 			break;
821 		}
822 		case B_SELECT_ERROR:
823 			if (socket->error != B_OK)
824 				notify_select_event(sync, event);
825 			break;
826 	}
827 
828 	return B_OK;
829 }
830 
831 
832 status_t
833 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
834 {
835 	net_socket_private* socket = (net_socket_private*)_socket;
836 
837 	MutexLocker _(socket->lock);
838 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
839 }
840 
841 
842 status_t
843 socket_notify(net_socket* _socket, uint8 event, int32 value)
844 {
845 	net_socket_private* socket = (net_socket_private*)_socket;
846 	bool notify = true;
847 
848 	switch (event) {
849 		case B_SELECT_READ:
850 			if ((ssize_t)socket->receive.low_water_mark > value
851 				&& value >= B_OK)
852 				notify = false;
853 			break;
854 
855 		case B_SELECT_WRITE:
856 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
857 				notify = false;
858 			break;
859 
860 		case B_SELECT_ERROR:
861 			socket->error = value;
862 			break;
863 	}
864 
865 	MutexLocker _(socket->lock);
866 
867 	if (notify && socket->select_pool != NULL) {
868 		notify_select_event_pool(socket->select_pool, event);
869 
870 		if (event == B_SELECT_ERROR) {
871 			// always notify read/write on error
872 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
873 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
874 		}
875 	}
876 
877 	return B_OK;
878 }
879 
880 
881 //	#pragma mark - standard socket API
882 
883 
884 int
885 socket_accept(net_socket* socket, struct sockaddr* address,
886 	socklen_t* _addressLength, net_socket** _acceptedSocket)
887 {
888 	if ((socket->options & SO_ACCEPTCONN) == 0)
889 		return B_BAD_VALUE;
890 
891 	net_socket* accepted;
892 	status_t status = socket->first_info->accept(socket->first_protocol,
893 		&accepted);
894 	if (status != B_OK)
895 		return status;
896 
897 	if (address && *_addressLength > 0) {
898 		memcpy(address, &accepted->peer, min_c(*_addressLength,
899 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
900 		*_addressLength = accepted->peer.ss_len;
901 	}
902 
903 	*_acceptedSocket = accepted;
904 	return B_OK;
905 }
906 
907 
908 int
909 socket_bind(net_socket* socket, const struct sockaddr* address,
910 	socklen_t addressLength)
911 {
912 	sockaddr empty;
913 	if (address == NULL) {
914 		// special - try to bind to an empty address, like INADDR_ANY
915 		memset(&empty, 0, sizeof(sockaddr));
916 		empty.sa_len = sizeof(sockaddr);
917 		empty.sa_family = socket->family;
918 
919 		address = &empty;
920 		addressLength = sizeof(sockaddr);
921 	}
922 
923 	if (socket->address.ss_len != 0)
924 		return B_BAD_VALUE;
925 
926 	memcpy(&socket->address, address, sizeof(sockaddr));
927 	socket->address.ss_len = sizeof(sockaddr_storage);
928 
929 	status_t status = socket->first_info->bind(socket->first_protocol,
930 		(sockaddr*)address);
931 	if (status != B_OK) {
932 		// clear address again, as binding failed
933 		socket->address.ss_len = 0;
934 	}
935 
936 	return status;
937 }
938 
939 
940 int
941 socket_connect(net_socket* socket, const struct sockaddr* address,
942 	socklen_t addressLength)
943 {
944 	if (address == NULL || addressLength == 0)
945 		return ENETUNREACH;
946 
947 	if (socket->address.ss_len == 0) {
948 		// try to bind first
949 		status_t status = socket_bind(socket, NULL, 0);
950 		if (status != B_OK)
951 			return status;
952 	}
953 
954 	return socket->first_info->connect(socket->first_protocol, address);
955 }
956 
957 
958 int
959 socket_getpeername(net_socket* _socket, struct sockaddr* address,
960 	socklen_t* _addressLength)
961 {
962 	net_socket_private* socket = (net_socket_private*)_socket;
963 	BReference<net_socket_private> parent = socket->parent.GetReference();
964 
965 	if ((!parent.IsSet() && !socket->is_connected) || socket->peer.ss_len == 0)
966 		return ENOTCONN;
967 
968 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
969 	*_addressLength = socket->peer.ss_len;
970 	return B_OK;
971 }
972 
973 
974 int
975 socket_getsockname(net_socket* socket, struct sockaddr* address,
976 	socklen_t* _addressLength)
977 {
978 	if (socket->address.ss_len == 0) {
979 		struct sockaddr buffer;
980 		memset(&buffer, 0, sizeof(buffer));
981 		buffer.sa_family = socket->family;
982 
983 		memcpy(address, &buffer, min_c(*_addressLength, sizeof(buffer)));
984 		*_addressLength = sizeof(buffer);
985 		return B_OK;
986 	}
987 
988 	memcpy(address, &socket->address, min_c(*_addressLength,
989 		socket->address.ss_len));
990 	*_addressLength = socket->address.ss_len;
991 	return B_OK;
992 }
993 
994 
995 status_t
996 socket_get_option(net_socket* socket, int level, int option, void* value,
997 	int* _length)
998 {
999 	if (level != SOL_SOCKET)
1000 		return ENOPROTOOPT;
1001 
1002 	switch (option) {
1003 		case SO_SNDBUF:
1004 		{
1005 			uint32* size = (uint32*)value;
1006 			*size = socket->send.buffer_size;
1007 			*_length = sizeof(uint32);
1008 			return B_OK;
1009 		}
1010 
1011 		case SO_RCVBUF:
1012 		{
1013 			uint32* size = (uint32*)value;
1014 			*size = socket->receive.buffer_size;
1015 			*_length = sizeof(uint32);
1016 			return B_OK;
1017 		}
1018 
1019 		case SO_SNDLOWAT:
1020 		{
1021 			uint32* size = (uint32*)value;
1022 			*size = socket->send.low_water_mark;
1023 			*_length = sizeof(uint32);
1024 			return B_OK;
1025 		}
1026 
1027 		case SO_RCVLOWAT:
1028 		{
1029 			uint32* size = (uint32*)value;
1030 			*size = socket->receive.low_water_mark;
1031 			*_length = sizeof(uint32);
1032 			return B_OK;
1033 		}
1034 
1035 		case SO_RCVTIMEO:
1036 		case SO_SNDTIMEO:
1037 		{
1038 			if (*_length < (int)sizeof(struct timeval))
1039 				return B_BAD_VALUE;
1040 
1041 			bigtime_t timeout;
1042 			if (option == SO_SNDTIMEO)
1043 				timeout = socket->send.timeout;
1044 			else
1045 				timeout = socket->receive.timeout;
1046 			if (timeout == B_INFINITE_TIMEOUT)
1047 				timeout = 0;
1048 
1049 			struct timeval* timeval = (struct timeval*)value;
1050 			timeval->tv_sec = timeout / 1000000LL;
1051 			timeval->tv_usec = timeout % 1000000LL;
1052 
1053 			*_length = sizeof(struct timeval);
1054 			return B_OK;
1055 		}
1056 
1057 		case SO_NONBLOCK:
1058 		{
1059 			int32* _set = (int32*)value;
1060 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1061 			*_length = sizeof(int32);
1062 			return B_OK;
1063 		}
1064 
1065 		case SO_ACCEPTCONN:
1066 		case SO_BROADCAST:
1067 		case SO_DEBUG:
1068 		case SO_DONTROUTE:
1069 		case SO_KEEPALIVE:
1070 		case SO_OOBINLINE:
1071 		case SO_REUSEADDR:
1072 		case SO_REUSEPORT:
1073 		case SO_USELOOPBACK:
1074 		{
1075 			int32* _set = (int32*)value;
1076 			*_set = (socket->options & option) != 0;
1077 			*_length = sizeof(int32);
1078 			return B_OK;
1079 		}
1080 
1081 		case SO_TYPE:
1082 		{
1083 			int32* _set = (int32*)value;
1084 			*_set = socket->type;
1085 			*_length = sizeof(int32);
1086 			return B_OK;
1087 		}
1088 
1089 		case SO_ERROR:
1090 		{
1091 			int32* _set = (int32*)value;
1092 			*_set = socket->error;
1093 			*_length = sizeof(int32);
1094 
1095 			socket->error = B_OK;
1096 				// clear error upon retrieval
1097 			return B_OK;
1098 		}
1099 
1100 		default:
1101 			break;
1102 	}
1103 
1104 	dprintf("socket_getsockopt: unknown option %d\n", option);
1105 	return ENOPROTOOPT;
1106 }
1107 
1108 
1109 int
1110 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1111 	int* _length)
1112 {
1113 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1114 		level, option, value, _length);
1115 }
1116 
1117 
1118 int
1119 socket_listen(net_socket* socket, int backlog)
1120 {
1121 	status_t status = socket->first_info->listen(socket->first_protocol,
1122 		backlog);
1123 	if (status == B_OK)
1124 		socket->options |= SO_ACCEPTCONN;
1125 
1126 	return status;
1127 }
1128 
1129 
/*!	Receives data from the socket into \a data, and -- if \a header is
	given -- into the additional I/O vectors of the header.

	If the protocol implements read_data_no_buffer(), the work is delegated
	to socket_receive_no_buffer(). Otherwise a net_buffer is read from the
	protocol and copied out; in that case \a data is expected to be the base
	of the header's first I/O vector.

	\param header Optional message header; its control buffer, name, and
		flags fields are filled in.
	\param data The buffer to receive into.
	\param length The size of \a data.
	\param flags MSG_* flags. MSG_NOSIGNAL is ignored; MSG_TRUNC is handled
		here, and not passed on to read_data().
	\return The number of bytes copied (or the full size of the received
		buffer if it had to be truncated, and MSG_TRUNC was specified), 0 if
		the protocol returned no buffer, or a negative error code.
*/
ssize_t
socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
	int flags)
{
	// remembered, since MSG_TRUNC is masked off "flags" below
	const int originalFlags = flags;

	// MSG_NOSIGNAL is only meaningful for send(), not receive(), but it is
	// sometimes specified anyway. Mask it off to avoid unnecessary errors.
	flags &= ~MSG_NOSIGNAL;

	// If the protocol sports read_data_no_buffer() we use it.
	if (socket->first_info->read_data_no_buffer != NULL)
		return socket_receive_no_buffer(socket, header, data, length, flags);

	// Mask off flags handled in this function.
	flags &= ~(MSG_TRUNC);

	size_t totalLength = length;
	if (header != NULL) {
		ASSERT(data == header->msg_iov[0].iov_base);

		// calculate the length considering all of the extra buffers
		for (int i = 1; i < header->msg_iovlen; i++)
			totalLength += header->msg_iov[i].iov_len;
	}

	net_buffer* buffer;
	status_t status = socket->first_info->read_data(
		socket->first_protocol, totalLength, flags, &buffer);
	if (status != B_OK)
		return status;

	// process ancillary data
	if (header != NULL) {
		if (buffer != NULL && header->msg_control != NULL) {
			// Use the buffer's ancillary data container, if it has one,
			// otherwise let the protocol work on the buffer itself.
			ancillary_data_container* container
				= gNetBufferModule.get_ancillary_data(buffer);
			if (container != NULL)
				status = process_ancillary_data(socket, container, header);
			else
				status = process_ancillary_data(socket, buffer, header);
			if (status != B_OK) {
				gNetBufferModule.free(buffer);
				return status;
			}
		} else
			header->msg_controllen = 0;
	}

	// TODO: - returning a NULL buffer when received 0 bytes
	//         may not make much sense as we still need the address

	// The size of the caller's name buffer; msg_namelen is reset here, and
	// only set again if an address is actually copied below.
	size_t nameLen = 0;
	if (header != NULL) {
		// TODO: - consider the control buffer options
		nameLen = header->msg_namelen;
		header->msg_namelen = 0;
		header->msg_flags = 0;
	}

	if (buffer == NULL)
		return 0;

	const size_t bytesReceived = buffer->size;
	size_t bytesCopied = 0;

	// the first chunk always goes to "data"
	size_t toRead = min_c(bytesReceived, length);
	status = gNetBufferModule.read(buffer, 0, data, toRead);
	if (status != B_OK) {
		gNetBufferModule.free(buffer);

		// any error other than B_BAD_ADDRESS is reported as ENOBUFS
		if (status == B_BAD_ADDRESS)
			return status;
		return ENOBUFS;
	}

	// if first copy was a success, proceed to following copies as required
	bytesCopied += toRead;

	if (header != NULL) {
		// We start at iovec[1] as { data, length } is iovec[0].
		for (int i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
			iovec& vec = header->msg_iov[i];
			toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
			// A failure here is not reported as an error; the data copied
			// so far is returned, and the message counts as truncated.
			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
					toRead) < B_OK) {
				break;
			}

			bytesCopied += toRead;
		}

		// copy the source address, truncated to the caller's name buffer
		if (header->msg_name != NULL) {
			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
			memcpy(header->msg_name, buffer->source, header->msg_namelen);
		}
	}

	gNetBufferModule.free(buffer);

	if (bytesCopied < bytesReceived) {
		// not everything fit into the caller's buffers
		if (header != NULL)
			header->msg_flags = MSG_TRUNC;

		// with MSG_TRUNC, the caller wants to know the real size
		if ((originalFlags & MSG_TRUNC) != 0)
			return bytesReceived;
	}

	return bytesCopied;
}
1240 
1241 
1242 ssize_t
1243 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1244 	int flags)
1245 {
1246 	const bool nosignal = ((flags & MSG_NOSIGNAL) != 0);
1247 	flags &= ~MSG_NOSIGNAL;
1248 
1249 	size_t bytesLeft = length;
1250 	if (length > SSIZE_MAX)
1251 		return B_BAD_VALUE;
1252 
1253 	ancillary_data_container* ancillaryData = NULL;
1254 	CObjectDeleter<
1255 		ancillary_data_container, void, delete_ancillary_data_container>
1256 		ancillaryDataDeleter;
1257 
1258 	const sockaddr* address = NULL;
1259 	socklen_t addressLength = 0;
1260 	if (header != NULL) {
1261 		address = (const sockaddr*)header->msg_name;
1262 		addressLength = header->msg_namelen;
1263 
1264 		// get the ancillary data
1265 		if (header->msg_control != NULL) {
1266 			ancillaryData = create_ancillary_data_container();
1267 			if (ancillaryData == NULL)
1268 				return B_NO_MEMORY;
1269 			ancillaryDataDeleter.SetTo(ancillaryData);
1270 
1271 			status_t status = add_ancillary_data(socket, ancillaryData,
1272 				(cmsghdr*)header->msg_control, header->msg_controllen);
1273 			if (status != B_OK)
1274 				return status;
1275 		}
1276 	}
1277 
1278 	if (addressLength == 0)
1279 		address = NULL;
1280 	else if (address == NULL)
1281 		return B_BAD_VALUE;
1282 
1283 	if (socket->peer.ss_len != 0) {
1284 		if (address != NULL)
1285 			return EISCONN;
1286 
1287 		// socket is connected, we use that address
1288 		address = (struct sockaddr*)&socket->peer;
1289 		addressLength = socket->peer.ss_len;
1290 	}
1291 
1292 	if (address == NULL || addressLength == 0) {
1293 		// don't know where to send to:
1294 		return EDESTADDRREQ;
1295 	}
1296 
1297 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1298 		&& bytesLeft > socket->send.buffer_size)
1299 		return EMSGSIZE;
1300 
1301 	if (socket->address.ss_len == 0) {
1302 		// try to bind first
1303 		status_t status = socket_bind(socket, NULL, 0);
1304 		if (status != B_OK)
1305 			return status;
1306 	}
1307 
1308 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1309 	if (socket->first_info->send_data_no_buffer != NULL) {
1310 		iovec stackVec = { (void*)data, length };
1311 		iovec* vecs = header ? header->msg_iov : &stackVec;
1312 		int vecCount = header ? header->msg_iovlen : 1;
1313 
1314 		ssize_t written = socket->first_info->send_data_no_buffer(
1315 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1316 			addressLength, flags);
1317 
1318 		// we only send signals when called from userland
1319 		if (written == EPIPE && is_syscall() && !nosignal)
1320 			send_signal(find_thread(NULL), SIGPIPE);
1321 
1322 		if (written > 0)
1323 			ancillaryDataDeleter.Detach();
1324 		return written;
1325 	}
1326 
1327 	// By convention, if a header is given, the (data, length) equals the first
1328 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1329 	// the size of the remaining ones.
1330 	if (header != NULL) {
1331 		if (header->msg_iovlen <= 1) {
1332 			header = NULL;
1333 		} else {
1334 			for (int i = 1; i < header->msg_iovlen; i++)
1335 				bytesLeft += header->msg_iov[i].iov_len;
1336 		}
1337 	}
1338 
1339 	ssize_t bytesSent = 0;
1340 	size_t vecOffset = 0;
1341 	uint32 vecIndex = 0;
1342 
1343 	while (bytesLeft > 0) {
1344 		// TODO: useful, maybe even computed header space!
1345 		net_buffer* buffer = gNetBufferModule.create(256);
1346 		if (buffer == NULL)
1347 			return ENOBUFS;
1348 
1349 		while (buffer->size < socket->send.buffer_size
1350 			&& buffer->size < bytesLeft) {
1351 			if (vecIndex > 0 && vecOffset == 0) {
1352 				// retrieve next iovec buffer from header
1353 				data = header->msg_iov[vecIndex].iov_base;
1354 				length = header->msg_iov[vecIndex].iov_len;
1355 			}
1356 
1357 			size_t bytes = length;
1358 			if (buffer->size + bytes > socket->send.buffer_size)
1359 				bytes = socket->send.buffer_size - buffer->size;
1360 
1361 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1362 				gNetBufferModule.free(buffer);
1363 				return ENOBUFS;
1364 			}
1365 
1366 			if (bytes != length) {
1367 				// partial send
1368 				vecOffset = bytes;
1369 				length -= vecOffset;
1370 				data = (uint8*)data + vecOffset;
1371 			} else if (header != NULL) {
1372 				// proceed with next buffer, if any
1373 				vecOffset = 0;
1374 				vecIndex++;
1375 
1376 				if (vecIndex >= (uint32)header->msg_iovlen)
1377 					break;
1378 			}
1379 		}
1380 
1381 		// attach ancillary data to the first buffer
1382 		status_t status;
1383 		if (ancillaryData != NULL) {
1384 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1385 			ancillaryDataDeleter.Detach();
1386 			ancillaryData = NULL;
1387 		}
1388 
1389 		size_t bufferSize = buffer->size;
1390 		buffer->msg_flags = flags;
1391 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1392 		memcpy(buffer->destination, address, addressLength);
1393 		buffer->destination->sa_len = addressLength;
1394 
1395 		status = socket->first_info->send_data(socket->first_protocol, buffer);
1396 		if (status != B_OK) {
1397 			// we only send signals when called from userland
1398 			if (status == EPIPE && is_syscall() && !nosignal)
1399 				send_signal(find_thread(NULL), SIGPIPE);
1400 
1401 			size_t sizeAfterSend = buffer->size;
1402 			gNetBufferModule.free(buffer);
1403 
1404 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1405 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1406 				// this appears to be a partial write
1407 				return bytesSent + (bufferSize - sizeAfterSend);
1408 			}
1409 			return status;
1410 		}
1411 
1412 		bytesLeft -= bufferSize;
1413 		bytesSent += bufferSize;
1414 	}
1415 
1416 	return bytesSent;
1417 }
1418 
1419 
1420 status_t
1421 socket_set_option(net_socket* socket, int level, int option, const void* value,
1422 	int length)
1423 {
1424 	if (level != SOL_SOCKET)
1425 		return ENOPROTOOPT;
1426 
1427 	TRACE("%s(socket %p, option %d\n", __FUNCTION__, socket, option);
1428 
1429 	switch (option) {
1430 		// TODO: implement other options!
1431 		case SO_LINGER:
1432 		{
1433 			if (length < (int)sizeof(struct linger))
1434 				return B_BAD_VALUE;
1435 
1436 			struct linger* linger = (struct linger*)value;
1437 			if (linger->l_onoff) {
1438 				socket->options |= SO_LINGER;
1439 				socket->linger = linger->l_linger;
1440 			} else {
1441 				socket->options &= ~SO_LINGER;
1442 				socket->linger = 0;
1443 			}
1444 			return B_OK;
1445 		}
1446 
1447 		case SO_SNDBUF:
1448 			if (length != sizeof(uint32))
1449 				return B_BAD_VALUE;
1450 
1451 			socket->send.buffer_size = *(const uint32*)value;
1452 			return B_OK;
1453 
1454 		case SO_RCVBUF:
1455 			if (length != sizeof(uint32))
1456 				return B_BAD_VALUE;
1457 
1458 			socket->receive.buffer_size = *(const uint32*)value;
1459 			return B_OK;
1460 
1461 		case SO_SNDLOWAT:
1462 			if (length != sizeof(uint32))
1463 				return B_BAD_VALUE;
1464 
1465 			socket->send.low_water_mark = *(const uint32*)value;
1466 			return B_OK;
1467 
1468 		case SO_RCVLOWAT:
1469 			if (length != sizeof(uint32))
1470 				return B_BAD_VALUE;
1471 
1472 			socket->receive.low_water_mark = *(const uint32*)value;
1473 			return B_OK;
1474 
1475 		case SO_RCVTIMEO:
1476 		case SO_SNDTIMEO:
1477 		{
1478 			if (length != sizeof(struct timeval))
1479 				return B_BAD_VALUE;
1480 
1481 			const struct timeval* timeval = (const struct timeval*)value;
1482 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1483 			if (timeout == 0)
1484 				timeout = B_INFINITE_TIMEOUT;
1485 
1486 			if (option == SO_SNDTIMEO)
1487 				socket->send.timeout = timeout;
1488 			else
1489 				socket->receive.timeout = timeout;
1490 			return B_OK;
1491 		}
1492 
1493 		case SO_NONBLOCK:
1494 			if (length != sizeof(int32))
1495 				return B_BAD_VALUE;
1496 
1497 			if (*(const int32*)value) {
1498 				socket->send.timeout = 0;
1499 				socket->receive.timeout = 0;
1500 			} else {
1501 				socket->send.timeout = B_INFINITE_TIMEOUT;
1502 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1503 			}
1504 			return B_OK;
1505 
1506 		case SO_BROADCAST:
1507 		case SO_DEBUG:
1508 		case SO_DONTROUTE:
1509 		case SO_KEEPALIVE:
1510 		case SO_OOBINLINE:
1511 		case SO_REUSEADDR:
1512 		case SO_REUSEPORT:
1513 		case SO_USELOOPBACK:
1514 			if (length != sizeof(int32))
1515 				return B_BAD_VALUE;
1516 
1517 			if (*(const int32*)value)
1518 				socket->options |= option;
1519 			else
1520 				socket->options &= ~option;
1521 			return B_OK;
1522 
1523 		case SO_BINDTODEVICE:
1524 		{
1525 			if (length != sizeof(uint32))
1526 				return B_BAD_VALUE;
1527 
1528 			// TODO: we might want to check if the device exists at all
1529 			// (although it doesn't really harm when we don't)
1530 			socket->bound_to_device = *(const uint32*)value;
1531 			return B_OK;
1532 		}
1533 
1534 		default:
1535 			break;
1536 	}
1537 
1538 	dprintf("socket_setsockopt: unknown option %d\n", option);
1539 	return ENOPROTOOPT;
1540 }
1541 
1542 
1543 int
1544 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1545 	int length)
1546 {
1547 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1548 		level, option, value, length);
1549 }
1550 
1551 
1552 int
1553 socket_shutdown(net_socket* socket, int direction)
1554 {
1555 	return socket->first_info->shutdown(socket->first_protocol, direction);
1556 }
1557 
1558 
1559 status_t
1560 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1561 {
1562 	sockets[0] = NULL;
1563 	sockets[1] = NULL;
1564 
1565 	// create sockets
1566 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1567 	if (error != B_OK)
1568 		return error;
1569 
1570 	error = socket_open(family, type, protocol, &sockets[1]);
1571 
1572 	// bind one
1573 	if (error == B_OK)
1574 		error = socket_bind(sockets[0], NULL, 0);
1575 
1576 	// start listening
1577 	if (error == B_OK && type == SOCK_STREAM)
1578 		error = socket_listen(sockets[0], 1);
1579 
1580 	// connect them
1581 	if (error == B_OK) {
1582 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1583 			sockets[0]->address.ss_len);
1584 	}
1585 
1586 	if (error == B_OK) {
1587 		// accept a socket
1588 		if (type == SOCK_STREAM) {
1589 			net_socket* acceptedSocket = NULL;
1590 			error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1591 			if (error == B_OK) {
1592 				// everything worked: close the listener socket
1593 				socket_close(sockets[0]);
1594 				socket_free(sockets[0]);
1595 				sockets[0] = acceptedSocket;
1596 			}
1597 		// connect the other side
1598 		} else {
1599 			error = socket_connect(sockets[0], (sockaddr*)&sockets[1]->address,
1600 				sockets[1]->address.ss_len);
1601 		}
1602 	}
1603 
1604 	if (error != B_OK) {
1605 		// close sockets on error
1606 		for (int i = 0; i < 2; i++) {
1607 			if (sockets[i] != NULL) {
1608 				socket_close(sockets[i]);
1609 				socket_free(sockets[i]);
1610 				sockets[i] = NULL;
1611 			}
1612 		}
1613 	}
1614 
1615 	return error;
1616 }
1617 
1618 
1619 //	#pragma mark -
1620 
1621 
1622 static status_t
1623 socket_std_ops(int32 op, ...)
1624 {
1625 	switch (op) {
1626 		case B_MODULE_INIT:
1627 		{
1628 			new (&sSocketList) SocketList;
1629 			mutex_init(&sSocketLock, "socket list");
1630 
1631 #if ENABLE_DEBUGGER_COMMANDS
1632 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1633 			add_debugger_command("socket", dump_socket, "dumps a socket");
1634 #endif
1635 			return B_OK;
1636 		}
1637 		case B_MODULE_UNINIT:
1638 			ASSERT(sSocketList.IsEmpty());
1639 			mutex_destroy(&sSocketLock);
1640 
1641 #if ENABLE_DEBUGGER_COMMANDS
1642 			remove_debugger_command("socket", dump_socket);
1643 			remove_debugger_command("sockets", dump_sockets);
1644 #endif
1645 			return B_OK;
1646 
1647 		default:
1648 			return B_ERROR;
1649 	}
1650 }
1651 
1652 
// The socket module's function table. This is a positional aggregate
// initializer: the entries have to stay in the order in which the members of
// net_socket_module_info are declared (the declaration is not part of this
// file).
net_socket_module_info gNetSocketModule = {
	// presumably the embedded module info: name, flags, std_ops hook
	{
		NET_SOCKET_MODULE_NAME,
		0,
		socket_std_ops
	},
	// socket life cycle
	socket_open,
	socket_close,
	socket_free,

	socket_control,

	// available bytes for reading/sending
	socket_read_avail,
	socket_send_avail,

	// data transfer
	socket_send_data,
	socket_receive_data,

	// SOL_SOCKET level options handled by the socket itself
	socket_get_option,
	socket_set_option,

	socket_get_next_stat,

	// connections
	socket_acquire,
	socket_release,
	socket_spawn_pending,
	socket_dequeue_connected,
	socket_count_connected,
	socket_set_max_backlog,
	socket_has_parent,
	socket_connected,
	socket_aborted,

	// notifications
	socket_request_notification,
	socket_cancel_notification,
	socket_notify,

	// standard socket API
	socket_accept,
	socket_bind,
	socket_connect,
	socket_getpeername,
	socket_getsockname,
	socket_getsockopt,
	socket_listen,
	socket_receive,
	socket_send,
	socket_setsockopt,
	socket_shutdown,
	socket_socketpair
};
1706 
1707