xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 3be9edf8da228afd9fec0390f408c964766122aa)
1 /*
2  * Copyright 2006-2009, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
40 #define ADD_DEBUGGER_COMMANDS
41 
42 
43 struct net_socket_private;
44 typedef DoublyLinkedList<net_socket_private> SocketList;
45 
46 struct net_socket_private : net_socket,
47 		DoublyLinkedListLinkImpl<net_socket_private>,
48 		WeakReferenceable<net_socket_private> {
49 	net_socket_private();
50 	~net_socket_private();
51 
52 	void RemoveFromParent();
53 
54 	WeakPointer<net_socket_private>* parent;
55 	team_id						owner;
56 	uint32						max_backlog;
57 	uint32						child_count;
58 	SocketList					pending_children;
59 	SocketList					connected_children;
60 
61 	struct select_sync_pool*	select_pool;
62 	mutex						lock;
63 
64 	bool						is_connected;
65 	bool						is_in_socket_list;
66 };
67 
68 
69 int socket_bind(net_socket* socket, const struct sockaddr* address,
70 	socklen_t addressLength);
71 int socket_setsockopt(net_socket* socket, int level, int option,
72 	const void* value, int length);
73 ssize_t socket_read_avail(net_socket* socket);
74 
75 static SocketList sSocketList;
76 static mutex sSocketLock;
77 
78 
79 net_socket_private::net_socket_private()
80 	: WeakReferenceable<net_socket_private>(this),
81 	parent(NULL),
82 	owner(-1),
83 	max_backlog(0),
84 	child_count(0),
85 	select_pool(NULL),
86 	is_connected(false),
87 	is_in_socket_list(false)
88 {
89 	first_protocol = NULL;
90 	first_info = NULL;
91 	options = 0;
92 	linger = 0;
93 	bound_to_device = 0;
94 	error = 0;
95 
96 	address.ss_len = 0;
97 	peer.ss_len = 0;
98 
99 	mutex_init(&lock, "socket");
100 
101 	// set defaults (may be overridden by the protocols)
102 	send.buffer_size = 65535;
103 	send.low_water_mark = 1;
104 	send.timeout = B_INFINITE_TIMEOUT;
105 	receive.buffer_size = 65535;
106 	receive.low_water_mark = 1;
107 	receive.timeout = B_INFINITE_TIMEOUT;
108 }
109 
110 
111 net_socket_private::~net_socket_private()
112 {
113 	if (parent != NULL)
114 		panic("socket still has a parent!");
115 
116 	if (is_in_socket_list) {
117 		MutexLocker _(sSocketLock);
118 		sSocketList.Remove(this);
119 	}
120 
121 	mutex_lock(&lock);
122 
123 	// also delete all children of this socket
124 	while (net_socket_private* child = pending_children.RemoveHead()) {
125 		child->RemoveFromParent();
126 	}
127 	while (net_socket_private* child = connected_children.RemoveHead()) {
128 		child->RemoveFromParent();
129 	}
130 
131 	put_domain_protocols(this);
132 
133 	mutex_unlock(&lock);
134 	mutex_destroy(&lock);
135 }
136 
137 
138 void
139 net_socket_private::RemoveFromParent()
140 {
141 	ASSERT(!is_in_socket_list && parent != NULL);
142 
143 	parent->RemoveReference();
144 	parent = NULL;
145 
146 	mutex_lock(&sSocketLock);
147 	sSocketList.Add(this);
148 	mutex_unlock(&sSocketLock);
149 
150 	is_in_socket_list = true;
151 
152 	RemoveReference();
153 }
154 
155 
156 //	#pragma mark -
157 
158 
159 static size_t
160 compute_user_iovec_length(iovec* userVec, uint32 count)
161 {
162 	size_t length = 0;
163 
164 	for (uint32 i = 0; i < count; i++) {
165 		iovec vec;
166 		if (user_memcpy(&vec, userVec + i, sizeof(iovec)) < B_OK)
167 			return 0;
168 
169 		length += vec.iov_len;
170 	}
171 
172 	return length;
173 }
174 
175 
176 static status_t
177 create_socket(int family, int type, int protocol, net_socket_private** _socket)
178 {
179 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
180 	if (socket == NULL)
181 		return B_NO_MEMORY;
182 
183 	socket->family = family;
184 	socket->type = type;
185 	socket->protocol = protocol;
186 
187 	status_t status = get_domain_protocols(socket);
188 	if (status < B_OK) {
189 		delete socket;
190 		return status;
191 	}
192 
193 	*_socket = socket;
194 	return B_OK;
195 }
196 
197 
198 static status_t
199 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
200 	void* data, size_t dataLen)
201 {
202 	cmsghdr* header = (cmsghdr*)data;
203 
204 	while (dataLen > 0) {
205 		if (header->cmsg_len < sizeof(cmsghdr) || header->cmsg_len > dataLen)
206 			return B_BAD_VALUE;
207 
208 		if (socket->first_info->add_ancillary_data == NULL)
209 			return EOPNOTSUPP;
210 
211 		status_t status = socket->first_info->add_ancillary_data(
212 			socket->first_protocol, container, header);
213 		if (status != B_OK)
214 			return status;
215 
216 		dataLen -= _ALIGN(header->cmsg_len);
217 		header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
218 	}
219 
220 	return B_OK;
221 }
222 
223 
224 static status_t
225 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
226 	msghdr* messageHeader)
227 {
228 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
229 	int dataBufferLen = messageHeader->msg_controllen;
230 
231 	if (container == NULL || dataBuffer == NULL) {
232 		messageHeader->msg_controllen = 0;
233 		return B_OK;
234 	}
235 
236 	ancillary_data_header header;
237 	void* data = NULL;
238 
239 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
240 		if (socket->first_info->process_ancillary_data == NULL)
241 			return EOPNOTSUPP;
242 
243 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
244 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
245 		if (bytesWritten < 0)
246 			return bytesWritten;
247 
248 		dataBuffer += bytesWritten;
249 		dataBufferLen -= bytesWritten;
250 	}
251 
252 	messageHeader->msg_controllen -= dataBufferLen;
253 
254 	return B_OK;
255 }
256 
257 
258 static status_t
259 process_ancillary_data(net_socket* socket,
260 	net_buffer* buffer, msghdr* messageHeader)
261 {
262 	void *dataBuffer = messageHeader->msg_control;
263 	ssize_t bytesWritten;
264 
265 	if (dataBuffer == NULL) {
266 		messageHeader->msg_controllen = 0;
267 		return B_OK;
268 	}
269 
270 	if (socket->first_info->process_ancillary_data_no_container == NULL)
271 		return EOPNOTSUPP;
272 
273 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
274 		socket->first_protocol, buffer, dataBuffer,
275 		messageHeader->msg_controllen);
276 	if (bytesWritten < 0)
277 		return bytesWritten;
278 	messageHeader->msg_controllen = bytesWritten;
279 
280 	return B_OK;
281 }
282 
283 
284 static ssize_t
285 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
286 	size_t length, int flags)
287 {
288 	iovec stackVec = { data, length };
289 	iovec* vecs = header ? header->msg_iov : &stackVec;
290 	int vecCount = header ? header->msg_iovlen : 1;
291 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
292 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
293 
294 	ancillary_data_container* ancillaryData = NULL;
295 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
296 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
297 		addressLen);
298 	if (bytesRead < 0)
299 		return bytesRead;
300 
301 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(ancillaryData,
302 		&delete_ancillary_data_container);
303 
304 	// process ancillary data
305 	if (header != NULL) {
306 		status_t status = process_ancillary_data(socket, ancillaryData, header);
307 		if (status != B_OK)
308 			return status;
309 
310 		header->msg_flags = 0;
311 	}
312 
313 	return bytesRead;
314 }
315 
316 
317 #ifdef ADD_DEBUGGER_COMMANDS
318 
319 static void
320 print_socket_line(net_socket_private* socket, const char* prefix)
321 {
322 	kprintf("%s%p %2d.%2d.%2d %6ld %p %p  %p%s\n", prefix, socket,
323 		socket->family, socket->type, socket->protocol, socket->owner,
324 		socket->first_protocol, socket->first_info, socket->parent,
325 		socket->parent != NULL ? socket->is_connected ? " (c)" : " (p)" : "");
326 }
327 
328 
329 static int
330 dump_socket(int argc, char** argv)
331 {
332 	if (argc < 2) {
333 		kprintf("usage: %s [address]\n", argv[0]);
334 		return 0;
335 	}
336 
337 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
338 
339 	kprintf("SOCKET %p\n", socket);
340 	kprintf("  family.type.protocol: %d.%d.%d\n",
341 		socket->family, socket->type, socket->protocol);
342 	WeakReference<net_socket_private> parent = socket->parent;
343 	kprintf("  parent:               %p (%p)\n", parent.Get(), socket->parent);
344 	kprintf("  first protocol:       %p\n", socket->first_protocol);
345 	kprintf("  first module_info:    %p\n", socket->first_info);
346 	kprintf("  options:              %x\n", socket->options);
347 	kprintf("  linger:               %d\n", socket->linger);
348 	kprintf("  bound to device:      %d\n", socket->bound_to_device);
349 	kprintf("  owner:                %ld\n", socket->owner);
350 	kprintf("  max backlog:          %ld\n", socket->max_backlog);
351 	kprintf("  is connected:         %d\n", socket->is_connected);
352 	kprintf("  child_count:          %lu\n", socket->child_count);
353 
354 	if (socket->child_count == 0)
355 		return 0;
356 
357 	kprintf("    pending children:\n");
358 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
359 	while (net_socket_private* child = iterator.Next()) {
360 		print_socket_line(child, "      ");
361 	}
362 
363 	kprintf("    connected children:\n");
364 	iterator = socket->connected_children.GetIterator();
365 	while (net_socket_private* child = iterator.Next()) {
366 		print_socket_line(child, "      ");
367 	}
368 
369 	return 0;
370 }
371 
372 
373 static int
374 dump_sockets(int argc, char** argv)
375 {
376 	kprintf("address        kind  owner protocol   module_info parent\n");
377 
378 	SocketList::Iterator iterator = sSocketList.GetIterator();
379 	while (net_socket_private* socket = iterator.Next()) {
380 		print_socket_line(socket, "");
381 
382 		SocketList::Iterator childIterator
383 			= socket->pending_children.GetIterator();
384 		while (net_socket_private* child = childIterator.Next()) {
385 			print_socket_line(child, " ");
386 		}
387 
388 		childIterator = socket->connected_children.GetIterator();
389 		while (net_socket_private* child = childIterator.Next()) {
390 			print_socket_line(child, " ");
391 		}
392 	}
393 
394 	return 0;
395 }
396 
397 #endif	// ADD_DEBUGGER_COMMANDS
398 
399 
400 //	#pragma mark -
401 
402 
403 status_t
404 socket_open(int family, int type, int protocol, net_socket** _socket)
405 {
406 	net_socket_private* socket;
407 	status_t status = create_socket(family, type, protocol, &socket);
408 	if (status < B_OK)
409 		return status;
410 
411 	status = socket->first_info->open(socket->first_protocol);
412 	if (status < B_OK) {
413 		delete socket;
414 		return status;
415 	}
416 
417 	socket->owner = team_get_current_team_id();
418 	socket->is_in_socket_list = true;
419 
420 	mutex_lock(&sSocketLock);
421 	sSocketList.Add(socket);
422 	mutex_unlock(&sSocketLock);
423 
424 	*_socket = socket;
425 	return B_OK;
426 }
427 
428 
429 status_t
430 socket_close(net_socket* _socket)
431 {
432 	net_socket_private* socket = (net_socket_private*)_socket;
433 	return socket->first_info->close(socket->first_protocol);
434 }
435 
436 
437 void
438 socket_free(net_socket* _socket)
439 {
440 	net_socket_private* socket = (net_socket_private*)_socket;
441 	socket->first_info->free(socket->first_protocol);
442 	socket->RemoveReference();
443 }
444 
445 
446 status_t
447 socket_readv(net_socket* socket, const iovec* vecs, size_t vecCount,
448 	size_t* _length)
449 {
450 	return -1;
451 }
452 
453 
454 status_t
455 socket_writev(net_socket* socket, const iovec* vecs, size_t vecCount,
456 	size_t* _length)
457 {
458 	if (socket->peer.ss_len == 0)
459 		return ECONNRESET;
460 
461 	if (socket->address.ss_len == 0) {
462 		// try to bind first
463 		status_t status = socket_bind(socket, NULL, 0);
464 		if (status < B_OK)
465 			return status;
466 	}
467 
468 	// TODO: useful, maybe even computed header space!
469 	net_buffer* buffer = gNetBufferModule.create(256);
470 	if (buffer == NULL)
471 		return ENOBUFS;
472 
473 	// copy data into buffer
474 
475 	for (uint32 i = 0; i < vecCount; i++) {
476 		if (gNetBufferModule.append(buffer, vecs[i].iov_base,
477 				vecs[i].iov_len) < B_OK) {
478 			gNetBufferModule.free(buffer);
479 			return ENOBUFS;
480 		}
481 	}
482 
483 	memcpy(buffer->source, &socket->address, socket->address.ss_len);
484 	memcpy(buffer->destination, &socket->peer, socket->peer.ss_len);
485 	size_t size = buffer->size;
486 
487 	ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
488 		buffer);
489 	if (bytesWritten < B_OK) {
490 		if (buffer->size != size) {
491 			// this appears to be a partial write
492 			*_length = size - buffer->size;
493 		}
494 		gNetBufferModule.free(buffer);
495 		return bytesWritten;
496 	}
497 
498 	*_length = bytesWritten;
499 	return B_OK;
500 }
501 
502 
503 status_t
504 socket_control(net_socket* socket, int32 op, void* data, size_t length)
505 {
506 	switch (op) {
507 		case FIONBIO:
508 		{
509 			if (data == NULL)
510 				return B_BAD_VALUE;
511 
512 			int value;
513 			if (is_syscall()) {
514 				if (!IS_USER_ADDRESS(data)
515 					|| user_memcpy(&value, data, sizeof(int)) != B_OK) {
516 					return B_BAD_ADDRESS;
517 				}
518 			} else
519 				value = *(int*)data;
520 
521 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
522 				sizeof(int));
523 		}
524 
525 		case FIONREAD:
526 		{
527 			if (data == NULL)
528 				return B_BAD_VALUE;
529 
530 			ssize_t available = socket_read_avail(socket);
531 			if (available < B_OK)
532 				return available;
533 
534 			if (is_syscall()) {
535 				if (!IS_USER_ADDRESS(data)
536 					|| user_memcpy(data, &available, sizeof(ssize_t)) != B_OK) {
537 					return B_BAD_ADDRESS;
538 				}
539 			} else
540 				*(ssize_t *)data = available;
541 
542 			return B_OK;
543 		}
544 
545 		case B_SET_BLOCKING_IO:
546 		case B_SET_NONBLOCKING_IO:
547 		{
548 			int value = op == B_SET_NONBLOCKING_IO;
549 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
550 				sizeof(int));
551 		}
552 	}
553 
554 	return socket->first_info->control(socket->first_protocol,
555 		LEVEL_DRIVER_IOCTL, op, data, &length);
556 }
557 
558 
559 ssize_t
560 socket_read_avail(net_socket* socket)
561 {
562 	return socket->first_info->read_avail(socket->first_protocol);
563 }
564 
565 
566 ssize_t
567 socket_send_avail(net_socket* socket)
568 {
569 	return socket->first_info->send_avail(socket->first_protocol);
570 }
571 
572 
573 status_t
574 socket_send_data(net_socket* socket, net_buffer* buffer)
575 {
576 	return socket->first_info->send_data(socket->first_protocol,
577 		buffer);
578 }
579 
580 
581 status_t
582 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
583 	net_buffer** _buffer)
584 {
585 	status_t status = socket->first_info->read_data(socket->first_protocol,
586 		length, flags, _buffer);
587 
588 	if (status < B_OK)
589 		return status;
590 
591 	if (*_buffer && length < (*_buffer)->size) {
592 		// discard any data behind the amount requested
593 		gNetBufferModule.trim(*_buffer, length);
594 	}
595 
596 	return status;
597 }
598 
599 
600 status_t
601 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
602 {
603 	MutexLocker locker(sSocketLock);
604 
605 	net_socket_private* socket = NULL;
606 	SocketList::Iterator iterator = sSocketList.GetIterator();
607 	uint32 cookie = *_cookie;
608 	uint32 count = 0;
609 
610 	while (true) {
611 		socket = iterator.Next();
612 		if (socket == NULL)
613 			return B_ENTRY_NOT_FOUND;
614 
615 		// TODO: also traverse the pending connections
616 		if (count == cookie)
617 			break;
618 
619 		if (family == -1 || family == socket->family)
620 			count++;
621 	}
622 
623 	*_cookie = count + 1;
624 
625 	stat->family = socket->family;
626 	stat->type = socket->type;
627 	stat->protocol = socket->protocol;
628 	stat->owner = socket->owner;
629 	stat->state[0] = '\0';
630 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
631 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
632 	stat->receive_queue_size = 0;
633 	stat->send_queue_size = 0;
634 
635 	// fill in protocol specific data (if supported by the protocol)
636 	size_t length = sizeof(net_stat);
637 	socket->first_info->control(socket->first_protocol, socket->protocol,
638 		NET_STAT_SOCKET, stat, &length);
639 
640 	return B_OK;
641 }
642 
643 
644 //	#pragma mark - connections
645 
646 
647 bool
648 socket_acquire(net_socket* _socket)
649 {
650 	net_socket_private* socket = (net_socket_private*)_socket;
651 
652 	// During destruction, the socket might still be accessible over its endpoint
653 	// protocol. We need to make sure the endpoint cannot acquire the socket
654 	// anymore -- while not obvious, the endpoint protocol is responsible for the
655 	// proper locking here.
656 	if (socket->CountReferences() == 0)
657 		return false;
658 
659 	socket->AddReference();
660 	return true;
661 }
662 
663 
664 bool
665 socket_release(net_socket* _socket)
666 {
667 	net_socket_private* socket = (net_socket_private*)_socket;
668 	return socket->RemoveReference();
669 }
670 
671 
672 status_t
673 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
674 {
675 	net_socket_private* parent = (net_socket_private*)_parent;
676 
677 	MutexLocker locker(parent->lock);
678 
679 	// We actually accept more pending connections to compensate for those
680 	// that never complete, and also make sure at least a single connection
681 	// can always be accepted
682 	if (parent->child_count > 3 * parent->max_backlog / 2)
683 		return ENOBUFS;
684 
685 	net_socket_private* socket;
686 	status_t status = create_socket(parent->family, parent->type,
687 		parent->protocol, &socket);
688 	if (status < B_OK)
689 		return status;
690 
691 	// inherit parent's properties
692 	socket->send = parent->send;
693 	socket->receive = parent->receive;
694 	socket->options = parent->options & ~SO_ACCEPTCONN;
695 	socket->linger = parent->linger;
696 	socket->owner = parent->owner;
697 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
698 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
699 
700 	// add to the parent's list of pending connections
701 	parent->pending_children.Add(socket);
702 	socket->parent = parent->GetWeakPointer();
703 	parent->child_count++;
704 
705 	*_socket = socket;
706 	return B_OK;
707 }
708 
709 
710 /*!	Dequeues a connected child from a parent socket.
711 	It also returns a reference with the child socket.
712 */
713 status_t
714 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
715 {
716 	net_socket_private* parent = (net_socket_private*)_parent;
717 
718 	mutex_lock(&parent->lock);
719 
720 	net_socket_private* socket = parent->connected_children.RemoveHead();
721 	if (socket != NULL) {
722 		socket->AddReference();
723 		socket->RemoveFromParent();
724 		parent->child_count--;
725 		*_socket = socket;
726 	}
727 
728 	mutex_unlock(&parent->lock);
729 
730 	if (socket == NULL)
731 		return B_ENTRY_NOT_FOUND;
732 
733 	return B_OK;
734 }
735 
736 
737 ssize_t
738 socket_count_connected(net_socket* _parent)
739 {
740 	net_socket_private* parent = (net_socket_private*)_parent;
741 
742 	MutexLocker _(parent->lock);
743 	return parent->connected_children.Count();
744 }
745 
746 
747 status_t
748 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
749 {
750 	net_socket_private* socket = (net_socket_private*)_socket;
751 
752 	// we enforce an upper limit of connections waiting to be accepted
753 	if (backlog > 256)
754 		backlog = 256;
755 
756 	MutexLocker _(socket->lock);
757 
758 	// first remove the pending connections, then the already connected
759 	// ones as needed
760 	net_socket_private* child;
761 	while (socket->child_count > backlog
762 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
763 		child->RemoveFromParent();
764 		socket->child_count--;
765 	}
766 	while (socket->child_count > backlog
767 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
768 		child->RemoveFromParent();
769 		socket->child_count--;
770 	}
771 
772 	socket->max_backlog = backlog;
773 	return B_OK;
774 }
775 
776 
777 /*!	Returns whether or not this socket has a parent. The parent might not be
778 	valid anymore, though.
779 */
780 bool
781 socket_has_parent(net_socket* _socket)
782 {
783 	net_socket_private* socket = (net_socket_private*)_socket;
784 	return socket->parent != NULL;
785 }
786 
787 
788 /*!	The socket has been connected. It will be moved to the connected queue
789 	of its parent socket.
790 */
791 status_t
792 socket_connected(net_socket* _socket)
793 {
794 	net_socket_private* socket = (net_socket_private*)_socket;
795 
796 	WeakReference<net_socket_private> parent = socket->parent;
797 	if (parent.Get() == NULL)
798 		return B_BAD_VALUE;
799 
800 	MutexLocker _(parent->lock);
801 
802 	parent->pending_children.Remove(socket);
803 	parent->connected_children.Add(socket);
804 	socket->is_connected = true;
805 
806 	// notify parent
807 	if (parent->select_pool)
808 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
809 
810 	return B_OK;
811 }
812 
813 
814 /*!	The socket has been aborted. Steals the parent's reference, and releases
815 	it.
816 */
817 status_t
818 socket_aborted(net_socket* _socket)
819 {
820 	net_socket_private* socket = (net_socket_private*)_socket;
821 
822 	WeakReference<net_socket_private> parent = socket->parent;
823 	if (parent.Get() == NULL)
824 		return B_BAD_VALUE;
825 
826 	MutexLocker _(parent->lock);
827 
828 	if (socket->is_connected)
829 		parent->connected_children.Remove(socket);
830 	else
831 		parent->pending_children.Remove(socket);
832 
833 	parent->child_count--;
834 	socket->RemoveFromParent();
835 
836 	return B_OK;
837 }
838 
839 
840 //	#pragma mark - notifications
841 
842 
843 status_t
844 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
845 {
846 	net_socket_private* socket = (net_socket_private*)_socket;
847 
848 	mutex_lock(&socket->lock);
849 
850 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
851 		event);
852 
853 	mutex_unlock(&socket->lock);
854 
855 	if (status < B_OK)
856 		return status;
857 
858 	// check if the event is already present
859 	// TODO: add support for poll() types
860 
861 	switch (event) {
862 		case B_SELECT_READ:
863 		{
864 			ssize_t available = socket_read_avail(socket);
865 			if ((ssize_t)socket->receive.low_water_mark <= available
866 				|| available < B_OK)
867 				notify_select_event(sync, event);
868 			break;
869 		}
870 		case B_SELECT_WRITE:
871 		{
872 			ssize_t available = socket_send_avail(socket);
873 			if ((ssize_t)socket->send.low_water_mark <= available
874 				|| available < B_OK)
875 				notify_select_event(sync, event);
876 			break;
877 		}
878 		case B_SELECT_ERROR:
879 			// TODO: B_SELECT_ERROR condition!
880 			break;
881 	}
882 
883 	return B_OK;
884 }
885 
886 
887 status_t
888 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
889 {
890 	net_socket_private* socket = (net_socket_private*)_socket;
891 
892 	MutexLocker _(socket->lock);
893 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
894 }
895 
896 
897 status_t
898 socket_notify(net_socket* _socket, uint8 event, int32 value)
899 {
900 	net_socket_private* socket = (net_socket_private*)_socket;
901 	bool notify = true;
902 
903 	switch (event) {
904 		case B_SELECT_READ:
905 			if ((ssize_t)socket->receive.low_water_mark > value && value >= B_OK)
906 				notify = false;
907 			break;
908 
909 		case B_SELECT_WRITE:
910 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
911 				notify = false;
912 			break;
913 
914 		case B_SELECT_ERROR:
915 			socket->error = value;
916 			break;
917 	}
918 
919 	MutexLocker _(socket->lock);
920 
921 	if (notify && socket->select_pool)
922 		notify_select_event_pool(socket->select_pool, event);
923 
924 	return B_OK;
925 }
926 
927 
928 //	#pragma mark - standard socket API
929 
930 
931 int
932 socket_accept(net_socket* socket, struct sockaddr* address,
933 	socklen_t* _addressLength, net_socket** _acceptedSocket)
934 {
935 	if ((socket->options & SO_ACCEPTCONN) == 0)
936 		return B_BAD_VALUE;
937 
938 	net_socket* accepted;
939 	status_t status = socket->first_info->accept(socket->first_protocol,
940 		&accepted);
941 	if (status < B_OK)
942 		return status;
943 
944 	if (address && *_addressLength > 0) {
945 		memcpy(address, &accepted->peer, min_c(*_addressLength,
946 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
947 		*_addressLength = accepted->peer.ss_len;
948 	}
949 
950 	*_acceptedSocket = accepted;
951 	return B_OK;
952 }
953 
954 
955 int
956 socket_bind(net_socket* socket, const struct sockaddr* address,
957 	socklen_t addressLength)
958 {
959 	sockaddr empty;
960 	if (address == NULL) {
961 		// special - try to bind to an empty address, like INADDR_ANY
962 		memset(&empty, 0, sizeof(sockaddr));
963 		empty.sa_len = sizeof(sockaddr);
964 		empty.sa_family = socket->family;
965 
966 		address = &empty;
967 		addressLength = sizeof(sockaddr);
968 	}
969 
970 	if (socket->address.ss_len != 0) {
971 		status_t status = socket->first_info->unbind(socket->first_protocol,
972 			(sockaddr*)&socket->address);
973 		if (status < B_OK)
974 			return status;
975 	}
976 
977 	memcpy(&socket->address, address, sizeof(sockaddr));
978 
979 	status_t status = socket->first_info->bind(socket->first_protocol,
980 		(sockaddr*)address);
981 	if (status < B_OK) {
982 		// clear address again, as binding failed
983 		socket->address.ss_len = 0;
984 	}
985 
986 	return status;
987 }
988 
989 
990 int
991 socket_connect(net_socket* socket, const struct sockaddr* address,
992 	socklen_t addressLength)
993 {
994 	if (address == NULL || addressLength == 0)
995 		return ENETUNREACH;
996 
997 	if (socket->address.ss_len == 0) {
998 		// try to bind first
999 		status_t status = socket_bind(socket, NULL, 0);
1000 		if (status < B_OK)
1001 			return status;
1002 	}
1003 
1004 	return socket->first_info->connect(socket->first_protocol, address);
1005 }
1006 
1007 
1008 int
1009 socket_getpeername(net_socket* socket, struct sockaddr* address,
1010 	socklen_t* _addressLength)
1011 {
1012 	if (socket->peer.ss_len == 0)
1013 		return ENOTCONN;
1014 
1015 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
1016 	*_addressLength = socket->peer.ss_len;
1017 	return B_OK;
1018 }
1019 
1020 
1021 int
1022 socket_getsockname(net_socket* socket, struct sockaddr* address,
1023 	socklen_t* _addressLength)
1024 {
1025 	if (socket->address.ss_len == 0)
1026 		return ENOTCONN;
1027 
1028 	memcpy(address, &socket->address, min_c(*_addressLength,
1029 		socket->address.ss_len));
1030 	*_addressLength = socket->address.ss_len;
1031 	return B_OK;
1032 }
1033 
1034 
1035 status_t
1036 socket_get_option(net_socket* socket, int level, int option, void* value,
1037 	int* _length)
1038 {
1039 	if (level != SOL_SOCKET)
1040 		return ENOPROTOOPT;
1041 
1042 	switch (option) {
1043 		case SO_SNDBUF:
1044 		{
1045 			uint32* size = (uint32*)value;
1046 			*size = socket->send.buffer_size;
1047 			*_length = sizeof(uint32);
1048 			return B_OK;
1049 		}
1050 
1051 		case SO_RCVBUF:
1052 		{
1053 			uint32* size = (uint32*)value;
1054 			*size = socket->receive.buffer_size;
1055 			*_length = sizeof(uint32);
1056 			return B_OK;
1057 		}
1058 
1059 		case SO_SNDLOWAT:
1060 		{
1061 			uint32* size = (uint32*)value;
1062 			*size = socket->send.low_water_mark;
1063 			*_length = sizeof(uint32);
1064 			return B_OK;
1065 		}
1066 
1067 		case SO_RCVLOWAT:
1068 		{
1069 			uint32* size = (uint32*)value;
1070 			*size = socket->receive.low_water_mark;
1071 			*_length = sizeof(uint32);
1072 			return B_OK;
1073 		}
1074 
1075 		case SO_RCVTIMEO:
1076 		case SO_SNDTIMEO:
1077 		{
1078 			if (*_length < (int)sizeof(struct timeval))
1079 				return B_BAD_VALUE;
1080 
1081 			bigtime_t timeout;
1082 			if (option == SO_SNDTIMEO)
1083 				timeout = socket->send.timeout;
1084 			else
1085 				timeout = socket->receive.timeout;
1086 			if (timeout == B_INFINITE_TIMEOUT)
1087 				timeout = 0;
1088 
1089 			struct timeval* timeval = (struct timeval*)value;
1090 			timeval->tv_sec = timeout / 1000000LL;
1091 			timeval->tv_usec = timeout % 1000000LL;
1092 
1093 			*_length = sizeof(struct timeval);
1094 			return B_OK;
1095 		}
1096 
1097 		case SO_NONBLOCK:
1098 		{
1099 			int32* _set = (int32*)value;
1100 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1101 			*_length = sizeof(int32);
1102 			return B_OK;
1103 		}
1104 
1105 		case SO_ACCEPTCONN:
1106 		case SO_BROADCAST:
1107 		case SO_DEBUG:
1108 		case SO_DONTROUTE:
1109 		case SO_KEEPALIVE:
1110 		case SO_OOBINLINE:
1111 		case SO_REUSEADDR:
1112 		case SO_REUSEPORT:
1113 		case SO_USELOOPBACK:
1114 		{
1115 			int32* _set = (int32*)value;
1116 			*_set = (socket->options & option) != 0;
1117 			*_length = sizeof(int32);
1118 			return B_OK;
1119 		}
1120 
1121 		case SO_TYPE:
1122 		{
1123 			int32* _set = (int32*)value;
1124 			*_set = socket->type;
1125 			*_length = sizeof(int32);
1126 			return B_OK;
1127 		}
1128 
1129 		case SO_ERROR:
1130 		{
1131 			int32* _set = (int32*)value;
1132 			*_set = socket->error;
1133 			*_length = sizeof(int32);
1134 
1135 			socket->error = B_OK;
1136 				// clear error upon retrieval
1137 			return B_OK;
1138 		}
1139 
1140 		default:
1141 			break;
1142 	}
1143 
1144 	dprintf("socket_getsockopt: unknown option %d\n", option);
1145 	return ENOPROTOOPT;
1146 }
1147 
1148 
1149 int
1150 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1151 	int* _length)
1152 {
1153 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1154 		level, option, value, _length);
1155 }
1156 
1157 
1158 int
1159 socket_listen(net_socket* socket, int backlog)
1160 {
1161 	status_t status = socket->first_info->listen(socket->first_protocol,
1162 		backlog);
1163 	if (status == B_OK)
1164 		socket->options |= SO_ACCEPTCONN;
1165 
1166 	return status;
1167 }
1168 
1169 
1170 ssize_t
1171 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1172 	int flags)
1173 {
1174 	// If the protocol sports read_data_no_buffer() we use it.
1175 	if (socket->first_info->read_data_no_buffer != NULL)
1176 		return socket_receive_no_buffer(socket, header, data, length, flags);
1177 
1178 	size_t totalLength = length;
1179 	net_buffer* buffer;
1180 	int i;
1181 
1182 	// the convention to this function is that have header been
1183 	// present, { data, length } would have been iovec[0] and is
1184 	// always considered like that
1185 
1186 	if (header) {
1187 		// calculate the length considering all of the extra buffers
1188 		for (i = 1; i < header->msg_iovlen; i++)
1189 			totalLength += header->msg_iov[i].iov_len;
1190 	}
1191 
1192 	status_t status = socket->first_info->read_data(
1193 		socket->first_protocol, totalLength, flags, &buffer);
1194 	if (status < B_OK)
1195 		return status;
1196 
1197 	// process ancillary data
1198 	if (header != NULL) {
1199 		if (buffer != NULL && header->msg_control != NULL) {
1200 			ancillary_data_container* container
1201 				= gNetBufferModule.get_ancillary_data(buffer);
1202 			if (container != NULL)
1203 				status = process_ancillary_data(socket, container, header);
1204 			else
1205 				status = process_ancillary_data(socket, buffer, header);
1206 			if (status != B_OK) {
1207 				gNetBufferModule.free(buffer);
1208 				return status;
1209 			}
1210 		} else
1211 			header->msg_controllen = 0;
1212 	}
1213 
1214 	// TODO: - returning a NULL buffer when received 0 bytes
1215 	//         may not make much sense as we still need the address
1216 	//       - gNetBufferModule.read() uses memcpy() instead of user_memcpy
1217 
1218 	size_t nameLen = 0;
1219 
1220 	if (header) {
1221 		// TODO: - consider the control buffer options
1222 		nameLen = header->msg_namelen;
1223 		header->msg_namelen = 0;
1224 		header->msg_flags = 0;
1225 	}
1226 
1227 	if (buffer == NULL)
1228 		return 0;
1229 
1230 	size_t bytesReceived = buffer->size, bytesCopied = 0;
1231 
1232 	length = min_c(bytesReceived, length);
1233 	if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
1234 		gNetBufferModule.free(buffer);
1235 		return ENOBUFS;
1236 	}
1237 
1238 	// if first copy was a success, proceed to following
1239 	// copies as required
1240 	bytesCopied += length;
1241 
1242 	if (header) {
1243 		// we only start considering at iovec[1]
1244 		// as { data, length } is iovec[0]
1245 		for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1246 			iovec& vec = header->msg_iov[i];
1247 			size_t toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1248 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1249 					toRead) < B_OK) {
1250 				break;
1251 			}
1252 
1253 			bytesCopied += toRead;
1254 		}
1255 
1256 		if (header->msg_name != NULL) {
1257 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1258 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1259 		}
1260 	}
1261 
1262 	gNetBufferModule.free(buffer);
1263 
1264 	if (bytesCopied < bytesReceived) {
1265 		if (header)
1266 			header->msg_flags = MSG_TRUNC;
1267 
1268 		if (flags & MSG_TRUNC)
1269 			return bytesReceived;
1270 	}
1271 
1272 	return bytesCopied;
1273 }
1274 
1275 
1276 ssize_t
1277 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1278 	int flags)
1279 {
1280 	const sockaddr* address = NULL;
1281 	socklen_t addressLength = 0;
1282 	size_t bytesLeft = length;
1283 
1284 	if (length > SSIZE_MAX)
1285 		return B_BAD_VALUE;
1286 
1287 	ancillary_data_container* ancillaryData = NULL;
1288 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(NULL,
1289 		&delete_ancillary_data_container);
1290 
1291 	if (header != NULL) {
1292 		address = (const sockaddr*)header->msg_name;
1293 		addressLength = header->msg_namelen;
1294 
1295 		// get the ancillary data
1296 		if (header->msg_control != NULL) {
1297 			ancillaryData = create_ancillary_data_container();
1298 			if (ancillaryData == NULL)
1299 				return B_NO_MEMORY;
1300 			ancillaryDataDeleter.SetTo(ancillaryData);
1301 
1302 			status_t status = add_ancillary_data(socket, ancillaryData,
1303 				(cmsghdr*)header->msg_control, header->msg_controllen);
1304 			if (status != B_OK)
1305 				return status;
1306 		}
1307 	}
1308 
1309 	if (addressLength == 0)
1310 		address = NULL;
1311 	else if (addressLength != 0 && address == NULL)
1312 		return B_BAD_VALUE;
1313 
1314 	if (socket->peer.ss_len != 0) {
1315 		if (address != NULL)
1316 			return EISCONN;
1317 
1318 		// socket is connected, we use that address
1319 		address = (struct sockaddr*)&socket->peer;
1320 		addressLength = socket->peer.ss_len;
1321 	}
1322 
1323 	if (address == NULL || addressLength == 0) {
1324 		// don't know where to send to:
1325 		return EDESTADDRREQ;
1326 	}
1327 
1328 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1329 		&& bytesLeft > socket->send.buffer_size)
1330 		return EMSGSIZE;
1331 
1332 	if (socket->address.ss_len == 0) {
1333 		// try to bind first
1334 		status_t status = socket_bind(socket, NULL, 0);
1335 		if (status < B_OK)
1336 			return status;
1337 	}
1338 
1339 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1340 	if (socket->first_info->send_data_no_buffer != NULL) {
1341 		iovec stackVec = { (void*)data, length };
1342 		iovec* vecs = header ? header->msg_iov : &stackVec;
1343 		int vecCount = header ? header->msg_iovlen : 1;
1344 
1345 		ssize_t written = socket->first_info->send_data_no_buffer(
1346 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1347 			addressLength);
1348 		if (written > 0)
1349 			ancillaryDataDeleter.Detach();
1350 		return written;
1351 	}
1352 
1353 	// By convention, if a header is given, the (data, length) equals the first
1354 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1355 	// the size of the remaining ones.
1356 	if (header != NULL) {
1357 		if (header->msg_iovlen <= 1)
1358 			header = NULL;
1359 		else {
1360 // TODO: The iovecs have already been copied to kernel space. Simplify!
1361 			bytesLeft += compute_user_iovec_length(header->msg_iov + 1,
1362 				header->msg_iovlen - 1);
1363 		}
1364 	}
1365 
1366 	ssize_t bytesSent = 0;
1367 	size_t vecOffset = 0;
1368 	uint32 vecIndex = 0;
1369 
1370 	while (bytesLeft > 0) {
1371 		// TODO: useful, maybe even computed header space!
1372 		net_buffer* buffer = gNetBufferModule.create(256);
1373 		if (buffer == NULL)
1374 			return ENOBUFS;
1375 
1376 		while (buffer->size < socket->send.buffer_size
1377 			&& buffer->size < bytesLeft) {
1378 			if (vecIndex > 0 && vecOffset == 0) {
1379 				// retrieve next iovec buffer from header
1380 				iovec vec;
1381 				if (user_memcpy(&vec, header->msg_iov + vecIndex, sizeof(iovec))
1382 						< B_OK) {
1383 					gNetBufferModule.free(buffer);
1384 					return B_BAD_ADDRESS;
1385 				}
1386 
1387 				data = vec.iov_base;
1388 				length = vec.iov_len;
1389 			}
1390 
1391 			size_t bytes = length;
1392 			if (buffer->size + bytes > socket->send.buffer_size)
1393 				bytes = socket->send.buffer_size - buffer->size;
1394 
1395 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1396 				gNetBufferModule.free(buffer);
1397 				return ENOBUFS;
1398 			}
1399 
1400 			if (bytes != length) {
1401 				// partial send
1402 				vecOffset = bytes;
1403 				length -= vecOffset;
1404 				data = (uint8*)data + vecOffset;
1405 			} else if (header != NULL) {
1406 				// proceed with next buffer, if any
1407 				vecOffset = 0;
1408 				vecIndex++;
1409 
1410 				if (vecIndex >= (uint32)header->msg_iovlen)
1411 					break;
1412 			}
1413 		}
1414 
1415 		// attach ancillary data to the first buffer
1416 		status_t status = B_OK;
1417 		if (ancillaryData != NULL) {
1418 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1419 			ancillaryDataDeleter.Detach();
1420 			ancillaryData = NULL;
1421 		}
1422 
1423 		size_t bufferSize = buffer->size;
1424 		buffer->flags = flags;
1425 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1426 		memcpy(buffer->destination, address, addressLength);
1427 
1428 		if (status == B_OK) {
1429 			status = socket->first_info->send_data(socket->first_protocol,
1430 				buffer);
1431 		}
1432 		if (status < B_OK) {
1433 			size_t sizeAfterSend = buffer->size;
1434 			gNetBufferModule.free(buffer);
1435 
1436 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1437 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1438 				// this appears to be a partial write
1439 				return bytesSent + (bufferSize - sizeAfterSend);
1440 			}
1441 			return status;
1442 		}
1443 
1444 		bytesLeft -= bufferSize;
1445 		bytesSent += bufferSize;
1446 	}
1447 
1448 	return bytesSent;
1449 }
1450 
1451 
1452 status_t
1453 socket_set_option(net_socket* socket, int level, int option, const void* value,
1454 	int length)
1455 {
1456 	if (level != SOL_SOCKET)
1457 		return ENOPROTOOPT;
1458 
1459 	switch (option) {
1460 		// TODO: implement other options!
1461 		case SO_LINGER:
1462 		{
1463 			if (length < (int)sizeof(struct linger))
1464 				return B_BAD_VALUE;
1465 
1466 			struct linger* linger = (struct linger*)value;
1467 			if (linger->l_onoff) {
1468 				socket->options |= SO_LINGER;
1469 				socket->linger = linger->l_linger;
1470 			} else {
1471 				socket->options &= ~SO_LINGER;
1472 				socket->linger = 0;
1473 			}
1474 			return B_OK;
1475 		}
1476 
1477 		case SO_SNDBUF:
1478 			if (length != sizeof(uint32))
1479 				return B_BAD_VALUE;
1480 
1481 			socket->send.buffer_size = *(const uint32*)value;
1482 			return B_OK;
1483 
1484 		case SO_RCVBUF:
1485 			if (length != sizeof(uint32))
1486 				return B_BAD_VALUE;
1487 
1488 			socket->receive.buffer_size = *(const uint32*)value;
1489 			return B_OK;
1490 
1491 		case SO_SNDLOWAT:
1492 			if (length != sizeof(uint32))
1493 				return B_BAD_VALUE;
1494 
1495 			socket->send.low_water_mark = *(const uint32*)value;
1496 			return B_OK;
1497 
1498 		case SO_RCVLOWAT:
1499 			if (length != sizeof(uint32))
1500 				return B_BAD_VALUE;
1501 
1502 			socket->receive.low_water_mark = *(const uint32*)value;
1503 			return B_OK;
1504 
1505 		case SO_RCVTIMEO:
1506 		case SO_SNDTIMEO:
1507 		{
1508 			if (length != sizeof(struct timeval))
1509 				return B_BAD_VALUE;
1510 
1511 			const struct timeval* timeval = (const struct timeval*)value;
1512 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1513 			if (timeout == 0)
1514 				timeout = B_INFINITE_TIMEOUT;
1515 
1516 			if (option == SO_SNDTIMEO)
1517 				socket->send.timeout = timeout;
1518 			else
1519 				socket->receive.timeout = timeout;
1520 			return B_OK;
1521 		}
1522 
1523 		case SO_NONBLOCK:
1524 			if (length != sizeof(int32))
1525 				return B_BAD_VALUE;
1526 
1527 			if (*(const int32*)value) {
1528 				socket->send.timeout = 0;
1529 				socket->receive.timeout = 0;
1530 			} else {
1531 				socket->send.timeout = B_INFINITE_TIMEOUT;
1532 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1533 			}
1534 			return B_OK;
1535 
1536 		case SO_BROADCAST:
1537 		case SO_DEBUG:
1538 		case SO_DONTROUTE:
1539 		case SO_KEEPALIVE:
1540 		case SO_OOBINLINE:
1541 		case SO_REUSEADDR:
1542 		case SO_REUSEPORT:
1543 		case SO_USELOOPBACK:
1544 			if (length != sizeof(int32))
1545 				return B_BAD_VALUE;
1546 
1547 			if (*(const int32*)value)
1548 				socket->options |= option;
1549 			else
1550 				socket->options &= ~option;
1551 			return B_OK;
1552 
1553 		case SO_BINDTODEVICE:
1554 		{
1555 			if (length != sizeof(int32))
1556 				return B_BAD_VALUE;
1557 
1558 			int index = *(const int32*)value;
1559 			if (index < 0)
1560 				return B_BAD_VALUE;
1561 
1562 			// TODO: we might want to check if the device exists at all
1563 			// (although it doesn't really harm when we don't)
1564 			socket->bound_to_device = index;
1565 			return B_OK;
1566 		}
1567 
1568 		default:
1569 			break;
1570 	}
1571 
1572 	dprintf("socket_setsockopt: unknown option %d\n", option);
1573 	return ENOPROTOOPT;
1574 }
1575 
1576 
1577 int
1578 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1579 	int length)
1580 {
1581 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1582 		level, option, value, length);
1583 }
1584 
1585 
1586 int
1587 socket_shutdown(net_socket* socket, int direction)
1588 {
1589 	return socket->first_info->shutdown(socket->first_protocol, direction);
1590 }
1591 
1592 
1593 status_t
1594 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1595 {
1596 	sockets[0] = NULL;
1597 	sockets[1] = NULL;
1598 
1599 	// create sockets
1600 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1601 	if (error != B_OK)
1602 		return error;
1603 
1604 	if (error == B_OK)
1605 		error = socket_open(family, type, protocol, &sockets[1]);
1606 
1607 	// bind one
1608 	if (error == B_OK)
1609 		error = socket_bind(sockets[0], NULL, 0);
1610 
1611 	// start listening
1612 	if (error == B_OK)
1613 		error = socket_listen(sockets[0], 1);
1614 
1615 	// connect them
1616 	if (error == B_OK) {
1617 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1618 			sockets[0]->address.ss_len);
1619 	}
1620 
1621 	// accept a socket
1622 	net_socket* acceptedSocket = NULL;
1623 	if (error == B_OK)
1624 		error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1625 
1626 	if (error == B_OK) {
1627 		// everything worked: close the listener socket
1628 		socket_close(sockets[0]);
1629 		socket_free(sockets[0]);
1630 		sockets[0] = acceptedSocket;
1631 	} else {
1632 		// close sockets on error
1633 		for (int i = 0; i < 2; i++) {
1634 			if (sockets[i] != NULL) {
1635 				socket_close(sockets[i]);
1636 				socket_free(sockets[i]);
1637 				sockets[i] = NULL;
1638 			}
1639 		}
1640 	}
1641 
1642 	return error;
1643 }
1644 
1645 
1646 //	#pragma mark -
1647 
1648 
1649 static status_t
1650 socket_std_ops(int32 op, ...)
1651 {
1652 	switch (op) {
1653 		case B_MODULE_INIT:
1654 		{
1655 			new (&sSocketList) SocketList;
1656 			mutex_init(&sSocketLock, "socket list");
1657 
1658 #ifdef ADD_DEBUGGER_COMMANDS
1659 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1660 			add_debugger_command("socket", dump_socket, "dumps a socket");
1661 #endif
1662 			return B_OK;
1663 		}
1664 		case B_MODULE_UNINIT:
1665 			ASSERT(sSocketList.IsEmpty());
1666 			mutex_destroy(&sSocketLock);
1667 
1668 #ifdef ADD_DEBUGGER_COMMANDS
1669 			remove_debugger_command("socket", dump_socket);
1670 			remove_debugger_command("sockets", dump_sockets);
1671 #endif
1672 			return B_OK;
1673 
1674 		default:
1675 			return B_ERROR;
1676 	}
1677 }
1678 
1679 
1680 net_socket_module_info gNetSocketModule = {
1681 	{
1682 		NET_SOCKET_MODULE_NAME,
1683 		0,
1684 		socket_std_ops
1685 	},
1686 	socket_open,
1687 	socket_close,
1688 	socket_free,
1689 
1690 	socket_readv,
1691 	socket_writev,
1692 	socket_control,
1693 
1694 	socket_read_avail,
1695 	socket_send_avail,
1696 
1697 	socket_send_data,
1698 	socket_receive_data,
1699 
1700 	socket_get_option,
1701 	socket_set_option,
1702 
1703 	socket_get_next_stat,
1704 
1705 	// connections
1706 	socket_acquire,
1707 	socket_release,
1708 	socket_spawn_pending,
1709 	socket_dequeue_connected,
1710 	socket_count_connected,
1711 	socket_set_max_backlog,
1712 	socket_has_parent,
1713 	socket_connected,
1714 	socket_aborted,
1715 
1716 	// notifications
1717 	socket_request_notification,
1718 	socket_cancel_notification,
1719 	socket_notify,
1720 
1721 	// standard socket API
1722 	socket_accept,
1723 	socket_bind,
1724 	socket_connect,
1725 	socket_getpeername,
1726 	socket_getsockname,
1727 	socket_getsockopt,
1728 	socket_listen,
1729 	socket_receive,
1730 	socket_send,
1731 	socket_setsockopt,
1732 	socket_shutdown,
1733 	socket_socketpair
1734 };
1735 
1736