xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision e0ef64750f3169cd634bb2f7a001e22488b05231)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sys/ioctl.h>
15 #include <sys/time.h>
16 
17 #include <new>
18 
19 #include <Drivers.h>
20 #include <KernelExport.h>
21 #include <Select.h>
22 
23 #include <AutoDeleter.h>
24 #include <team.h>
25 #include <util/AutoLock.h>
26 #include <util/list.h>
27 #include <WeakReferenceable.h>
28 
29 #include <fs/select_sync_pool.h>
30 #include <kernel.h>
31 
32 #include <net_protocol.h>
33 #include <net_stack.h>
34 #include <net_stat.h>
35 
36 #include "ancillary_data.h"
37 #include "utility.h"
38 
39 
40 #define ADD_DEBUGGER_COMMANDS
41 
42 
43 struct net_socket_private;
44 typedef DoublyLinkedList<net_socket_private> SocketList;
45 
46 struct net_socket_private : net_socket,
47 		DoublyLinkedListLinkImpl<net_socket_private>,
48 		WeakReferenceable<net_socket_private> {
49 	net_socket_private();
50 	~net_socket_private();
51 
52 	void RemoveFromParent();
53 
54 	WeakPointer<net_socket_private>* parent;
55 	team_id						owner;
56 	uint32						max_backlog;
57 	uint32						child_count;
58 	SocketList					pending_children;
59 	SocketList					connected_children;
60 
61 	struct select_sync_pool*	select_pool;
62 	mutex						lock;
63 
64 	bool						is_connected;
65 	bool						is_in_socket_list;
66 };
67 
68 
69 int socket_bind(net_socket* socket, const struct sockaddr* address,
70 	socklen_t addressLength);
71 int socket_setsockopt(net_socket* socket, int level, int option,
72 	const void* value, int length);
73 ssize_t socket_read_avail(net_socket* socket);
74 
75 static SocketList sSocketList;
76 static mutex sSocketLock;
77 
78 
79 net_socket_private::net_socket_private()
80 	: WeakReferenceable<net_socket_private>(this),
81 	parent(NULL),
82 	owner(-1),
83 	max_backlog(0),
84 	child_count(0),
85 	select_pool(NULL),
86 	is_connected(false),
87 	is_in_socket_list(false)
88 {
89 	first_protocol = NULL;
90 	first_info = NULL;
91 	options = 0;
92 	linger = 0;
93 	bound_to_device = 0;
94 	error = 0;
95 
96 	address.ss_len = 0;
97 	peer.ss_len = 0;
98 
99 	mutex_init(&lock, "socket");
100 
101 	// set defaults (may be overridden by the protocols)
102 	send.buffer_size = 65535;
103 	send.low_water_mark = 1;
104 	send.timeout = B_INFINITE_TIMEOUT;
105 	receive.buffer_size = 65535;
106 	receive.low_water_mark = 1;
107 	receive.timeout = B_INFINITE_TIMEOUT;
108 }
109 
110 
111 net_socket_private::~net_socket_private()
112 {
113 	if (parent != NULL)
114 		panic("socket still has a parent!");
115 
116 	if (is_in_socket_list) {
117 		MutexLocker _(sSocketLock);
118 		sSocketList.Remove(this);
119 	}
120 
121 	mutex_lock(&lock);
122 
123 	// also delete all children of this socket
124 	while (net_socket_private* child = pending_children.RemoveHead()) {
125 		child->RemoveFromParent();
126 	}
127 	while (net_socket_private* child = connected_children.RemoveHead()) {
128 		child->RemoveFromParent();
129 	}
130 
131 	mutex_unlock(&lock);
132 
133 	put_domain_protocols(this);
134 
135 	mutex_destroy(&lock);
136 }
137 
138 
139 void
140 net_socket_private::RemoveFromParent()
141 {
142 	ASSERT(!is_in_socket_list && parent != NULL);
143 
144 	parent->RemoveReference();
145 	parent = NULL;
146 
147 	mutex_lock(&sSocketLock);
148 	sSocketList.Add(this);
149 	mutex_unlock(&sSocketLock);
150 
151 	is_in_socket_list = true;
152 
153 	RemoveReference();
154 }
155 
156 
157 //	#pragma mark -
158 
159 
160 static size_t
161 compute_user_iovec_length(iovec* userVec, uint32 count)
162 {
163 	size_t length = 0;
164 
165 	for (uint32 i = 0; i < count; i++) {
166 		iovec vec;
167 		if (user_memcpy(&vec, userVec + i, sizeof(iovec)) < B_OK)
168 			return 0;
169 
170 		length += vec.iov_len;
171 	}
172 
173 	return length;
174 }
175 
176 
177 static status_t
178 create_socket(int family, int type, int protocol, net_socket_private** _socket)
179 {
180 	struct net_socket_private* socket = new(std::nothrow) net_socket_private;
181 	if (socket == NULL)
182 		return B_NO_MEMORY;
183 
184 	socket->family = family;
185 	socket->type = type;
186 	socket->protocol = protocol;
187 
188 	status_t status = get_domain_protocols(socket);
189 	if (status < B_OK) {
190 		delete socket;
191 		return status;
192 	}
193 
194 	*_socket = socket;
195 	return B_OK;
196 }
197 
198 
199 static status_t
200 add_ancillary_data(net_socket* socket, ancillary_data_container* container,
201 	void* data, size_t dataLen)
202 {
203 	cmsghdr* header = (cmsghdr*)data;
204 
205 	while (dataLen > 0) {
206 		if (header->cmsg_len < sizeof(cmsghdr) || header->cmsg_len > dataLen)
207 			return B_BAD_VALUE;
208 
209 		if (socket->first_info->add_ancillary_data == NULL)
210 			return B_NOT_SUPPORTED;
211 
212 		status_t status = socket->first_info->add_ancillary_data(
213 			socket->first_protocol, container, header);
214 		if (status != B_OK)
215 			return status;
216 
217 		dataLen -= _ALIGN(header->cmsg_len);
218 		header = (cmsghdr*)((uint8*)header + _ALIGN(header->cmsg_len));
219 	}
220 
221 	return B_OK;
222 }
223 
224 
225 static status_t
226 process_ancillary_data(net_socket* socket, ancillary_data_container* container,
227 	msghdr* messageHeader)
228 {
229 	uint8* dataBuffer = (uint8*)messageHeader->msg_control;
230 	int dataBufferLen = messageHeader->msg_controllen;
231 
232 	if (container == NULL || dataBuffer == NULL) {
233 		messageHeader->msg_controllen = 0;
234 		return B_OK;
235 	}
236 
237 	ancillary_data_header header;
238 	void* data = NULL;
239 
240 	while ((data = next_ancillary_data(container, data, &header)) != NULL) {
241 		if (socket->first_info->process_ancillary_data == NULL)
242 			return B_NOT_SUPPORTED;
243 
244 		ssize_t bytesWritten = socket->first_info->process_ancillary_data(
245 			socket->first_protocol, &header, data, dataBuffer, dataBufferLen);
246 		if (bytesWritten < 0)
247 			return bytesWritten;
248 
249 		dataBuffer += bytesWritten;
250 		dataBufferLen -= bytesWritten;
251 	}
252 
253 	messageHeader->msg_controllen -= dataBufferLen;
254 
255 	return B_OK;
256 }
257 
258 
259 static status_t
260 process_ancillary_data(net_socket* socket,
261 	net_buffer* buffer, msghdr* messageHeader)
262 {
263 	void *dataBuffer = messageHeader->msg_control;
264 	ssize_t bytesWritten;
265 
266 	if (dataBuffer == NULL) {
267 		messageHeader->msg_controllen = 0;
268 		return B_OK;
269 	}
270 
271 	if (socket->first_info->process_ancillary_data_no_container == NULL)
272 		return B_NOT_SUPPORTED;
273 
274 	bytesWritten = socket->first_info->process_ancillary_data_no_container(
275 		socket->first_protocol, buffer, dataBuffer,
276 		messageHeader->msg_controllen);
277 	if (bytesWritten < 0)
278 		return bytesWritten;
279 	messageHeader->msg_controllen = bytesWritten;
280 
281 	return B_OK;
282 }
283 
284 
285 static ssize_t
286 socket_receive_no_buffer(net_socket* socket, msghdr* header, void* data,
287 	size_t length, int flags)
288 {
289 	iovec stackVec = { data, length };
290 	iovec* vecs = header ? header->msg_iov : &stackVec;
291 	int vecCount = header ? header->msg_iovlen : 1;
292 	sockaddr* address = header ? (sockaddr*)header->msg_name : NULL;
293 	socklen_t* addressLen = header ? &header->msg_namelen : NULL;
294 
295 	ancillary_data_container* ancillaryData = NULL;
296 	ssize_t bytesRead = socket->first_info->read_data_no_buffer(
297 		socket->first_protocol, vecs, vecCount, &ancillaryData, address,
298 		addressLen);
299 	if (bytesRead < 0)
300 		return bytesRead;
301 
302 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(ancillaryData,
303 		&delete_ancillary_data_container);
304 
305 	// process ancillary data
306 	if (header != NULL) {
307 		status_t status = process_ancillary_data(socket, ancillaryData, header);
308 		if (status != B_OK)
309 			return status;
310 
311 		header->msg_flags = 0;
312 	}
313 
314 	return bytesRead;
315 }
316 
317 
318 #ifdef ADD_DEBUGGER_COMMANDS
319 
320 static void
321 print_socket_line(net_socket_private* socket, const char* prefix)
322 {
323 	kprintf("%s%p %2d.%2d.%2d %6ld %p %p  %p%s\n", prefix, socket,
324 		socket->family, socket->type, socket->protocol, socket->owner,
325 		socket->first_protocol, socket->first_info, socket->parent,
326 		socket->parent != NULL ? socket->is_connected ? " (c)" : " (p)" : "");
327 }
328 
329 
330 static int
331 dump_socket(int argc, char** argv)
332 {
333 	if (argc < 2) {
334 		kprintf("usage: %s [address]\n", argv[0]);
335 		return 0;
336 	}
337 
338 	net_socket_private* socket = (net_socket_private*)parse_expression(argv[1]);
339 
340 	kprintf("SOCKET %p\n", socket);
341 	kprintf("  family.type.protocol: %d.%d.%d\n",
342 		socket->family, socket->type, socket->protocol);
343 	WeakReference<net_socket_private> parent = socket->parent;
344 	kprintf("  parent:               %p (%p)\n", parent.Get(), socket->parent);
345 	kprintf("  first protocol:       %p\n", socket->first_protocol);
346 	kprintf("  first module_info:    %p\n", socket->first_info);
347 	kprintf("  options:              %x\n", socket->options);
348 	kprintf("  linger:               %d\n", socket->linger);
349 	kprintf("  bound to device:      %d\n", socket->bound_to_device);
350 	kprintf("  owner:                %ld\n", socket->owner);
351 	kprintf("  max backlog:          %ld\n", socket->max_backlog);
352 	kprintf("  is connected:         %d\n", socket->is_connected);
353 	kprintf("  child_count:          %lu\n", socket->child_count);
354 
355 	if (socket->child_count == 0)
356 		return 0;
357 
358 	kprintf("    pending children:\n");
359 	SocketList::Iterator iterator = socket->pending_children.GetIterator();
360 	while (net_socket_private* child = iterator.Next()) {
361 		print_socket_line(child, "      ");
362 	}
363 
364 	kprintf("    connected children:\n");
365 	iterator = socket->connected_children.GetIterator();
366 	while (net_socket_private* child = iterator.Next()) {
367 		print_socket_line(child, "      ");
368 	}
369 
370 	return 0;
371 }
372 
373 
374 static int
375 dump_sockets(int argc, char** argv)
376 {
377 	kprintf("address        kind  owner protocol   module_info parent\n");
378 
379 	SocketList::Iterator iterator = sSocketList.GetIterator();
380 	while (net_socket_private* socket = iterator.Next()) {
381 		print_socket_line(socket, "");
382 
383 		SocketList::Iterator childIterator
384 			= socket->pending_children.GetIterator();
385 		while (net_socket_private* child = childIterator.Next()) {
386 			print_socket_line(child, " ");
387 		}
388 
389 		childIterator = socket->connected_children.GetIterator();
390 		while (net_socket_private* child = childIterator.Next()) {
391 			print_socket_line(child, " ");
392 		}
393 	}
394 
395 	return 0;
396 }
397 
398 #endif	// ADD_DEBUGGER_COMMANDS
399 
400 
401 //	#pragma mark -
402 
403 
404 status_t
405 socket_open(int family, int type, int protocol, net_socket** _socket)
406 {
407 	net_socket_private* socket;
408 	status_t status = create_socket(family, type, protocol, &socket);
409 	if (status < B_OK)
410 		return status;
411 
412 	status = socket->first_info->open(socket->first_protocol);
413 	if (status < B_OK) {
414 		delete socket;
415 		return status;
416 	}
417 
418 	socket->owner = team_get_current_team_id();
419 	socket->is_in_socket_list = true;
420 
421 	mutex_lock(&sSocketLock);
422 	sSocketList.Add(socket);
423 	mutex_unlock(&sSocketLock);
424 
425 	*_socket = socket;
426 	return B_OK;
427 }
428 
429 
430 status_t
431 socket_close(net_socket* _socket)
432 {
433 	net_socket_private* socket = (net_socket_private*)_socket;
434 	return socket->first_info->close(socket->first_protocol);
435 }
436 
437 
438 void
439 socket_free(net_socket* _socket)
440 {
441 	net_socket_private* socket = (net_socket_private*)_socket;
442 	socket->first_info->free(socket->first_protocol);
443 	socket->RemoveReference();
444 }
445 
446 
447 status_t
448 socket_readv(net_socket* socket, const iovec* vecs, size_t vecCount,
449 	size_t* _length)
450 {
451 	return -1;
452 }
453 
454 
455 status_t
456 socket_writev(net_socket* socket, const iovec* vecs, size_t vecCount,
457 	size_t* _length)
458 {
459 	if (socket->peer.ss_len == 0)
460 		return ECONNRESET;
461 
462 	if (socket->address.ss_len == 0) {
463 		// try to bind first
464 		status_t status = socket_bind(socket, NULL, 0);
465 		if (status < B_OK)
466 			return status;
467 	}
468 
469 	// TODO: useful, maybe even computed header space!
470 	net_buffer* buffer = gNetBufferModule.create(256);
471 	if (buffer == NULL)
472 		return ENOBUFS;
473 
474 	// copy data into buffer
475 
476 	for (uint32 i = 0; i < vecCount; i++) {
477 		if (gNetBufferModule.append(buffer, vecs[i].iov_base,
478 				vecs[i].iov_len) < B_OK) {
479 			gNetBufferModule.free(buffer);
480 			return ENOBUFS;
481 		}
482 	}
483 
484 	memcpy(buffer->source, &socket->address, socket->address.ss_len);
485 	memcpy(buffer->destination, &socket->peer, socket->peer.ss_len);
486 	size_t size = buffer->size;
487 
488 	ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
489 		buffer);
490 	if (bytesWritten < B_OK) {
491 		if (buffer->size != size) {
492 			// this appears to be a partial write
493 			*_length = size - buffer->size;
494 		}
495 		gNetBufferModule.free(buffer);
496 		return bytesWritten;
497 	}
498 
499 	*_length = bytesWritten;
500 	return B_OK;
501 }
502 
503 
504 status_t
505 socket_control(net_socket* socket, int32 op, void* data, size_t length)
506 {
507 	switch (op) {
508 		case FIONBIO:
509 		{
510 			if (data == NULL)
511 				return B_BAD_VALUE;
512 
513 			int value;
514 			if (is_syscall()) {
515 				if (!IS_USER_ADDRESS(data)
516 					|| user_memcpy(&value, data, sizeof(int)) != B_OK) {
517 					return B_BAD_ADDRESS;
518 				}
519 			} else
520 				value = *(int*)data;
521 
522 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
523 				sizeof(int));
524 		}
525 
526 		case FIONREAD:
527 		{
528 			if (data == NULL)
529 				return B_BAD_VALUE;
530 
531 			ssize_t available = socket_read_avail(socket);
532 			if (available < B_OK)
533 				return available;
534 
535 			if (is_syscall()) {
536 				if (!IS_USER_ADDRESS(data)
537 					|| user_memcpy(data, &available, sizeof(ssize_t)) != B_OK) {
538 					return B_BAD_ADDRESS;
539 				}
540 			} else
541 				*(ssize_t *)data = available;
542 
543 			return B_OK;
544 		}
545 
546 		case B_SET_BLOCKING_IO:
547 		case B_SET_NONBLOCKING_IO:
548 		{
549 			int value = op == B_SET_NONBLOCKING_IO;
550 			return socket_setsockopt(socket, SOL_SOCKET, SO_NONBLOCK, &value,
551 				sizeof(int));
552 		}
553 	}
554 
555 	return socket->first_info->control(socket->first_protocol,
556 		LEVEL_DRIVER_IOCTL, op, data, &length);
557 }
558 
559 
560 ssize_t
561 socket_read_avail(net_socket* socket)
562 {
563 	return socket->first_info->read_avail(socket->first_protocol);
564 }
565 
566 
567 ssize_t
568 socket_send_avail(net_socket* socket)
569 {
570 	return socket->first_info->send_avail(socket->first_protocol);
571 }
572 
573 
574 status_t
575 socket_send_data(net_socket* socket, net_buffer* buffer)
576 {
577 	return socket->first_info->send_data(socket->first_protocol,
578 		buffer);
579 }
580 
581 
582 status_t
583 socket_receive_data(net_socket* socket, size_t length, uint32 flags,
584 	net_buffer** _buffer)
585 {
586 	status_t status = socket->first_info->read_data(socket->first_protocol,
587 		length, flags, _buffer);
588 
589 	if (status < B_OK)
590 		return status;
591 
592 	if (*_buffer && length < (*_buffer)->size) {
593 		// discard any data behind the amount requested
594 		gNetBufferModule.trim(*_buffer, length);
595 	}
596 
597 	return status;
598 }
599 
600 
601 status_t
602 socket_get_next_stat(uint32* _cookie, int family, struct net_stat* stat)
603 {
604 	MutexLocker locker(sSocketLock);
605 
606 	net_socket_private* socket = NULL;
607 	SocketList::Iterator iterator = sSocketList.GetIterator();
608 	uint32 cookie = *_cookie;
609 	uint32 count = 0;
610 
611 	while (true) {
612 		socket = iterator.Next();
613 		if (socket == NULL)
614 			return B_ENTRY_NOT_FOUND;
615 
616 		// TODO: also traverse the pending connections
617 		if (count == cookie)
618 			break;
619 
620 		if (family == -1 || family == socket->family)
621 			count++;
622 	}
623 
624 	*_cookie = count + 1;
625 
626 	stat->family = socket->family;
627 	stat->type = socket->type;
628 	stat->protocol = socket->protocol;
629 	stat->owner = socket->owner;
630 	stat->state[0] = '\0';
631 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
632 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
633 	stat->receive_queue_size = 0;
634 	stat->send_queue_size = 0;
635 
636 	// fill in protocol specific data (if supported by the protocol)
637 	size_t length = sizeof(net_stat);
638 	socket->first_info->control(socket->first_protocol, socket->protocol,
639 		NET_STAT_SOCKET, stat, &length);
640 
641 	return B_OK;
642 }
643 
644 
645 //	#pragma mark - connections
646 
647 
648 bool
649 socket_acquire(net_socket* _socket)
650 {
651 	net_socket_private* socket = (net_socket_private*)_socket;
652 
653 	// During destruction, the socket might still be accessible over its endpoint
654 	// protocol. We need to make sure the endpoint cannot acquire the socket
655 	// anymore -- while not obvious, the endpoint protocol is responsible for the
656 	// proper locking here.
657 	if (socket->CountReferences() == 0)
658 		return false;
659 
660 	socket->AddReference();
661 	return true;
662 }
663 
664 
665 bool
666 socket_release(net_socket* _socket)
667 {
668 	net_socket_private* socket = (net_socket_private*)_socket;
669 	return socket->RemoveReference();
670 }
671 
672 
673 status_t
674 socket_spawn_pending(net_socket* _parent, net_socket** _socket)
675 {
676 	net_socket_private* parent = (net_socket_private*)_parent;
677 
678 	MutexLocker locker(parent->lock);
679 
680 	// We actually accept more pending connections to compensate for those
681 	// that never complete, and also make sure at least a single connection
682 	// can always be accepted
683 	if (parent->child_count > 3 * parent->max_backlog / 2)
684 		return ENOBUFS;
685 
686 	net_socket_private* socket;
687 	status_t status = create_socket(parent->family, parent->type,
688 		parent->protocol, &socket);
689 	if (status < B_OK)
690 		return status;
691 
692 	// inherit parent's properties
693 	socket->send = parent->send;
694 	socket->receive = parent->receive;
695 	socket->options = parent->options & ~SO_ACCEPTCONN;
696 	socket->linger = parent->linger;
697 	socket->owner = parent->owner;
698 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
699 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
700 
701 	// add to the parent's list of pending connections
702 	parent->pending_children.Add(socket);
703 	socket->parent = parent->GetWeakPointer();
704 	parent->child_count++;
705 
706 	*_socket = socket;
707 	return B_OK;
708 }
709 
710 
711 /*!	Dequeues a connected child from a parent socket.
712 	It also returns a reference with the child socket.
713 */
714 status_t
715 socket_dequeue_connected(net_socket* _parent, net_socket** _socket)
716 {
717 	net_socket_private* parent = (net_socket_private*)_parent;
718 
719 	mutex_lock(&parent->lock);
720 
721 	net_socket_private* socket = parent->connected_children.RemoveHead();
722 	if (socket != NULL) {
723 		socket->AddReference();
724 		socket->RemoveFromParent();
725 		parent->child_count--;
726 		*_socket = socket;
727 	}
728 
729 	mutex_unlock(&parent->lock);
730 
731 	if (socket == NULL)
732 		return B_ENTRY_NOT_FOUND;
733 
734 	return B_OK;
735 }
736 
737 
738 ssize_t
739 socket_count_connected(net_socket* _parent)
740 {
741 	net_socket_private* parent = (net_socket_private*)_parent;
742 
743 	MutexLocker _(parent->lock);
744 	return parent->connected_children.Count();
745 }
746 
747 
748 status_t
749 socket_set_max_backlog(net_socket* _socket, uint32 backlog)
750 {
751 	net_socket_private* socket = (net_socket_private*)_socket;
752 
753 	// we enforce an upper limit of connections waiting to be accepted
754 	if (backlog > 256)
755 		backlog = 256;
756 
757 	MutexLocker _(socket->lock);
758 
759 	// first remove the pending connections, then the already connected
760 	// ones as needed
761 	net_socket_private* child;
762 	while (socket->child_count > backlog
763 		&& (child = socket->pending_children.RemoveTail()) != NULL) {
764 		child->RemoveFromParent();
765 		socket->child_count--;
766 	}
767 	while (socket->child_count > backlog
768 		&& (child = socket->connected_children.RemoveTail()) != NULL) {
769 		child->RemoveFromParent();
770 		socket->child_count--;
771 	}
772 
773 	socket->max_backlog = backlog;
774 	return B_OK;
775 }
776 
777 
778 /*!	Returns whether or not this socket has a parent. The parent might not be
779 	valid anymore, though.
780 */
781 bool
782 socket_has_parent(net_socket* _socket)
783 {
784 	net_socket_private* socket = (net_socket_private*)_socket;
785 	return socket->parent != NULL;
786 }
787 
788 
789 /*!	The socket has been connected. It will be moved to the connected queue
790 	of its parent socket.
791 */
792 status_t
793 socket_connected(net_socket* _socket)
794 {
795 	net_socket_private* socket = (net_socket_private*)_socket;
796 
797 	WeakReference<net_socket_private> parent = socket->parent;
798 	if (parent.Get() == NULL)
799 		return B_BAD_VALUE;
800 
801 	MutexLocker _(parent->lock);
802 
803 	parent->pending_children.Remove(socket);
804 	parent->connected_children.Add(socket);
805 	socket->is_connected = true;
806 
807 	// notify parent
808 	if (parent->select_pool)
809 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
810 
811 	return B_OK;
812 }
813 
814 
815 /*!	The socket has been aborted. Steals the parent's reference, and releases
816 	it.
817 */
818 status_t
819 socket_aborted(net_socket* _socket)
820 {
821 	net_socket_private* socket = (net_socket_private*)_socket;
822 
823 	WeakReference<net_socket_private> parent = socket->parent;
824 	if (parent.Get() == NULL)
825 		return B_BAD_VALUE;
826 
827 	MutexLocker _(parent->lock);
828 
829 	if (socket->is_connected)
830 		parent->connected_children.Remove(socket);
831 	else
832 		parent->pending_children.Remove(socket);
833 
834 	parent->child_count--;
835 	socket->RemoveFromParent();
836 
837 	return B_OK;
838 }
839 
840 
841 //	#pragma mark - notifications
842 
843 
844 status_t
845 socket_request_notification(net_socket* _socket, uint8 event, selectsync* sync)
846 {
847 	net_socket_private* socket = (net_socket_private*)_socket;
848 
849 	mutex_lock(&socket->lock);
850 
851 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
852 		event);
853 
854 	mutex_unlock(&socket->lock);
855 
856 	if (status < B_OK)
857 		return status;
858 
859 	// check if the event is already present
860 	// TODO: add support for poll() types
861 
862 	switch (event) {
863 		case B_SELECT_READ:
864 		{
865 			ssize_t available = socket_read_avail(socket);
866 			if ((ssize_t)socket->receive.low_water_mark <= available
867 				|| available < B_OK)
868 				notify_select_event(sync, event);
869 			break;
870 		}
871 		case B_SELECT_WRITE:
872 		{
873 			ssize_t available = socket_send_avail(socket);
874 			if ((ssize_t)socket->send.low_water_mark <= available
875 				|| available < B_OK)
876 				notify_select_event(sync, event);
877 			break;
878 		}
879 		case B_SELECT_ERROR:
880 			// TODO: B_SELECT_ERROR condition!
881 			break;
882 	}
883 
884 	return B_OK;
885 }
886 
887 
888 status_t
889 socket_cancel_notification(net_socket* _socket, uint8 event, selectsync* sync)
890 {
891 	net_socket_private* socket = (net_socket_private*)_socket;
892 
893 	MutexLocker _(socket->lock);
894 	return remove_select_sync_pool_entry(&socket->select_pool, sync, event);
895 }
896 
897 
898 status_t
899 socket_notify(net_socket* _socket, uint8 event, int32 value)
900 {
901 	net_socket_private* socket = (net_socket_private*)_socket;
902 	bool notify = true;
903 
904 	switch (event) {
905 		case B_SELECT_READ:
906 			if ((ssize_t)socket->receive.low_water_mark > value
907 				&& value >= B_OK)
908 				notify = false;
909 			break;
910 
911 		case B_SELECT_WRITE:
912 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
913 				notify = false;
914 			break;
915 
916 		case B_SELECT_ERROR:
917 			socket->error = value;
918 			break;
919 	}
920 
921 	MutexLocker _(socket->lock);
922 
923 	if (notify && socket->select_pool != NULL) {
924 		notify_select_event_pool(socket->select_pool, event);
925 
926 		if (event == B_SELECT_ERROR) {
927 			// always notify read/write on error
928 			notify_select_event_pool(socket->select_pool, B_SELECT_READ);
929 			notify_select_event_pool(socket->select_pool, B_SELECT_WRITE);
930 		}
931 	}
932 
933 	return B_OK;
934 }
935 
936 
937 //	#pragma mark - standard socket API
938 
939 
940 int
941 socket_accept(net_socket* socket, struct sockaddr* address,
942 	socklen_t* _addressLength, net_socket** _acceptedSocket)
943 {
944 	if ((socket->options & SO_ACCEPTCONN) == 0)
945 		return B_BAD_VALUE;
946 
947 	net_socket* accepted;
948 	status_t status = socket->first_info->accept(socket->first_protocol,
949 		&accepted);
950 	if (status < B_OK)
951 		return status;
952 
953 	if (address && *_addressLength > 0) {
954 		memcpy(address, &accepted->peer, min_c(*_addressLength,
955 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
956 		*_addressLength = accepted->peer.ss_len;
957 	}
958 
959 	*_acceptedSocket = accepted;
960 	return B_OK;
961 }
962 
963 
964 int
965 socket_bind(net_socket* socket, const struct sockaddr* address,
966 	socklen_t addressLength)
967 {
968 	sockaddr empty;
969 	if (address == NULL) {
970 		// special - try to bind to an empty address, like INADDR_ANY
971 		memset(&empty, 0, sizeof(sockaddr));
972 		empty.sa_len = sizeof(sockaddr);
973 		empty.sa_family = socket->family;
974 
975 		address = &empty;
976 		addressLength = sizeof(sockaddr);
977 	}
978 
979 	if (socket->address.ss_len != 0) {
980 		status_t status = socket->first_info->unbind(socket->first_protocol,
981 			(sockaddr*)&socket->address);
982 		if (status < B_OK)
983 			return status;
984 	}
985 
986 	memcpy(&socket->address, address, sizeof(sockaddr));
987 	socket->address.ss_len = sizeof(sockaddr_storage);
988 
989 	status_t status = socket->first_info->bind(socket->first_protocol,
990 		(sockaddr*)address);
991 	if (status < B_OK) {
992 		// clear address again, as binding failed
993 		socket->address.ss_len = 0;
994 	}
995 
996 	return status;
997 }
998 
999 
1000 int
1001 socket_connect(net_socket* socket, const struct sockaddr* address,
1002 	socklen_t addressLength)
1003 {
1004 	if (address == NULL || addressLength == 0)
1005 		return ENETUNREACH;
1006 
1007 	if (socket->address.ss_len == 0) {
1008 		// try to bind first
1009 		status_t status = socket_bind(socket, NULL, 0);
1010 		if (status < B_OK)
1011 			return status;
1012 	}
1013 
1014 	return socket->first_info->connect(socket->first_protocol, address);
1015 }
1016 
1017 
1018 int
1019 socket_getpeername(net_socket* socket, struct sockaddr* address,
1020 	socklen_t* _addressLength)
1021 {
1022 	if (socket->peer.ss_len == 0)
1023 		return ENOTCONN;
1024 
1025 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
1026 	*_addressLength = socket->peer.ss_len;
1027 	return B_OK;
1028 }
1029 
1030 
1031 int
1032 socket_getsockname(net_socket* socket, struct sockaddr* address,
1033 	socklen_t* _addressLength)
1034 {
1035 	if (socket->address.ss_len == 0)
1036 		return ENOTCONN;
1037 
1038 	memcpy(address, &socket->address, min_c(*_addressLength,
1039 		socket->address.ss_len));
1040 	*_addressLength = socket->address.ss_len;
1041 	return B_OK;
1042 }
1043 
1044 
1045 status_t
1046 socket_get_option(net_socket* socket, int level, int option, void* value,
1047 	int* _length)
1048 {
1049 	if (level != SOL_SOCKET)
1050 		return ENOPROTOOPT;
1051 
1052 	switch (option) {
1053 		case SO_SNDBUF:
1054 		{
1055 			uint32* size = (uint32*)value;
1056 			*size = socket->send.buffer_size;
1057 			*_length = sizeof(uint32);
1058 			return B_OK;
1059 		}
1060 
1061 		case SO_RCVBUF:
1062 		{
1063 			uint32* size = (uint32*)value;
1064 			*size = socket->receive.buffer_size;
1065 			*_length = sizeof(uint32);
1066 			return B_OK;
1067 		}
1068 
1069 		case SO_SNDLOWAT:
1070 		{
1071 			uint32* size = (uint32*)value;
1072 			*size = socket->send.low_water_mark;
1073 			*_length = sizeof(uint32);
1074 			return B_OK;
1075 		}
1076 
1077 		case SO_RCVLOWAT:
1078 		{
1079 			uint32* size = (uint32*)value;
1080 			*size = socket->receive.low_water_mark;
1081 			*_length = sizeof(uint32);
1082 			return B_OK;
1083 		}
1084 
1085 		case SO_RCVTIMEO:
1086 		case SO_SNDTIMEO:
1087 		{
1088 			if (*_length < (int)sizeof(struct timeval))
1089 				return B_BAD_VALUE;
1090 
1091 			bigtime_t timeout;
1092 			if (option == SO_SNDTIMEO)
1093 				timeout = socket->send.timeout;
1094 			else
1095 				timeout = socket->receive.timeout;
1096 			if (timeout == B_INFINITE_TIMEOUT)
1097 				timeout = 0;
1098 
1099 			struct timeval* timeval = (struct timeval*)value;
1100 			timeval->tv_sec = timeout / 1000000LL;
1101 			timeval->tv_usec = timeout % 1000000LL;
1102 
1103 			*_length = sizeof(struct timeval);
1104 			return B_OK;
1105 		}
1106 
1107 		case SO_NONBLOCK:
1108 		{
1109 			int32* _set = (int32*)value;
1110 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
1111 			*_length = sizeof(int32);
1112 			return B_OK;
1113 		}
1114 
1115 		case SO_ACCEPTCONN:
1116 		case SO_BROADCAST:
1117 		case SO_DEBUG:
1118 		case SO_DONTROUTE:
1119 		case SO_KEEPALIVE:
1120 		case SO_OOBINLINE:
1121 		case SO_REUSEADDR:
1122 		case SO_REUSEPORT:
1123 		case SO_USELOOPBACK:
1124 		{
1125 			int32* _set = (int32*)value;
1126 			*_set = (socket->options & option) != 0;
1127 			*_length = sizeof(int32);
1128 			return B_OK;
1129 		}
1130 
1131 		case SO_TYPE:
1132 		{
1133 			int32* _set = (int32*)value;
1134 			*_set = socket->type;
1135 			*_length = sizeof(int32);
1136 			return B_OK;
1137 		}
1138 
1139 		case SO_ERROR:
1140 		{
1141 			int32* _set = (int32*)value;
1142 			*_set = socket->error;
1143 			*_length = sizeof(int32);
1144 
1145 			socket->error = B_OK;
1146 				// clear error upon retrieval
1147 			return B_OK;
1148 		}
1149 
1150 		default:
1151 			break;
1152 	}
1153 
1154 	dprintf("socket_getsockopt: unknown option %d\n", option);
1155 	return ENOPROTOOPT;
1156 }
1157 
1158 
1159 int
1160 socket_getsockopt(net_socket* socket, int level, int option, void* value,
1161 	int* _length)
1162 {
1163 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
1164 		level, option, value, _length);
1165 }
1166 
1167 
1168 int
1169 socket_listen(net_socket* socket, int backlog)
1170 {
1171 	status_t status = socket->first_info->listen(socket->first_protocol,
1172 		backlog);
1173 	if (status == B_OK)
1174 		socket->options |= SO_ACCEPTCONN;
1175 
1176 	return status;
1177 }
1178 
1179 
1180 ssize_t
1181 socket_receive(net_socket* socket, msghdr* header, void* data, size_t length,
1182 	int flags)
1183 {
1184 	// If the protocol sports read_data_no_buffer() we use it.
1185 	if (socket->first_info->read_data_no_buffer != NULL)
1186 		return socket_receive_no_buffer(socket, header, data, length, flags);
1187 
1188 	size_t totalLength = length;
1189 	net_buffer* buffer;
1190 	int i;
1191 
1192 	// the convention to this function is that have header been
1193 	// present, { data, length } would have been iovec[0] and is
1194 	// always considered like that
1195 
1196 	if (header) {
1197 		// calculate the length considering all of the extra buffers
1198 		for (i = 1; i < header->msg_iovlen; i++)
1199 			totalLength += header->msg_iov[i].iov_len;
1200 	}
1201 
1202 	status_t status = socket->first_info->read_data(
1203 		socket->first_protocol, totalLength, flags, &buffer);
1204 	if (status < B_OK)
1205 		return status;
1206 
1207 	// process ancillary data
1208 	if (header != NULL) {
1209 		if (buffer != NULL && header->msg_control != NULL) {
1210 			ancillary_data_container* container
1211 				= gNetBufferModule.get_ancillary_data(buffer);
1212 			if (container != NULL)
1213 				status = process_ancillary_data(socket, container, header);
1214 			else
1215 				status = process_ancillary_data(socket, buffer, header);
1216 			if (status != B_OK) {
1217 				gNetBufferModule.free(buffer);
1218 				return status;
1219 			}
1220 		} else
1221 			header->msg_controllen = 0;
1222 	}
1223 
1224 	// TODO: - returning a NULL buffer when received 0 bytes
1225 	//         may not make much sense as we still need the address
1226 	//       - gNetBufferModule.read() uses memcpy() instead of user_memcpy
1227 
1228 	size_t nameLen = 0;
1229 
1230 	if (header) {
1231 		// TODO: - consider the control buffer options
1232 		nameLen = header->msg_namelen;
1233 		header->msg_namelen = 0;
1234 		header->msg_flags = 0;
1235 	}
1236 
1237 	if (buffer == NULL)
1238 		return 0;
1239 
1240 	size_t bytesReceived = buffer->size, bytesCopied = 0;
1241 
1242 	length = min_c(bytesReceived, length);
1243 	if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
1244 		gNetBufferModule.free(buffer);
1245 		return ENOBUFS;
1246 	}
1247 
1248 	// if first copy was a success, proceed to following
1249 	// copies as required
1250 	bytesCopied += length;
1251 
1252 	if (header) {
1253 		// we only start considering at iovec[1]
1254 		// as { data, length } is iovec[0]
1255 		for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
1256 			iovec& vec = header->msg_iov[i];
1257 			size_t toRead = min_c(bytesReceived - bytesCopied, vec.iov_len);
1258 			if (gNetBufferModule.read(buffer, bytesCopied, vec.iov_base,
1259 					toRead) < B_OK) {
1260 				break;
1261 			}
1262 
1263 			bytesCopied += toRead;
1264 		}
1265 
1266 		if (header->msg_name != NULL) {
1267 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
1268 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
1269 		}
1270 	}
1271 
1272 	gNetBufferModule.free(buffer);
1273 
1274 	if (bytesCopied < bytesReceived) {
1275 		if (header)
1276 			header->msg_flags = MSG_TRUNC;
1277 
1278 		if (flags & MSG_TRUNC)
1279 			return bytesReceived;
1280 	}
1281 
1282 	return bytesCopied;
1283 }
1284 
1285 
1286 ssize_t
1287 socket_send(net_socket* socket, msghdr* header, const void* data, size_t length,
1288 	int flags)
1289 {
1290 	const sockaddr* address = NULL;
1291 	socklen_t addressLength = 0;
1292 	size_t bytesLeft = length;
1293 
1294 	if (length > SSIZE_MAX)
1295 		return B_BAD_VALUE;
1296 
1297 	ancillary_data_container* ancillaryData = NULL;
1298 	CObjectDeleter<ancillary_data_container> ancillaryDataDeleter(NULL,
1299 		&delete_ancillary_data_container);
1300 
1301 	if (header != NULL) {
1302 		address = (const sockaddr*)header->msg_name;
1303 		addressLength = header->msg_namelen;
1304 
1305 		// get the ancillary data
1306 		if (header->msg_control != NULL) {
1307 			ancillaryData = create_ancillary_data_container();
1308 			if (ancillaryData == NULL)
1309 				return B_NO_MEMORY;
1310 			ancillaryDataDeleter.SetTo(ancillaryData);
1311 
1312 			status_t status = add_ancillary_data(socket, ancillaryData,
1313 				(cmsghdr*)header->msg_control, header->msg_controllen);
1314 			if (status != B_OK)
1315 				return status;
1316 		}
1317 	}
1318 
1319 	if (addressLength == 0)
1320 		address = NULL;
1321 	else if (address == NULL)
1322 		return B_BAD_VALUE;
1323 
1324 	if (socket->peer.ss_len != 0) {
1325 		if (address != NULL)
1326 			return EISCONN;
1327 
1328 		// socket is connected, we use that address
1329 		address = (struct sockaddr*)&socket->peer;
1330 		addressLength = socket->peer.ss_len;
1331 	}
1332 
1333 	if (address == NULL || addressLength == 0) {
1334 		// don't know where to send to:
1335 		return EDESTADDRREQ;
1336 	}
1337 
1338 	if ((socket->first_info->flags & NET_PROTOCOL_ATOMIC_MESSAGES) != 0
1339 		&& bytesLeft > socket->send.buffer_size)
1340 		return EMSGSIZE;
1341 
1342 	if (socket->address.ss_len == 0) {
1343 		// try to bind first
1344 		status_t status = socket_bind(socket, NULL, 0);
1345 		if (status != B_OK)
1346 			return status;
1347 	}
1348 
1349 	// If the protocol has a send_data_no_buffer() hook, we use that one.
1350 	if (socket->first_info->send_data_no_buffer != NULL) {
1351 		iovec stackVec = { (void*)data, length };
1352 		iovec* vecs = header ? header->msg_iov : &stackVec;
1353 		int vecCount = header ? header->msg_iovlen : 1;
1354 
1355 		ssize_t written = socket->first_info->send_data_no_buffer(
1356 			socket->first_protocol, vecs, vecCount, ancillaryData, address,
1357 			addressLength);
1358 		if (written > 0)
1359 			ancillaryDataDeleter.Detach();
1360 		return written;
1361 	}
1362 
1363 	// By convention, if a header is given, the (data, length) equals the first
1364 	// iovec. So drop the header, if it is the only iovec. Otherwise compute
1365 	// the size of the remaining ones.
1366 	if (header != NULL) {
1367 		if (header->msg_iovlen <= 1)
1368 			header = NULL;
1369 		else {
1370 // TODO: The iovecs have already been copied to kernel space. Simplify!
1371 			bytesLeft += compute_user_iovec_length(header->msg_iov + 1,
1372 				header->msg_iovlen - 1);
1373 		}
1374 	}
1375 
1376 	ssize_t bytesSent = 0;
1377 	size_t vecOffset = 0;
1378 	uint32 vecIndex = 0;
1379 
1380 	while (bytesLeft > 0) {
1381 		// TODO: useful, maybe even computed header space!
1382 		net_buffer* buffer = gNetBufferModule.create(256);
1383 		if (buffer == NULL)
1384 			return ENOBUFS;
1385 
1386 		while (buffer->size < socket->send.buffer_size
1387 			&& buffer->size < bytesLeft) {
1388 			if (vecIndex > 0 && vecOffset == 0) {
1389 				// retrieve next iovec buffer from header
1390 				iovec vec;
1391 				if (user_memcpy(&vec, header->msg_iov + vecIndex, sizeof(iovec))
1392 						< B_OK) {
1393 					gNetBufferModule.free(buffer);
1394 					return B_BAD_ADDRESS;
1395 				}
1396 
1397 				data = vec.iov_base;
1398 				length = vec.iov_len;
1399 			}
1400 
1401 			size_t bytes = length;
1402 			if (buffer->size + bytes > socket->send.buffer_size)
1403 				bytes = socket->send.buffer_size - buffer->size;
1404 
1405 			if (gNetBufferModule.append(buffer, data, bytes) < B_OK) {
1406 				gNetBufferModule.free(buffer);
1407 				return ENOBUFS;
1408 			}
1409 
1410 			if (bytes != length) {
1411 				// partial send
1412 				vecOffset = bytes;
1413 				length -= vecOffset;
1414 				data = (uint8*)data + vecOffset;
1415 			} else if (header != NULL) {
1416 				// proceed with next buffer, if any
1417 				vecOffset = 0;
1418 				vecIndex++;
1419 
1420 				if (vecIndex >= (uint32)header->msg_iovlen)
1421 					break;
1422 			}
1423 		}
1424 
1425 		// attach ancillary data to the first buffer
1426 		status_t status = B_OK;
1427 		if (ancillaryData != NULL) {
1428 			gNetBufferModule.set_ancillary_data(buffer, ancillaryData);
1429 			ancillaryDataDeleter.Detach();
1430 			ancillaryData = NULL;
1431 		}
1432 
1433 		size_t bufferSize = buffer->size;
1434 		buffer->flags = flags;
1435 		memcpy(buffer->source, &socket->address, socket->address.ss_len);
1436 		memcpy(buffer->destination, address, addressLength);
1437 		buffer->destination->sa_len = addressLength;
1438 
1439 		if (status == B_OK) {
1440 			status = socket->first_info->send_data(socket->first_protocol,
1441 				buffer);
1442 		}
1443 		if (status < B_OK) {
1444 			size_t sizeAfterSend = buffer->size;
1445 			gNetBufferModule.free(buffer);
1446 
1447 			if ((sizeAfterSend != bufferSize || bytesSent > 0)
1448 				&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
1449 				// this appears to be a partial write
1450 				return bytesSent + (bufferSize - sizeAfterSend);
1451 			}
1452 			return status;
1453 		}
1454 
1455 		bytesLeft -= bufferSize;
1456 		bytesSent += bufferSize;
1457 	}
1458 
1459 	return bytesSent;
1460 }
1461 
1462 
1463 status_t
1464 socket_set_option(net_socket* socket, int level, int option, const void* value,
1465 	int length)
1466 {
1467 	if (level != SOL_SOCKET)
1468 		return ENOPROTOOPT;
1469 
1470 	switch (option) {
1471 		// TODO: implement other options!
1472 		case SO_LINGER:
1473 		{
1474 			if (length < (int)sizeof(struct linger))
1475 				return B_BAD_VALUE;
1476 
1477 			struct linger* linger = (struct linger*)value;
1478 			if (linger->l_onoff) {
1479 				socket->options |= SO_LINGER;
1480 				socket->linger = linger->l_linger;
1481 			} else {
1482 				socket->options &= ~SO_LINGER;
1483 				socket->linger = 0;
1484 			}
1485 			return B_OK;
1486 		}
1487 
1488 		case SO_SNDBUF:
1489 			if (length != sizeof(uint32))
1490 				return B_BAD_VALUE;
1491 
1492 			socket->send.buffer_size = *(const uint32*)value;
1493 			return B_OK;
1494 
1495 		case SO_RCVBUF:
1496 			if (length != sizeof(uint32))
1497 				return B_BAD_VALUE;
1498 
1499 			socket->receive.buffer_size = *(const uint32*)value;
1500 			return B_OK;
1501 
1502 		case SO_SNDLOWAT:
1503 			if (length != sizeof(uint32))
1504 				return B_BAD_VALUE;
1505 
1506 			socket->send.low_water_mark = *(const uint32*)value;
1507 			return B_OK;
1508 
1509 		case SO_RCVLOWAT:
1510 			if (length != sizeof(uint32))
1511 				return B_BAD_VALUE;
1512 
1513 			socket->receive.low_water_mark = *(const uint32*)value;
1514 			return B_OK;
1515 
1516 		case SO_RCVTIMEO:
1517 		case SO_SNDTIMEO:
1518 		{
1519 			if (length != sizeof(struct timeval))
1520 				return B_BAD_VALUE;
1521 
1522 			const struct timeval* timeval = (const struct timeval*)value;
1523 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1524 			if (timeout == 0)
1525 				timeout = B_INFINITE_TIMEOUT;
1526 
1527 			if (option == SO_SNDTIMEO)
1528 				socket->send.timeout = timeout;
1529 			else
1530 				socket->receive.timeout = timeout;
1531 			return B_OK;
1532 		}
1533 
1534 		case SO_NONBLOCK:
1535 			if (length != sizeof(int32))
1536 				return B_BAD_VALUE;
1537 
1538 			if (*(const int32*)value) {
1539 				socket->send.timeout = 0;
1540 				socket->receive.timeout = 0;
1541 			} else {
1542 				socket->send.timeout = B_INFINITE_TIMEOUT;
1543 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1544 			}
1545 			return B_OK;
1546 
1547 		case SO_BROADCAST:
1548 		case SO_DEBUG:
1549 		case SO_DONTROUTE:
1550 		case SO_KEEPALIVE:
1551 		case SO_OOBINLINE:
1552 		case SO_REUSEADDR:
1553 		case SO_REUSEPORT:
1554 		case SO_USELOOPBACK:
1555 			if (length != sizeof(int32))
1556 				return B_BAD_VALUE;
1557 
1558 			if (*(const int32*)value)
1559 				socket->options |= option;
1560 			else
1561 				socket->options &= ~option;
1562 			return B_OK;
1563 
1564 		case SO_BINDTODEVICE:
1565 		{
1566 			if (length != sizeof(int32))
1567 				return B_BAD_VALUE;
1568 
1569 			int index = *(const int32*)value;
1570 			if (index < 0)
1571 				return B_BAD_VALUE;
1572 
1573 			// TODO: we might want to check if the device exists at all
1574 			// (although it doesn't really harm when we don't)
1575 			socket->bound_to_device = index;
1576 			return B_OK;
1577 		}
1578 
1579 		default:
1580 			break;
1581 	}
1582 
1583 	dprintf("socket_setsockopt: unknown option %d\n", option);
1584 	return ENOPROTOOPT;
1585 }
1586 
1587 
1588 int
1589 socket_setsockopt(net_socket* socket, int level, int option, const void* value,
1590 	int length)
1591 {
1592 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1593 		level, option, value, length);
1594 }
1595 
1596 
1597 int
1598 socket_shutdown(net_socket* socket, int direction)
1599 {
1600 	return socket->first_info->shutdown(socket->first_protocol, direction);
1601 }
1602 
1603 
1604 status_t
1605 socket_socketpair(int family, int type, int protocol, net_socket* sockets[2])
1606 {
1607 	sockets[0] = NULL;
1608 	sockets[1] = NULL;
1609 
1610 	// create sockets
1611 	status_t error = socket_open(family, type, protocol, &sockets[0]);
1612 	if (error != B_OK)
1613 		return error;
1614 
1615 	if (error == B_OK)
1616 		error = socket_open(family, type, protocol, &sockets[1]);
1617 
1618 	// bind one
1619 	if (error == B_OK)
1620 		error = socket_bind(sockets[0], NULL, 0);
1621 
1622 	// start listening
1623 	if (error == B_OK)
1624 		error = socket_listen(sockets[0], 1);
1625 
1626 	// connect them
1627 	if (error == B_OK) {
1628 		error = socket_connect(sockets[1], (sockaddr*)&sockets[0]->address,
1629 			sockets[0]->address.ss_len);
1630 	}
1631 
1632 	// accept a socket
1633 	net_socket* acceptedSocket = NULL;
1634 	if (error == B_OK)
1635 		error = socket_accept(sockets[0], NULL, NULL, &acceptedSocket);
1636 
1637 	if (error == B_OK) {
1638 		// everything worked: close the listener socket
1639 		socket_close(sockets[0]);
1640 		socket_free(sockets[0]);
1641 		sockets[0] = acceptedSocket;
1642 	} else {
1643 		// close sockets on error
1644 		for (int i = 0; i < 2; i++) {
1645 			if (sockets[i] != NULL) {
1646 				socket_close(sockets[i]);
1647 				socket_free(sockets[i]);
1648 				sockets[i] = NULL;
1649 			}
1650 		}
1651 	}
1652 
1653 	return error;
1654 }
1655 
1656 
1657 //	#pragma mark -
1658 
1659 
1660 static status_t
1661 socket_std_ops(int32 op, ...)
1662 {
1663 	switch (op) {
1664 		case B_MODULE_INIT:
1665 		{
1666 			new (&sSocketList) SocketList;
1667 			mutex_init(&sSocketLock, "socket list");
1668 
1669 #ifdef ADD_DEBUGGER_COMMANDS
1670 			add_debugger_command("sockets", dump_sockets, "lists all sockets");
1671 			add_debugger_command("socket", dump_socket, "dumps a socket");
1672 #endif
1673 			return B_OK;
1674 		}
1675 		case B_MODULE_UNINIT:
1676 			ASSERT(sSocketList.IsEmpty());
1677 			mutex_destroy(&sSocketLock);
1678 
1679 #ifdef ADD_DEBUGGER_COMMANDS
1680 			remove_debugger_command("socket", dump_socket);
1681 			remove_debugger_command("sockets", dump_sockets);
1682 #endif
1683 			return B_OK;
1684 
1685 		default:
1686 			return B_ERROR;
1687 	}
1688 }
1689 
1690 
1691 net_socket_module_info gNetSocketModule = {
1692 	{
1693 		NET_SOCKET_MODULE_NAME,
1694 		0,
1695 		socket_std_ops
1696 	},
1697 	socket_open,
1698 	socket_close,
1699 	socket_free,
1700 
1701 	socket_readv,
1702 	socket_writev,
1703 	socket_control,
1704 
1705 	socket_read_avail,
1706 	socket_send_avail,
1707 
1708 	socket_send_data,
1709 	socket_receive_data,
1710 
1711 	socket_get_option,
1712 	socket_set_option,
1713 
1714 	socket_get_next_stat,
1715 
1716 	// connections
1717 	socket_acquire,
1718 	socket_release,
1719 	socket_spawn_pending,
1720 	socket_dequeue_connected,
1721 	socket_count_connected,
1722 	socket_set_max_backlog,
1723 	socket_has_parent,
1724 	socket_connected,
1725 	socket_aborted,
1726 
1727 	// notifications
1728 	socket_request_notification,
1729 	socket_cancel_notification,
1730 	socket_notify,
1731 
1732 	// standard socket API
1733 	socket_accept,
1734 	socket_bind,
1735 	socket_connect,
1736 	socket_getpeername,
1737 	socket_getsockname,
1738 	socket_getsockopt,
1739 	socket_listen,
1740 	socket_receive,
1741 	socket_send,
1742 	socket_setsockopt,
1743 	socket_shutdown,
1744 	socket_socketpair
1745 };
1746 
1747