xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision 5115ca085884f7b604a3d607688f0ca20fb7cf57)
1 /*
2  * Copyright 2006-2007, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <net_protocol.h>
13 #include <net_stack.h>
14 #include <net_stat.h>
15 
16 #include <KernelExport.h>
17 #include <team.h>
18 #include <util/AutoLock.h>
19 #include <util/list.h>
20 #include <fs/select_sync_pool.h>
21 
22 #include <new>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/time.h>
26 
27 
28 struct net_socket_private : net_socket {
29 	struct list_link		link;
30 	team_id					owner;
31 	uint32					max_backlog;
32 	uint32					child_count;
33 	struct list				pending_children;
34 	struct list				connected_children;
35 
36 	struct select_sync_pool	*select_pool;
37 	benaphore				lock;
38 };
39 
40 
41 void socket_delete(net_socket *socket);
42 int socket_bind(net_socket *socket, const struct sockaddr *address, socklen_t addressLength);
43 
44 struct list sSocketList;
45 benaphore sSocketLock;
46 
47 
48 static status_t
49 create_socket(int family, int type, int protocol, net_socket_private **_socket)
50 {
51 	struct net_socket_private *socket = new (std::nothrow) net_socket_private;
52 	if (socket == NULL)
53 		return B_NO_MEMORY;
54 
55 	memset(socket, 0, sizeof(net_socket_private));
56 	socket->family = family;
57 	socket->type = type;
58 	socket->protocol = protocol;
59 
60 	status_t status = benaphore_init(&socket->lock, "socket");
61 	if (status < B_OK)
62 		goto err1;
63 
64 	// set defaults (may be overridden by the protocols)
65 	socket->send.buffer_size = 65535;
66 	socket->send.low_water_mark = 1;
67 	socket->send.timeout = B_INFINITE_TIMEOUT;
68 	socket->receive.buffer_size = 65535;
69 	socket->receive.low_water_mark = 1;
70 	socket->receive.timeout = B_INFINITE_TIMEOUT;
71 
72 	list_init_etc(&socket->pending_children, offsetof(net_socket_private, link));
73 	list_init_etc(&socket->connected_children, offsetof(net_socket_private, link));
74 
75 	status = get_domain_protocols(socket);
76 	if (status < B_OK)
77 		goto err2;
78 
79 	*_socket = socket;
80 	return B_OK;
81 
82 err2:
83 	benaphore_destroy(&socket->lock);
84 err1:
85 	delete socket;
86 	return status;
87 }
88 
89 
90 //	#pragma mark -
91 
92 
93 status_t
94 socket_open(int family, int type, int protocol, net_socket **_socket)
95 {
96 	net_socket_private *socket;
97 	status_t status = create_socket(family, type, protocol, &socket);
98 	if (status < B_OK)
99 		return status;
100 
101 	status = socket->first_info->open(socket->first_protocol);
102 	if (status < B_OK) {
103 		socket_delete(socket);
104 		return status;
105 	}
106 
107 	socket->owner = team_get_current_team_id();
108 
109 	benaphore_lock(&sSocketLock);
110 	list_add_item(&sSocketList, socket);
111 	benaphore_unlock(&sSocketLock);
112 
113 	*_socket = socket;
114 	return B_OK;
115 }
116 
117 
118 status_t
119 socket_close(net_socket *_socket)
120 {
121 	net_socket_private *socket = (net_socket_private *)_socket;
122 
123 	if (socket->select_pool) {
124 		// notify all pending selects
125 		notify_select_event_pool(socket->select_pool, ~0);
126 	}
127 
128 	return socket->first_info->close(socket->first_protocol);
129 }
130 
131 
132 status_t
133 socket_free(net_socket *socket)
134 {
135 	status_t status = socket->first_info->free(socket->first_protocol);
136 	if (status == B_BUSY)
137 		return B_OK;
138 
139 	socket_delete(socket);
140 	return B_OK;
141 }
142 
143 
144 status_t
145 socket_readv(net_socket *socket, const iovec *vecs, size_t vecCount, size_t *_length)
146 {
147 	return -1;
148 }
149 
150 
151 status_t
152 socket_writev(net_socket *socket, const iovec *vecs, size_t vecCount, size_t *_length)
153 {
154 	if (socket->peer.ss_len == 0)
155 		return ECONNRESET;
156 
157 	if (socket->address.ss_len == 0) {
158 		// try to bind first
159 		status_t status = socket_bind(socket, NULL, 0);
160 		if (status < B_OK)
161 			return status;
162 	}
163 
164 	// TODO: useful, maybe even computed header space!
165 	net_buffer *buffer = gNetBufferModule.create(256);
166 	if (buffer == NULL)
167 		return ENOBUFS;
168 
169 	// copy data into buffer
170 
171 	for (uint32 i = 0; i < vecCount; i++) {
172 		if (gNetBufferModule.append(buffer, vecs[i].iov_base,
173 				vecs[i].iov_len) < B_OK) {
174 			gNetBufferModule.free(buffer);
175 			return ENOBUFS;
176 		}
177 	}
178 
179 	memcpy(&buffer->source, &socket->address, socket->address.ss_len);
180 	memcpy(&buffer->destination, &socket->peer, socket->peer.ss_len);
181 	size_t size = buffer->size;
182 
183 	ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
184 		buffer);
185 	if (bytesWritten < B_OK) {
186 		if (buffer->size != size) {
187 			// this appears to be a partial write
188 			*_length = size - buffer->size;
189 		}
190 		gNetBufferModule.free(buffer);
191 		return bytesWritten;
192 	}
193 
194 	*_length = bytesWritten;
195 	return B_OK;
196 }
197 
198 
199 status_t
200 socket_control(net_socket *socket, int32 op, void *data, size_t length)
201 {
202 	return socket->first_info->control(socket->first_protocol,
203 		LEVEL_DRIVER_IOCTL, op, data, &length);
204 }
205 
206 
207 ssize_t
208 socket_read_avail(net_socket *socket)
209 {
210 	return socket->first_info->read_avail(socket->first_protocol);
211 }
212 
213 
214 ssize_t
215 socket_send_avail(net_socket *socket)
216 {
217 	return socket->first_info->send_avail(socket->first_protocol);
218 }
219 
220 
221 status_t
222 socket_send_data(net_socket *socket, net_buffer *buffer)
223 {
224 	return socket->first_info->send_data(socket->first_protocol,
225 		buffer);
226 }
227 
228 
229 status_t
230 socket_receive_data(net_socket *socket, size_t length, uint32 flags,
231 	net_buffer **_buffer)
232 {
233 	status_t status = socket->first_info->read_data(socket->first_protocol,
234 		length, flags, _buffer);
235 
236 	if (status < B_OK)
237 		return status;
238 
239 	if (*_buffer && length < (*_buffer)->size) {
240 		// discard any data behind the amount requested
241 		gNetBufferModule.trim(*_buffer, length);
242 	}
243 
244 	return status;
245 }
246 
247 
248 status_t
249 socket_get_next_stat(uint32 *_cookie, int family, struct net_stat *stat)
250 {
251 	BenaphoreLocker locker(sSocketLock);
252 
253 	net_socket_private *socket = NULL;
254 	uint32 cookie = *_cookie;
255 	uint32 count = 0;
256 	while ((socket = (net_socket_private *)list_get_next_item(&sSocketList, socket)) != NULL) {
257 		// TODO: also traverse the pending connections
258 		if (count == cookie)
259 			break;
260 
261 		if (family == -1 || family == socket->family)
262 			count++;
263 	}
264 
265 	if (socket == NULL)
266 		return B_ENTRY_NOT_FOUND;
267 
268 	*_cookie = count + 1;
269 
270 	stat->family = socket->family;
271 	stat->type = socket->type;
272 	stat->protocol = socket->protocol;
273 	stat->owner = socket->owner;
274 	stat->state[0] = '\0';
275 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
276 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
277 	stat->receive_queue_size = 0;
278 	stat->send_queue_size = 0;
279 
280 	// fill in protocol specific data (if supported by the protocol)
281 	size_t length = sizeof(net_stat);
282 	socket->first_info->control(socket->first_protocol, socket->protocol,
283 		NET_STAT_SOCKET, stat, &length);
284 
285 	return B_OK;
286 }
287 
288 
289 //	#pragma mark - connections
290 
291 
292 status_t
293 socket_spawn_pending(net_socket *_parent, net_socket **_socket)
294 {
295 	net_socket_private *parent = (net_socket_private *)_parent;
296 
297 	BenaphoreLocker locker(parent->lock);
298 
299 	// We actually accept more pending connections to compensate for those
300 	// that never complete, and also make sure at least a single connection
301 	// can always be accepted
302 	if (parent->child_count > 3 * parent->max_backlog / 2)
303 		return ENOBUFS;
304 
305 	net_socket_private *socket;
306 	status_t status = create_socket(parent->family, parent->type, parent->protocol,
307 		&socket);
308 	if (status < B_OK)
309 		return status;
310 
311 	// inherit parent's properties
312 	socket->send = parent->send;
313 	socket->receive = parent->receive;
314 	socket->options = parent->options & ~SO_ACCEPTCONN;
315 	socket->linger = parent->linger;
316 	socket->owner = parent->owner;
317 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
318 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
319 
320 	// add to the parent's list of pending connections
321 	list_add_item(&parent->pending_children, socket);
322 	socket->parent = parent;
323 	parent->child_count++;
324 
325 	*_socket = socket;
326 	return B_OK;
327 }
328 
329 
330 void
331 socket_delete(net_socket *_socket)
332 {
333 	net_socket_private *socket = (net_socket_private *)_socket;
334 
335 	if (socket->parent != NULL)
336 		panic("socket still has a parent!");
337 
338 	benaphore_lock(&sSocketLock);
339 	list_remove_item(&sSocketList, socket);
340 	benaphore_unlock(&sSocketLock);
341 
342 	put_domain_protocols(socket);
343 	benaphore_destroy(&socket->lock);
344 	delete_select_sync_pool(socket->select_pool);
345 	delete socket;
346 }
347 
348 
349 status_t
350 socket_dequeue_connected(net_socket *_parent, net_socket **_socket)
351 {
352 	net_socket_private *parent = (net_socket_private *)_parent;
353 
354 	benaphore_lock(&parent->lock);
355 
356 	net_socket_private *socket = (net_socket_private *)list_remove_head_item(
357 		&parent->connected_children);
358 	if (socket != NULL) {
359 		socket->parent = NULL;
360 		parent->child_count--;
361 		*_socket = socket;
362 	}
363 
364 	benaphore_unlock(&parent->lock);
365 
366 	if (socket == NULL)
367 		return B_ENTRY_NOT_FOUND;
368 
369 	benaphore_lock(&sSocketLock);
370 	list_add_item(&sSocketList, socket);
371 	benaphore_unlock(&sSocketLock);
372 
373 	return B_OK;
374 }
375 
376 
377 ssize_t
378 socket_count_connected(net_socket *_parent)
379 {
380 	net_socket_private *parent = (net_socket_private *)_parent;
381 
382 	BenaphoreLocker _(parent->lock);
383 
384 	int count = 0;
385 	for (void *it = list_get_first_item(&parent->connected_children);
386 			it != NULL; it = list_get_next_item(&parent->connected_children, it))
387 		count++;
388 
389 	return count;
390 }
391 
392 
393 status_t
394 socket_set_max_backlog(net_socket *_socket, uint32 backlog)
395 {
396 	net_socket_private *socket = (net_socket_private *)_socket;
397 
398 	// we enforce an upper limit of connections waiting to be accepted
399 	if (backlog > 256)
400 		backlog = 256;
401 
402 	benaphore_lock(&socket->lock);
403 
404 	// first remove the pending connections, then the already connected ones as needed
405 	net_socket_private *child;
406 	while (socket->child_count > backlog
407 		&& (child = (net_socket_private *)list_remove_tail_item(&socket->pending_children)) != NULL) {
408 		child->parent = NULL;
409 		socket->child_count--;
410 	}
411 	while (socket->child_count > backlog
412 		&& (child = (net_socket_private *)list_remove_tail_item(&socket->connected_children)) != NULL) {
413 		child->parent = NULL;
414 		socket_delete(child);
415 		socket->child_count--;
416 	}
417 
418 	socket->max_backlog = backlog;
419 	benaphore_unlock(&socket->lock);
420 	return B_OK;
421 }
422 
423 
424 /*!
425 	The socket has been connected. It will be moved to the connected queue
426 	of its parent socket.
427 */
428 status_t
429 socket_connected(net_socket *socket)
430 {
431 	net_socket_private *parent = (net_socket_private *)socket->parent;
432 	if (parent == NULL)
433 		return B_BAD_VALUE;
434 
435 	benaphore_lock(&parent->lock);
436 
437 	list_remove_item(&parent->pending_children, socket);
438 	list_add_item(&parent->connected_children, socket);
439 
440 	// notify parent
441 	if (parent->select_pool)
442 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
443 
444 	benaphore_unlock(&parent->lock);
445 	return B_OK;
446 }
447 
448 
449 //	#pragma mark - notifications
450 
451 
452 status_t
453 socket_request_notification(net_socket *_socket, uint8 event, uint32 ref,
454 	selectsync *sync)
455 {
456 	net_socket_private *socket = (net_socket_private *)_socket;
457 
458 	benaphore_lock(&socket->lock);
459 
460 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
461 		ref, event);
462 
463 	benaphore_unlock(&socket->lock);
464 
465 	if (status < B_OK)
466 		return status;
467 
468 	// check if the event is already present
469 	// TODO: add support for poll() types
470 
471 	switch (event) {
472 		case B_SELECT_READ:
473 		{
474 			ssize_t available = socket_read_avail(socket);
475 			if ((ssize_t)socket->receive.low_water_mark <= available || available < B_OK)
476 				notify_select_event(sync, ref, event);
477 			break;
478 		}
479 		case B_SELECT_WRITE:
480 		{
481 			ssize_t available = socket_send_avail(socket);
482 			if ((ssize_t)socket->send.low_water_mark <= available || available < B_OK)
483 				notify_select_event(sync, ref, event);
484 			break;
485 		}
486 		case B_SELECT_ERROR:
487 			// TODO: B_SELECT_ERROR condition!
488 			break;
489 	}
490 
491 	return B_OK;
492 }
493 
494 
495 status_t
496 socket_cancel_notification(net_socket *_socket, uint8 event, selectsync *sync)
497 {
498 	net_socket_private *socket = (net_socket_private *)_socket;
499 
500 	benaphore_lock(&socket->lock);
501 
502 	status_t status = remove_select_sync_pool_entry(&socket->select_pool,
503 		sync, event);
504 
505 	benaphore_unlock(&socket->lock);
506 	return status;
507 }
508 
509 
510 status_t
511 socket_notify(net_socket *_socket, uint8 event, int32 value)
512 {
513 	net_socket_private *socket = (net_socket_private *)_socket;
514 	bool notify = true;
515 
516 	switch (event) {
517 		case B_SELECT_READ:
518 			if ((ssize_t)socket->receive.low_water_mark > value && value >= B_OK)
519 				notify = false;
520 			break;
521 
522 		case B_SELECT_WRITE:
523 			if ((ssize_t)socket->send.low_water_mark > value && value >= B_OK)
524 				notify = false;
525 			break;
526 
527 		case B_SELECT_ERROR:
528 			socket->error = value;
529 			break;
530 	}
531 
532 	benaphore_lock(&socket->lock);
533 
534 	if (notify && socket->select_pool)
535 		notify_select_event_pool(socket->select_pool, event);
536 
537 	benaphore_unlock(&socket->lock);
538 	return B_OK;
539 }
540 
541 
542 //	#pragma mark - standard socket API
543 
544 
545 int
546 socket_accept(net_socket *socket, struct sockaddr *address, socklen_t *_addressLength,
547 	net_socket **_acceptedSocket)
548 {
549 	if ((socket->options & SO_ACCEPTCONN) == 0)
550 		return B_BAD_VALUE;
551 
552 	net_socket *accepted;
553 	status_t status = socket->first_info->accept(socket->first_protocol,
554 		&accepted);
555 	if (status < B_OK)
556 		return status;
557 
558 	if (address && *_addressLength > 0) {
559 		memcpy(address, &accepted->peer, min_c(*_addressLength,
560 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
561 		*_addressLength = accepted->peer.ss_len;
562 	}
563 
564 	*_acceptedSocket = accepted;
565 	return B_OK;
566 }
567 
568 
569 int
570 socket_bind(net_socket *socket, const struct sockaddr *address, socklen_t addressLength)
571 {
572 	sockaddr empty;
573 	if (address == NULL) {
574 		// special - try to bind to an empty address, like INADDR_ANY
575 		memset(&empty, 0, sizeof(sockaddr));
576 		empty.sa_len = sizeof(sockaddr);
577 		empty.sa_family = socket->family;
578 
579 		address = &empty;
580 		addressLength = sizeof(sockaddr);
581 	}
582 
583 	if (socket->address.ss_len != 0) {
584 		status_t status = socket->first_info->unbind(socket->first_protocol,
585 			(sockaddr *)&socket->address);
586 		if (status < B_OK)
587 			return status;
588 	}
589 
590 	memcpy(&socket->address, address, sizeof(sockaddr));
591 
592 	status_t status = socket->first_info->bind(socket->first_protocol,
593 		(sockaddr *)address);
594 	if (status < B_OK) {
595 		// clear address again, as binding failed
596 		socket->address.ss_len = 0;
597 	}
598 
599 	return status;
600 }
601 
602 
603 int
604 socket_connect(net_socket *socket, const struct sockaddr *address, socklen_t addressLength)
605 {
606 	if (address == NULL || addressLength == 0)
607 		return ENETUNREACH;
608 
609 	if (socket->address.ss_len == 0) {
610 		// try to bind first
611 		status_t status = socket_bind(socket, NULL, 0);
612 		if (status < B_OK)
613 			return status;
614 	}
615 
616 	return socket->first_info->connect(socket->first_protocol, address);
617 }
618 
619 
620 int
621 socket_getpeername(net_socket *socket, struct sockaddr *address, socklen_t *_addressLength)
622 {
623 	if (socket->peer.ss_len == 0)
624 		return ENOTCONN;
625 
626 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
627 	*_addressLength = socket->peer.ss_len;
628 	return B_OK;
629 }
630 
631 
632 int
633 socket_getsockname(net_socket *socket, struct sockaddr *address, socklen_t *_addressLength)
634 {
635 	if (socket->address.ss_len == 0)
636 		return ENOTCONN;
637 
638 	memcpy(address, &socket->address, min_c(*_addressLength, socket->address.ss_len));
639 	*_addressLength = socket->address.ss_len;
640 	return B_OK;
641 }
642 
643 
644 status_t
645 socket_get_option(net_socket *socket, int level, int option, void *value,
646 	int *_length)
647 {
648 	if (level != SOL_SOCKET)
649 		return ENOPROTOOPT;
650 
651 	switch (option) {
652 		case SO_SNDBUF:
653 		{
654 			uint32 *size = (uint32 *)value;
655 			*size = socket->send.buffer_size;
656 			*_length = sizeof(uint32);
657 			return B_OK;
658 		}
659 
660 		case SO_RCVBUF:
661 		{
662 			uint32 *size = (uint32 *)value;
663 			*size = socket->receive.buffer_size;
664 			*_length = sizeof(uint32);
665 			return B_OK;
666 		}
667 
668 		case SO_SNDLOWAT:
669 		{
670 			uint32 *size = (uint32 *)value;
671 			*size = socket->send.low_water_mark;
672 			*_length = sizeof(uint32);
673 			return B_OK;
674 		}
675 
676 		case SO_RCVLOWAT:
677 		{
678 			uint32 *size = (uint32 *)value;
679 			*size = socket->receive.low_water_mark;
680 			*_length = sizeof(uint32);
681 			return B_OK;
682 		}
683 
684 		case SO_RCVTIMEO:
685 		case SO_SNDTIMEO:
686 		{
687 			if (*_length < (int)sizeof(struct timeval))
688 				return B_BAD_VALUE;
689 
690 			bigtime_t timeout;
691 			if (option == SO_SNDTIMEO)
692 				timeout = socket->send.timeout;
693 			else
694 				timeout = socket->receive.timeout;
695 			if (timeout == B_INFINITE_TIMEOUT)
696 				timeout = 0;
697 
698 			struct timeval *timeval = (struct timeval *)value;
699 			timeval->tv_sec = timeout / 1000000LL;
700 			timeval->tv_usec = timeout % 1000000LL;
701 
702 			*_length = sizeof(struct timeval);
703 			return B_OK;
704 		}
705 
706 		case SO_NONBLOCK:
707 		{
708 			int32 *_set = (int32 *)value;
709 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
710 			*_length = sizeof(int32);
711 			return B_OK;
712 		}
713 
714 		case SO_ACCEPTCONN:
715 		case SO_BROADCAST:
716 		case SO_DEBUG:
717 		case SO_DONTROUTE:
718 		case SO_KEEPALIVE:
719 		case SO_OOBINLINE:
720 		case SO_REUSEADDR:
721 		case SO_REUSEPORT:
722 		case SO_USELOOPBACK:
723 		{
724 			int32 *_set = (int32 *)value;
725 			*_set = (socket->options & option) != 0;
726 			*_length = sizeof(int32);
727 			return B_OK;
728 		}
729 
730 		case SO_ERROR:
731 		{
732 			int32 *_set = (int32 *)value;
733 			*_set = socket->error;
734 			*_length = sizeof(int32);
735 
736 			socket->error = B_OK;
737 				// clear error upon retrieval
738 			return B_OK;
739 		}
740 
741 		default:
742 			break;
743 	}
744 
745 	dprintf("socket_getsockopt: unknown option %d\n", option);
746 	return ENOPROTOOPT;
747 }
748 
749 
750 int
751 socket_getsockopt(net_socket *socket, int level, int option, void *value,
752 	int *_length)
753 {
754 	for (net_protocol *protocol = socket->first_protocol;
755 			protocol; protocol = protocol->next) {
756 		if (protocol->module->getsockopt)
757 			return protocol->module->getsockopt(protocol, level, option,
758 				value, _length);
759 	}
760 
761 	return socket_get_option(socket, level, option, value, _length);
762 }
763 
764 
765 int
766 socket_listen(net_socket *socket, int backlog)
767 {
768 	status_t status = socket->first_info->listen(socket->first_protocol, backlog);
769 	if (status == B_OK)
770 		socket->options |= SO_ACCEPTCONN;
771 
772 	return status;
773 }
774 
775 
776 ssize_t
777 socket_receive(net_socket *socket, msghdr *header, void *data, size_t length,
778 	int flags)
779 {
780 	size_t totalLength = length;
781 	net_buffer *buffer;
782 	iovec tmp;
783 	int i;
784 
785 	// the convention to this function is that have header been
786 	// present, { data, length } would have been iovec[0] and is
787 	// always considered like that
788 
789 	if (header) {
790 		// calculate the length considering all of the extra buffers
791 		for (i = 1; i < header->msg_iovlen; i++) {
792 			if (user_memcpy(&tmp, header->msg_iov + i, sizeof(iovec)) < B_OK)
793 				return B_BAD_ADDRESS;
794 			if (tmp.iov_len > 0 && tmp.iov_base == NULL)
795 				return B_BAD_ADDRESS;
796 			totalLength += tmp.iov_len;
797 		}
798 	}
799 
800 	status_t status = socket->first_info->read_data(
801 		socket->first_protocol, totalLength, flags, &buffer);
802 	if (status < B_OK)
803 		return status;
804 
805 	// TODO: - returning a NULL buffer when received 0 bytes
806 	//         may not make much sense as we still need the address
807 	//       - gNetBufferModule.read() uses memcpy() instead of user_memcpy
808 
809 	size_t nameLen = 0;
810 
811 	if (header) {
812 		// TODO: - consider the control buffer options
813 		nameLen = header->msg_namelen;
814 		header->msg_namelen = 0;
815 		header->msg_flags = 0;
816 	}
817 
818 	if (buffer == NULL)
819 		return 0;
820 
821 	size_t bytesReceived = buffer->size, bytesCopied = 0;
822 
823 	length = min_c(bytesReceived, length);
824 	if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
825 		gNetBufferModule.free(buffer);
826 		return ENOBUFS;
827 	}
828 
829 	// if first copy was a success, proceed to following
830 	// copies as required
831 	bytesCopied += length;
832 
833 	if (header) {
834 		// we only start considering at iovec[1]
835 		// as { data, length } is iovec[0]
836 		for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
837 			if (user_memcpy(&tmp, header->msg_iov + i, sizeof(iovec)) < B_OK)
838 				break;
839 
840 			size_t toRead = min_c(bytesReceived - bytesCopied, tmp.iov_len);
841 			if (gNetBufferModule.read(buffer, bytesCopied, tmp.iov_base,
842 										toRead) < B_OK)
843 				break;
844 
845 			bytesCopied += toRead;
846 		}
847 
848 		if (header->msg_name != NULL) {
849 			header->msg_namelen = min_c(nameLen, buffer->source.ss_len);
850 			memcpy(header->msg_name, &buffer->source, header->msg_namelen);
851 		}
852 	}
853 
854 	gNetBufferModule.free(buffer);
855 
856 	if (bytesCopied < bytesReceived) {
857 		if (header)
858 			header->msg_flags = MSG_TRUNC;
859 
860 		if (flags & MSG_TRUNC)
861 			return bytesReceived;
862 	}
863 
864 	return bytesCopied;
865 }
866 
867 
868 ssize_t
869 socket_send(net_socket *socket, msghdr *header, const void *data,
870 	size_t length, int flags)
871 {
872 	const sockaddr *address = NULL;
873 	socklen_t addressLength = 0;
874 
875 	// the convention to this function is that have header been
876 	// present, { data, length } would have been iovec[0] and is
877 	// always considered like that
878 
879 	if (header != NULL) {
880 		address = (const sockaddr *)header->msg_name;
881 		addressLength = header->msg_namelen;
882 
883 		if (header->msg_iovlen <= 1)
884 			header = NULL;
885 	}
886 
887 	if (addressLength == 0)
888 		address = NULL;
889 	else if (addressLength != 0 && address == NULL)
890 		return B_BAD_VALUE;
891 
892 	if (socket->peer.ss_len != 0) {
893 		if (address != NULL)
894 			return EISCONN;
895 
896 		// socket is connected, we use that address
897 		address = (struct sockaddr *)&socket->peer;
898 		addressLength = socket->peer.ss_len;
899 	}
900 
901 	if (address == NULL || addressLength == 0) {
902 		// don't know where to send to:
903 		return EDESTADDRREQ;
904 	}
905 
906 	if (socket->address.ss_len == 0) {
907 		// try to bind first
908 		status_t status = socket_bind(socket, NULL, 0);
909 		if (status < B_OK)
910 			return status;
911 	}
912 
913 	// TODO: useful, maybe even computed header space!
914 	net_buffer *buffer = gNetBufferModule.create(256);
915 	if (buffer == NULL)
916 		return ENOBUFS;
917 
918 	if (gNetBufferModule.append(buffer, data, length) < B_OK) {
919 		gNetBufferModule.free(buffer);
920 		return ENOBUFS;
921 	}
922 
923 	if (header) {
924 		// copy additional data into buffer
925 		for (int i = 1; i < header->msg_iovlen; i++) {
926 			iovec tmp;
927 			if (user_memcpy(&tmp, header->msg_iov + i, sizeof(iovec)) < B_OK ||
928 				gNetBufferModule.append(buffer, tmp.iov_base, tmp.iov_len) < B_OK) {
929 				gNetBufferModule.free(buffer);
930 				return ENOBUFS;
931 			}
932 
933 			length += tmp.iov_len;
934 		}
935 	}
936 
937 	buffer->flags = flags;
938 	memcpy(&buffer->source, &socket->address, socket->address.ss_len);
939 	memcpy(&buffer->destination, address, addressLength);
940 
941 	status_t status = socket->first_info->send_data(socket->first_protocol,
942 		buffer);
943 	if (status < B_OK) {
944 		size_t size = buffer->size;
945 		gNetBufferModule.free(buffer);
946 
947 		if (size != length && (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
948 			// this appears to be a partial write
949 			return length - size;
950 		}
951 		return status;
952 	}
953 
954 	return length;
955 }
956 
957 
958 status_t
959 socket_set_option(net_socket *socket, int level, int option, const void *value,
960 	int length)
961 {
962 	if (level != SOL_SOCKET)
963 		return ENOPROTOOPT;
964 
965 	switch (option) {
966 		// TODO: implement other options!
967 		case SO_LINGER:
968 		{
969 			if (length < (int)sizeof(struct linger))
970 				return B_BAD_VALUE;
971 
972 			struct linger *linger = (struct linger *)value;
973 			if (linger->l_onoff) {
974 				socket->options |= SO_LINGER;
975 				socket->linger = linger->l_linger;
976 			} else {
977 				socket->options &= ~SO_LINGER;
978 				socket->linger = 0;
979 			}
980 			return B_OK;
981 		}
982 
983 		case SO_SNDBUF:
984 			if (length != sizeof(uint32))
985 				return B_BAD_VALUE;
986 
987 			socket->send.buffer_size = *(const uint32 *)value;
988 			return B_OK;
989 
990 		case SO_RCVBUF:
991 			if (length != sizeof(uint32))
992 				return B_BAD_VALUE;
993 
994 			socket->receive.buffer_size = *(const uint32 *)value;
995 			return B_OK;
996 
997 		case SO_SNDLOWAT:
998 			if (length != sizeof(uint32))
999 				return B_BAD_VALUE;
1000 
1001 			socket->send.low_water_mark = *(const uint32 *)value;
1002 			return B_OK;
1003 
1004 		case SO_RCVLOWAT:
1005 			if (length != sizeof(uint32))
1006 				return B_BAD_VALUE;
1007 
1008 			socket->receive.low_water_mark = *(const uint32 *)value;
1009 			return B_OK;
1010 
1011 		case SO_RCVTIMEO:
1012 		case SO_SNDTIMEO:
1013 		{
1014 			if (length != sizeof(struct timeval))
1015 				return B_BAD_VALUE;
1016 
1017 			const struct timeval *timeval = (const struct timeval *)value;
1018 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1019 			if (timeout == 0)
1020 				timeout = B_INFINITE_TIMEOUT;
1021 
1022 			if (option == SO_SNDTIMEO)
1023 				socket->send.timeout = timeout;
1024 			else
1025 				socket->receive.timeout = timeout;
1026 			return B_OK;
1027 		}
1028 
1029 		case SO_NONBLOCK:
1030 			if (length != sizeof(int32))
1031 				return B_BAD_VALUE;
1032 
1033 			if (*(const int32 *)value) {
1034 				socket->send.timeout = 0;
1035 				socket->receive.timeout = 0;
1036 			} else {
1037 				socket->send.timeout = B_INFINITE_TIMEOUT;
1038 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1039 			}
1040 			return B_OK;
1041 
1042 		case SO_BROADCAST:
1043 		case SO_DEBUG:
1044 		case SO_DONTROUTE:
1045 		case SO_KEEPALIVE:
1046 		case SO_OOBINLINE:
1047 		case SO_REUSEADDR:
1048 		case SO_REUSEPORT:
1049 		case SO_USELOOPBACK:
1050 			if (length != sizeof(int32))
1051 				return B_BAD_VALUE;
1052 
1053 			if (*(const int32 *)value)
1054 				socket->options |= option;
1055 			else
1056 				socket->options &= ~option;
1057 			return B_OK;
1058 
1059 		default:
1060 			break;
1061 	}
1062 
1063 	dprintf("socket_setsockopt: unknown option %d\n", option);
1064 	return ENOPROTOOPT;
1065 }
1066 
1067 
1068 int
1069 socket_setsockopt(net_socket *socket, int level, int option, const void *value,
1070 	int length)
1071 {
1072 	for (net_protocol *protocol = socket->first_protocol;
1073 			protocol; protocol = protocol->next) {
1074 		if (protocol->module->setsockopt)
1075 			return protocol->module->setsockopt(protocol, level, option,
1076 				value, length);
1077 	}
1078 
1079 	return socket_set_option(socket, level, option, value, length);
1080 }
1081 
1082 
1083 int
1084 socket_shutdown(net_socket *socket, int direction)
1085 {
1086 	return socket->first_info->shutdown(socket->first_protocol, direction);
1087 }
1088 
1089 
1090 //	#pragma mark -
1091 
1092 
1093 static status_t
1094 socket_std_ops(int32 op, ...)
1095 {
1096 	switch (op) {
1097 		case B_MODULE_INIT:
1098 		{
1099 			// TODO: this is currently done in the net_stack driver
1100 			// initialize the main stack if not done so already
1101 			//module_info *module;
1102 			//return get_module(NET_STARTER_MODULE_NAME, &module);
1103 			list_init_etc(&sSocketList, offsetof(net_socket_private, link));
1104 			return benaphore_init(&sSocketLock, "socket list");
1105 		}
1106 		case B_MODULE_UNINIT:
1107 			//return put_module(NET_STARTER_MODULE_NAME);
1108 			benaphore_destroy(&sSocketLock);
1109 			return B_OK;
1110 
1111 		default:
1112 			return B_ERROR;
1113 	}
1114 }
1115 
1116 
1117 net_socket_module_info gNetSocketModule = {
1118 	{
1119 		NET_SOCKET_MODULE_NAME,
1120 		0,
1121 		socket_std_ops
1122 	},
1123 	socket_open,
1124 	socket_close,
1125 	socket_free,
1126 
1127 	socket_readv,
1128 	socket_writev,
1129 	socket_control,
1130 
1131 	socket_read_avail,
1132 	socket_send_avail,
1133 
1134 	socket_send_data,
1135 	socket_receive_data,
1136 
1137 	socket_get_option,
1138 	socket_set_option,
1139 
1140 	socket_get_next_stat,
1141 
1142 	// connections
1143 	socket_spawn_pending,
1144 	socket_delete,
1145 	socket_dequeue_connected,
1146 	socket_count_connected,
1147 	socket_set_max_backlog,
1148 	socket_connected,
1149 
1150 	// notifications
1151 	socket_request_notification,
1152 	socket_cancel_notification,
1153 	socket_notify,
1154 
1155 	// standard socket API
1156 	socket_accept,
1157 	socket_bind,
1158 	socket_connect,
1159 	socket_getpeername,
1160 	socket_getsockname,
1161 	socket_getsockopt,
1162 	socket_listen,
1163 	socket_receive,
1164 	socket_send,
1165 	socket_setsockopt,
1166 	socket_shutdown,
1167 };
1168 
1169