xref: /haiku/src/add-ons/kernel/network/stack/net_socket.cpp (revision d9cebac2b77547b7064f22497514eecd2d047160)
1 /*
2  * Copyright 2006-2007, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "stack_private.h"
11 
12 #include <net_protocol.h>
13 #include <net_stack.h>
14 #include <net_stat.h>
15 
16 #include <KernelExport.h>
17 #include <Select.h>
18 #include <team.h>
19 #include <util/AutoLock.h>
20 #include <util/list.h>
21 #include <fs/select_sync_pool.h>
22 
23 #include <new>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/time.h>
27 
28 
/*!
	Stack-private extension of net_socket. It carries the bookkeeping this
	file needs on top of the public socket structure: list membership, the
	owning team, the children of a listening socket, and select() support.
*/
struct net_socket_private : net_socket {
	// list link; sSocketList as well as the pending_children and
	// connected_children lists are initialized with the offset of this member
	struct list_link		link;
	// set to the current team in socket_open(), copied from the parent in
	// socket_spawn_pending()
	team_id					owner;
	// set by socket_set_max_backlog()
	uint32					max_backlog;
	// incremented in socket_spawn_pending(), decremented when a child is
	// dequeued or dropped; it therefore covers both child lists
	uint32					child_count;
	// children created by socket_spawn_pending()
	struct list				pending_children;
	// children moved here by socket_connected()
	struct list				connected_children;

	// filled by socket_request_notification(), may be NULL
	struct select_sync_pool	*select_pool;
	// held while the child lists and the select pool are accessed
	benaphore				lock;
};
40 
41 
// forward declarations for functions that are used before their definition
void socket_delete(net_socket *socket);
int socket_bind(net_socket *socket, const struct sockaddr *address,
	socklen_t addressLength);


// sockets are added to this list by socket_open() and
// socket_dequeue_connected(), and removed again in socket_delete()
struct list sSocketList;
// taken around every access to sSocketList in this file
benaphore sSocketLock;
49 
50 
51 static status_t
52 create_socket(int family, int type, int protocol, net_socket_private **_socket)
53 {
54 	struct net_socket_private *socket = new (std::nothrow) net_socket_private;
55 	if (socket == NULL)
56 		return B_NO_MEMORY;
57 
58 	memset(socket, 0, sizeof(net_socket_private));
59 	socket->family = family;
60 	socket->type = type;
61 	socket->protocol = protocol;
62 
63 	status_t status = benaphore_init(&socket->lock, "socket");
64 	if (status < B_OK)
65 		goto err1;
66 
67 	// set defaults (may be overridden by the protocols)
68 	socket->send.buffer_size = 65535;
69 	socket->send.low_water_mark = 1;
70 	socket->send.timeout = B_INFINITE_TIMEOUT;
71 	socket->receive.buffer_size = 65535;
72 	socket->receive.low_water_mark = 1;
73 	socket->receive.timeout = B_INFINITE_TIMEOUT;
74 
75 	list_init_etc(&socket->pending_children, offsetof(net_socket_private,
76 		link));
77 	list_init_etc(&socket->connected_children, offsetof(net_socket_private,
78 		link));
79 
80 	status = get_domain_protocols(socket);
81 	if (status < B_OK)
82 		goto err2;
83 
84 	*_socket = socket;
85 	return B_OK;
86 
87 err2:
88 	benaphore_destroy(&socket->lock);
89 err1:
90 	delete socket;
91 	return status;
92 }
93 
94 
95 //	#pragma mark -
96 
97 
98 status_t
99 socket_open(int family, int type, int protocol, net_socket **_socket)
100 {
101 	net_socket_private *socket;
102 	status_t status = create_socket(family, type, protocol, &socket);
103 	if (status < B_OK)
104 		return status;
105 
106 	status = socket->first_info->open(socket->first_protocol);
107 	if (status < B_OK) {
108 		socket_delete(socket);
109 		return status;
110 	}
111 
112 	socket->owner = team_get_current_team_id();
113 
114 	benaphore_lock(&sSocketLock);
115 	list_add_item(&sSocketList, socket);
116 	benaphore_unlock(&sSocketLock);
117 
118 	*_socket = socket;
119 	return B_OK;
120 }
121 
122 
123 status_t
124 socket_close(net_socket *_socket)
125 {
126 	net_socket_private *socket = (net_socket_private *)_socket;
127 
128 	if (socket->select_pool) {
129 		// notify all pending selects
130 		notify_select_event_pool(socket->select_pool, ~0);
131 	}
132 
133 	return socket->first_info->close(socket->first_protocol);
134 }
135 
136 
137 status_t
138 socket_free(net_socket *socket)
139 {
140 	status_t status = socket->first_info->free(socket->first_protocol);
141 	if (status == B_BUSY)
142 		return B_OK;
143 
144 	socket_delete(socket);
145 	return B_OK;
146 }
147 
148 
/*!
	Not implemented: all arguments are ignored and -1 is returned
	unconditionally.
*/
status_t
socket_readv(net_socket *socket, const iovec *vecs, size_t vecCount,
	size_t *_length)
{
	return -1;
}
155 
156 
157 status_t
158 socket_writev(net_socket *socket, const iovec *vecs, size_t vecCount,
159 	size_t *_length)
160 {
161 	if (socket->peer.ss_len == 0)
162 		return ECONNRESET;
163 
164 	if (socket->address.ss_len == 0) {
165 		// try to bind first
166 		status_t status = socket_bind(socket, NULL, 0);
167 		if (status < B_OK)
168 			return status;
169 	}
170 
171 	// TODO: useful, maybe even computed header space!
172 	net_buffer *buffer = gNetBufferModule.create(256);
173 	if (buffer == NULL)
174 		return ENOBUFS;
175 
176 	// copy data into buffer
177 
178 	for (uint32 i = 0; i < vecCount; i++) {
179 		if (gNetBufferModule.append(buffer, vecs[i].iov_base,
180 				vecs[i].iov_len) < B_OK) {
181 			gNetBufferModule.free(buffer);
182 			return ENOBUFS;
183 		}
184 	}
185 
186 	memcpy(buffer->source, &socket->address, socket->address.ss_len);
187 	memcpy(buffer->destination, &socket->peer, socket->peer.ss_len);
188 	size_t size = buffer->size;
189 
190 	ssize_t bytesWritten = socket->first_info->send_data(socket->first_protocol,
191 		buffer);
192 	if (bytesWritten < B_OK) {
193 		if (buffer->size != size) {
194 			// this appears to be a partial write
195 			*_length = size - buffer->size;
196 		}
197 		gNetBufferModule.free(buffer);
198 		return bytesWritten;
199 	}
200 
201 	*_length = bytesWritten;
202 	return B_OK;
203 }
204 
205 
206 status_t
207 socket_control(net_socket *socket, int32 op, void *data, size_t length)
208 {
209 	return socket->first_info->control(socket->first_protocol,
210 		LEVEL_DRIVER_IOCTL, op, data, &length);
211 }
212 
213 
214 ssize_t
215 socket_read_avail(net_socket *socket)
216 {
217 	return socket->first_info->read_avail(socket->first_protocol);
218 }
219 
220 
221 ssize_t
222 socket_send_avail(net_socket *socket)
223 {
224 	return socket->first_info->send_avail(socket->first_protocol);
225 }
226 
227 
228 status_t
229 socket_send_data(net_socket *socket, net_buffer *buffer)
230 {
231 	return socket->first_info->send_data(socket->first_protocol,
232 		buffer);
233 }
234 
235 
236 status_t
237 socket_receive_data(net_socket *socket, size_t length, uint32 flags,
238 	net_buffer **_buffer)
239 {
240 	status_t status = socket->first_info->read_data(socket->first_protocol,
241 		length, flags, _buffer);
242 
243 	if (status < B_OK)
244 		return status;
245 
246 	if (*_buffer && length < (*_buffer)->size) {
247 		// discard any data behind the amount requested
248 		gNetBufferModule.trim(*_buffer, length);
249 	}
250 
251 	return status;
252 }
253 
254 
255 status_t
256 socket_get_next_stat(uint32 *_cookie, int family, struct net_stat *stat)
257 {
258 	BenaphoreLocker locker(sSocketLock);
259 
260 	net_socket_private *socket = NULL;
261 	uint32 cookie = *_cookie;
262 	uint32 count = 0;
263 	while ((socket = (net_socket_private *)list_get_next_item(&sSocketList,
264 			socket)) != NULL) {
265 		// TODO: also traverse the pending connections
266 		if (count == cookie)
267 			break;
268 
269 		if (family == -1 || family == socket->family)
270 			count++;
271 	}
272 
273 	if (socket == NULL)
274 		return B_ENTRY_NOT_FOUND;
275 
276 	*_cookie = count + 1;
277 
278 	stat->family = socket->family;
279 	stat->type = socket->type;
280 	stat->protocol = socket->protocol;
281 	stat->owner = socket->owner;
282 	stat->state[0] = '\0';
283 	memcpy(&stat->address, &socket->address, sizeof(struct sockaddr_storage));
284 	memcpy(&stat->peer, &socket->peer, sizeof(struct sockaddr_storage));
285 	stat->receive_queue_size = 0;
286 	stat->send_queue_size = 0;
287 
288 	// fill in protocol specific data (if supported by the protocol)
289 	size_t length = sizeof(net_stat);
290 	socket->first_info->control(socket->first_protocol, socket->protocol,
291 		NET_STAT_SOCKET, stat, &length);
292 
293 	return B_OK;
294 }
295 
296 
297 //	#pragma mark - connections
298 
299 
300 status_t
301 socket_spawn_pending(net_socket *_parent, net_socket **_socket)
302 {
303 	net_socket_private *parent = (net_socket_private *)_parent;
304 
305 	BenaphoreLocker locker(parent->lock);
306 
307 	// We actually accept more pending connections to compensate for those
308 	// that never complete, and also make sure at least a single connection
309 	// can always be accepted
310 	if (parent->child_count > 3 * parent->max_backlog / 2)
311 		return ENOBUFS;
312 
313 	net_socket_private *socket;
314 	status_t status = create_socket(parent->family, parent->type,
315 		parent->protocol, &socket);
316 	if (status < B_OK)
317 		return status;
318 
319 	// inherit parent's properties
320 	socket->send = parent->send;
321 	socket->receive = parent->receive;
322 	socket->options = parent->options & ~SO_ACCEPTCONN;
323 	socket->linger = parent->linger;
324 	socket->owner = parent->owner;
325 	memcpy(&socket->address, &parent->address, parent->address.ss_len);
326 	memcpy(&socket->peer, &parent->peer, parent->peer.ss_len);
327 
328 	// add to the parent's list of pending connections
329 	list_add_item(&parent->pending_children, socket);
330 	socket->parent = parent;
331 	parent->child_count++;
332 
333 	*_socket = socket;
334 	return B_OK;
335 }
336 
337 
338 void
339 socket_delete(net_socket *_socket)
340 {
341 	net_socket_private *socket = (net_socket_private *)_socket;
342 
343 	if (socket->parent != NULL)
344 		panic("socket still has a parent!");
345 
346 	benaphore_lock(&sSocketLock);
347 	list_remove_item(&sSocketList, socket);
348 	benaphore_unlock(&sSocketLock);
349 
350 	put_domain_protocols(socket);
351 	benaphore_destroy(&socket->lock);
352 	delete_select_sync_pool(socket->select_pool);
353 	delete socket;
354 }
355 
356 
357 status_t
358 socket_dequeue_connected(net_socket *_parent, net_socket **_socket)
359 {
360 	net_socket_private *parent = (net_socket_private *)_parent;
361 
362 	benaphore_lock(&parent->lock);
363 
364 	net_socket_private *socket = (net_socket_private *)list_remove_head_item(
365 		&parent->connected_children);
366 	if (socket != NULL) {
367 		socket->parent = NULL;
368 		parent->child_count--;
369 		*_socket = socket;
370 	}
371 
372 	benaphore_unlock(&parent->lock);
373 
374 	if (socket == NULL)
375 		return B_ENTRY_NOT_FOUND;
376 
377 	benaphore_lock(&sSocketLock);
378 	list_add_item(&sSocketList, socket);
379 	benaphore_unlock(&sSocketLock);
380 
381 	return B_OK;
382 }
383 
384 
385 ssize_t
386 socket_count_connected(net_socket *_parent)
387 {
388 	net_socket_private *parent = (net_socket_private *)_parent;
389 
390 	BenaphoreLocker _(parent->lock);
391 
392 	ssize_t count = 0;
393 	void *item = NULL;
394 	while ((item = list_get_next_item(&parent->connected_children,
395 			item)) != NULL) {
396 		count++;
397 	}
398 
399 	return count;
400 }
401 
402 
403 status_t
404 socket_set_max_backlog(net_socket *_socket, uint32 backlog)
405 {
406 	net_socket_private *socket = (net_socket_private *)_socket;
407 
408 	// we enforce an upper limit of connections waiting to be accepted
409 	if (backlog > 256)
410 		backlog = 256;
411 
412 	benaphore_lock(&socket->lock);
413 
414 	// first remove the pending connections, then the already connected
415 	// ones as needed
416 	net_socket_private *child;
417 	while (socket->child_count > backlog
418 		&& (child = (net_socket_private *)list_remove_tail_item(
419 				&socket->pending_children)) != NULL) {
420 		child->parent = NULL;
421 		socket->child_count--;
422 	}
423 	while (socket->child_count > backlog
424 		&& (child = (net_socket_private *)list_remove_tail_item(
425 				&socket->connected_children)) != NULL) {
426 		child->parent = NULL;
427 		socket_delete(child);
428 		socket->child_count--;
429 	}
430 
431 	socket->max_backlog = backlog;
432 	benaphore_unlock(&socket->lock);
433 	return B_OK;
434 }
435 
436 
437 /*!
438 	The socket has been connected. It will be moved to the connected queue
439 	of its parent socket.
440 */
441 status_t
442 socket_connected(net_socket *socket)
443 {
444 	net_socket_private *parent = (net_socket_private *)socket->parent;
445 	if (parent == NULL)
446 		return B_BAD_VALUE;
447 
448 	benaphore_lock(&parent->lock);
449 
450 	list_remove_item(&parent->pending_children, socket);
451 	list_add_item(&parent->connected_children, socket);
452 
453 	// notify parent
454 	if (parent->select_pool)
455 		notify_select_event_pool(parent->select_pool, B_SELECT_READ);
456 
457 	benaphore_unlock(&parent->lock);
458 	return B_OK;
459 }
460 
461 
462 //	#pragma mark - notifications
463 
464 
465 status_t
466 socket_request_notification(net_socket *_socket, uint8 event, uint32 ref,
467 	selectsync *sync)
468 {
469 	net_socket_private *socket = (net_socket_private *)_socket;
470 
471 	benaphore_lock(&socket->lock);
472 
473 	status_t status = add_select_sync_pool_entry(&socket->select_pool, sync,
474 		event);
475 
476 	benaphore_unlock(&socket->lock);
477 
478 	if (status < B_OK)
479 		return status;
480 
481 	// check if the event is already present
482 	// TODO: add support for poll() types
483 
484 	switch (event) {
485 		case B_SELECT_READ:
486 		{
487 			ssize_t available = socket_read_avail(socket);
488 			if ((ssize_t)socket->receive.low_water_mark <= available
489 				|| available < B_OK)
490 				notify_select_event(sync, event);
491 			break;
492 		}
493 		case B_SELECT_WRITE:
494 		{
495 			ssize_t available = socket_send_avail(socket);
496 			if ((ssize_t)socket->send.low_water_mark <= available
497 				|| available < B_OK)
498 				notify_select_event(sync, event);
499 			break;
500 		}
501 		case B_SELECT_ERROR:
502 			// TODO: B_SELECT_ERROR condition!
503 			break;
504 	}
505 
506 	return B_OK;
507 }
508 
509 
510 status_t
511 socket_cancel_notification(net_socket *_socket, uint8 event, selectsync *sync)
512 {
513 	net_socket_private *socket = (net_socket_private *)_socket;
514 
515 	benaphore_lock(&socket->lock);
516 
517 	status_t status = remove_select_sync_pool_entry(&socket->select_pool,
518 		sync, event);
519 
520 	benaphore_unlock(&socket->lock);
521 	return status;
522 }
523 
524 
525 status_t
526 socket_notify(net_socket *_socket, uint8 event, int32 value)
527 {
528 	net_socket_private *socket = (net_socket_private *)_socket;
529 	bool notify = true;
530 
531 	switch (event) {
532 		case B_SELECT_READ:
533 			if ((ssize_t)socket->receive.low_water_mark > value
534 				&& value >= B_OK)
535 				notify = false;
536 			break;
537 
538 		case B_SELECT_WRITE:
539 			if ((ssize_t)socket->send.low_water_mark > value
540 				&& value >= B_OK)
541 				notify = false;
542 			break;
543 
544 		case B_SELECT_ERROR:
545 			socket->error = value;
546 			break;
547 	}
548 
549 	benaphore_lock(&socket->lock);
550 
551 	if (notify && socket->select_pool)
552 		notify_select_event_pool(socket->select_pool, event);
553 
554 	benaphore_unlock(&socket->lock);
555 	return B_OK;
556 }
557 
558 
559 //	#pragma mark - standard socket API
560 
561 
562 int
563 socket_accept(net_socket *socket, struct sockaddr *address,
564 	socklen_t *_addressLength, net_socket **_acceptedSocket)
565 {
566 	if ((socket->options & SO_ACCEPTCONN) == 0)
567 		return B_BAD_VALUE;
568 
569 	net_socket *accepted;
570 	status_t status = socket->first_info->accept(socket->first_protocol,
571 		&accepted);
572 	if (status < B_OK)
573 		return status;
574 
575 	if (address && *_addressLength > 0) {
576 		memcpy(address, &accepted->peer, min_c(*_addressLength,
577 			min_c(accepted->peer.ss_len, sizeof(sockaddr_storage))));
578 		*_addressLength = accepted->peer.ss_len;
579 	}
580 
581 	*_acceptedSocket = accepted;
582 	return B_OK;
583 }
584 
585 
586 int
587 socket_bind(net_socket *socket, const struct sockaddr *address,
588 	socklen_t addressLength)
589 {
590 	sockaddr empty;
591 	if (address == NULL) {
592 		// special - try to bind to an empty address, like INADDR_ANY
593 		memset(&empty, 0, sizeof(sockaddr));
594 		empty.sa_len = sizeof(sockaddr);
595 		empty.sa_family = socket->family;
596 
597 		address = &empty;
598 		addressLength = sizeof(sockaddr);
599 	}
600 
601 	if (socket->address.ss_len != 0) {
602 		status_t status = socket->first_info->unbind(socket->first_protocol,
603 			(sockaddr *)&socket->address);
604 		if (status < B_OK)
605 			return status;
606 	}
607 
608 	memcpy(&socket->address, address, sizeof(sockaddr));
609 
610 	status_t status = socket->first_info->bind(socket->first_protocol,
611 		(sockaddr *)address);
612 	if (status < B_OK) {
613 		// clear address again, as binding failed
614 		socket->address.ss_len = 0;
615 	}
616 
617 	return status;
618 }
619 
620 
621 int
622 socket_connect(net_socket *socket, const struct sockaddr *address,
623 	socklen_t addressLength)
624 {
625 	if (address == NULL || addressLength == 0)
626 		return ENETUNREACH;
627 
628 	if (socket->address.ss_len == 0) {
629 		// try to bind first
630 		status_t status = socket_bind(socket, NULL, 0);
631 		if (status < B_OK)
632 			return status;
633 	}
634 
635 	return socket->first_info->connect(socket->first_protocol, address);
636 }
637 
638 
639 int
640 socket_getpeername(net_socket *socket, struct sockaddr *address,
641 	socklen_t *_addressLength)
642 {
643 	if (socket->peer.ss_len == 0)
644 		return ENOTCONN;
645 
646 	memcpy(address, &socket->peer, min_c(*_addressLength, socket->peer.ss_len));
647 	*_addressLength = socket->peer.ss_len;
648 	return B_OK;
649 }
650 
651 
652 int
653 socket_getsockname(net_socket *socket, struct sockaddr *address,
654 	socklen_t *_addressLength)
655 {
656 	if (socket->address.ss_len == 0)
657 		return ENOTCONN;
658 
659 	memcpy(address, &socket->address, min_c(*_addressLength,
660 		socket->address.ss_len));
661 	*_addressLength = socket->address.ss_len;
662 	return B_OK;
663 }
664 
665 
666 status_t
667 socket_get_option(net_socket *socket, int level, int option, void *value,
668 	int *_length)
669 {
670 	if (level != SOL_SOCKET)
671 		return ENOPROTOOPT;
672 
673 	switch (option) {
674 		case SO_SNDBUF:
675 		{
676 			uint32 *size = (uint32 *)value;
677 			*size = socket->send.buffer_size;
678 			*_length = sizeof(uint32);
679 			return B_OK;
680 		}
681 
682 		case SO_RCVBUF:
683 		{
684 			uint32 *size = (uint32 *)value;
685 			*size = socket->receive.buffer_size;
686 			*_length = sizeof(uint32);
687 			return B_OK;
688 		}
689 
690 		case SO_SNDLOWAT:
691 		{
692 			uint32 *size = (uint32 *)value;
693 			*size = socket->send.low_water_mark;
694 			*_length = sizeof(uint32);
695 			return B_OK;
696 		}
697 
698 		case SO_RCVLOWAT:
699 		{
700 			uint32 *size = (uint32 *)value;
701 			*size = socket->receive.low_water_mark;
702 			*_length = sizeof(uint32);
703 			return B_OK;
704 		}
705 
706 		case SO_RCVTIMEO:
707 		case SO_SNDTIMEO:
708 		{
709 			if (*_length < (int)sizeof(struct timeval))
710 				return B_BAD_VALUE;
711 
712 			bigtime_t timeout;
713 			if (option == SO_SNDTIMEO)
714 				timeout = socket->send.timeout;
715 			else
716 				timeout = socket->receive.timeout;
717 			if (timeout == B_INFINITE_TIMEOUT)
718 				timeout = 0;
719 
720 			struct timeval *timeval = (struct timeval *)value;
721 			timeval->tv_sec = timeout / 1000000LL;
722 			timeval->tv_usec = timeout % 1000000LL;
723 
724 			*_length = sizeof(struct timeval);
725 			return B_OK;
726 		}
727 
728 		case SO_NONBLOCK:
729 		{
730 			int32 *_set = (int32 *)value;
731 			*_set = socket->receive.timeout == 0 && socket->send.timeout == 0;
732 			*_length = sizeof(int32);
733 			return B_OK;
734 		}
735 
736 		case SO_ACCEPTCONN:
737 		case SO_BROADCAST:
738 		case SO_DEBUG:
739 		case SO_DONTROUTE:
740 		case SO_KEEPALIVE:
741 		case SO_OOBINLINE:
742 		case SO_REUSEADDR:
743 		case SO_REUSEPORT:
744 		case SO_USELOOPBACK:
745 		{
746 			int32 *_set = (int32 *)value;
747 			*_set = (socket->options & option) != 0;
748 			*_length = sizeof(int32);
749 			return B_OK;
750 		}
751 
752 		case SO_ERROR:
753 		{
754 			int32 *_set = (int32 *)value;
755 			*_set = socket->error;
756 			*_length = sizeof(int32);
757 
758 			socket->error = B_OK;
759 				// clear error upon retrieval
760 			return B_OK;
761 		}
762 
763 		default:
764 			break;
765 	}
766 
767 	dprintf("socket_getsockopt: unknown option %d\n", option);
768 	return ENOPROTOOPT;
769 }
770 
771 
772 int
773 socket_getsockopt(net_socket *socket, int level, int option, void *value,
774 	int *_length)
775 {
776 	return socket->first_protocol->module->getsockopt(socket->first_protocol,
777 		level, option, value, _length);
778 }
779 
780 
781 int
782 socket_listen(net_socket *socket, int backlog)
783 {
784 	status_t status = socket->first_info->listen(socket->first_protocol,
785 		backlog);
786 	if (status == B_OK)
787 		socket->options |= SO_ACCEPTCONN;
788 
789 	return status;
790 }
791 
792 
793 ssize_t
794 socket_receive(net_socket *socket, msghdr *header, void *data, size_t length,
795 	int flags)
796 {
797 	size_t totalLength = length;
798 	net_buffer *buffer;
799 	iovec tmp;
800 	int i;
801 
802 	// the convention to this function is that have header been
803 	// present, { data, length } would have been iovec[0] and is
804 	// always considered like that
805 
806 	if (header) {
807 		// calculate the length considering all of the extra buffers
808 		for (i = 1; i < header->msg_iovlen; i++) {
809 			if (user_memcpy(&tmp, header->msg_iov + i, sizeof(iovec)) < B_OK)
810 				return B_BAD_ADDRESS;
811 			if (tmp.iov_len > 0 && tmp.iov_base == NULL)
812 				return B_BAD_ADDRESS;
813 			totalLength += tmp.iov_len;
814 		}
815 	}
816 
817 	status_t status = socket->first_info->read_data(
818 		socket->first_protocol, totalLength, flags, &buffer);
819 	if (status < B_OK)
820 		return status;
821 
822 	// TODO: - returning a NULL buffer when received 0 bytes
823 	//         may not make much sense as we still need the address
824 	//       - gNetBufferModule.read() uses memcpy() instead of user_memcpy
825 
826 	size_t nameLen = 0;
827 
828 	if (header) {
829 		// TODO: - consider the control buffer options
830 		nameLen = header->msg_namelen;
831 		header->msg_namelen = 0;
832 		header->msg_flags = 0;
833 	}
834 
835 	if (buffer == NULL)
836 		return 0;
837 
838 	size_t bytesReceived = buffer->size, bytesCopied = 0;
839 
840 	length = min_c(bytesReceived, length);
841 	if (gNetBufferModule.read(buffer, 0, data, length) < B_OK) {
842 		gNetBufferModule.free(buffer);
843 		return ENOBUFS;
844 	}
845 
846 	// if first copy was a success, proceed to following
847 	// copies as required
848 	bytesCopied += length;
849 
850 	if (header) {
851 		// we only start considering at iovec[1]
852 		// as { data, length } is iovec[0]
853 		for (i = 1; i < header->msg_iovlen && bytesCopied < bytesReceived; i++) {
854 			if (user_memcpy(&tmp, header->msg_iov + i, sizeof(iovec)) < B_OK)
855 				break;
856 
857 			size_t toRead = min_c(bytesReceived - bytesCopied, tmp.iov_len);
858 			if (gNetBufferModule.read(buffer, bytesCopied, tmp.iov_base,
859 										toRead) < B_OK)
860 				break;
861 
862 			bytesCopied += toRead;
863 		}
864 
865 		if (header->msg_name != NULL) {
866 			header->msg_namelen = min_c(nameLen, buffer->source->sa_len);
867 			memcpy(header->msg_name, buffer->source, header->msg_namelen);
868 		}
869 	}
870 
871 	gNetBufferModule.free(buffer);
872 
873 	if (bytesCopied < bytesReceived) {
874 		if (header)
875 			header->msg_flags = MSG_TRUNC;
876 
877 		if (flags & MSG_TRUNC)
878 			return bytesReceived;
879 	}
880 
881 	return bytesCopied;
882 }
883 
884 
885 ssize_t
886 socket_send(net_socket *socket, msghdr *header, const void *data,
887 	size_t length, int flags)
888 {
889 	const sockaddr *address = NULL;
890 	socklen_t addressLength = 0;
891 
892 	// the convention to this function is that have header been
893 	// present, { data, length } would have been iovec[0] and is
894 	// always considered like that
895 
896 	if (header != NULL) {
897 		address = (const sockaddr *)header->msg_name;
898 		addressLength = header->msg_namelen;
899 
900 		if (header->msg_iovlen <= 1)
901 			header = NULL;
902 	}
903 
904 	if (addressLength == 0)
905 		address = NULL;
906 	else if (addressLength != 0 && address == NULL)
907 		return B_BAD_VALUE;
908 
909 	if (socket->peer.ss_len != 0) {
910 		if (address != NULL)
911 			return EISCONN;
912 
913 		// socket is connected, we use that address
914 		address = (struct sockaddr *)&socket->peer;
915 		addressLength = socket->peer.ss_len;
916 	}
917 
918 	if (address == NULL || addressLength == 0) {
919 		// don't know where to send to:
920 		return EDESTADDRREQ;
921 	}
922 
923 	if (socket->address.ss_len == 0) {
924 		// try to bind first
925 		status_t status = socket_bind(socket, NULL, 0);
926 		if (status < B_OK)
927 			return status;
928 	}
929 
930 	// TODO: useful, maybe even computed header space!
931 	net_buffer *buffer = gNetBufferModule.create(256);
932 	if (buffer == NULL)
933 		return ENOBUFS;
934 
935 	if (gNetBufferModule.append(buffer, data, length) < B_OK) {
936 		gNetBufferModule.free(buffer);
937 		return ENOBUFS;
938 	}
939 
940 	if (header) {
941 		// copy additional data into buffer
942 		for (int i = 1; i < header->msg_iovlen; i++) {
943 			iovec vec;
944 			if (user_memcpy(&vec, header->msg_iov + i, sizeof(iovec)) < B_OK)
945 				return B_BAD_ADDRESS;
946 			if (gNetBufferModule.append(buffer, vec.iov_base,
947 					vec.iov_len) < B_OK) {
948 				gNetBufferModule.free(buffer);
949 				return ENOBUFS;
950 			}
951 
952 			length += vec.iov_len;
953 		}
954 	}
955 
956 	buffer->flags = flags;
957 	memcpy(buffer->source, &socket->address, socket->address.ss_len);
958 	memcpy(buffer->destination, address, addressLength);
959 
960 	status_t status = socket->first_info->send_data(socket->first_protocol,
961 		buffer);
962 	if (status < B_OK) {
963 		size_t size = buffer->size;
964 		gNetBufferModule.free(buffer);
965 
966 		if (size != length
967 			&& (status == B_INTERRUPTED || status == B_WOULD_BLOCK)) {
968 			// this appears to be a partial write
969 			return length - size;
970 		}
971 		return status;
972 	}
973 
974 	return length;
975 }
976 
977 
978 status_t
979 socket_set_option(net_socket *socket, int level, int option, const void *value,
980 	int length)
981 {
982 	if (level != SOL_SOCKET)
983 		return ENOPROTOOPT;
984 
985 	switch (option) {
986 		// TODO: implement other options!
987 		case SO_LINGER:
988 		{
989 			if (length < (int)sizeof(struct linger))
990 				return B_BAD_VALUE;
991 
992 			struct linger *linger = (struct linger *)value;
993 			if (linger->l_onoff) {
994 				socket->options |= SO_LINGER;
995 				socket->linger = linger->l_linger;
996 			} else {
997 				socket->options &= ~SO_LINGER;
998 				socket->linger = 0;
999 			}
1000 			return B_OK;
1001 		}
1002 
1003 		case SO_SNDBUF:
1004 			if (length != sizeof(uint32))
1005 				return B_BAD_VALUE;
1006 
1007 			socket->send.buffer_size = *(const uint32 *)value;
1008 			return B_OK;
1009 
1010 		case SO_RCVBUF:
1011 			if (length != sizeof(uint32))
1012 				return B_BAD_VALUE;
1013 
1014 			socket->receive.buffer_size = *(const uint32 *)value;
1015 			return B_OK;
1016 
1017 		case SO_SNDLOWAT:
1018 			if (length != sizeof(uint32))
1019 				return B_BAD_VALUE;
1020 
1021 			socket->send.low_water_mark = *(const uint32 *)value;
1022 			return B_OK;
1023 
1024 		case SO_RCVLOWAT:
1025 			if (length != sizeof(uint32))
1026 				return B_BAD_VALUE;
1027 
1028 			socket->receive.low_water_mark = *(const uint32 *)value;
1029 			return B_OK;
1030 
1031 		case SO_RCVTIMEO:
1032 		case SO_SNDTIMEO:
1033 		{
1034 			if (length != sizeof(struct timeval))
1035 				return B_BAD_VALUE;
1036 
1037 			const struct timeval *timeval = (const struct timeval *)value;
1038 			bigtime_t timeout = timeval->tv_sec * 1000000LL + timeval->tv_usec;
1039 			if (timeout == 0)
1040 				timeout = B_INFINITE_TIMEOUT;
1041 
1042 			if (option == SO_SNDTIMEO)
1043 				socket->send.timeout = timeout;
1044 			else
1045 				socket->receive.timeout = timeout;
1046 			return B_OK;
1047 		}
1048 
1049 		case SO_NONBLOCK:
1050 			if (length != sizeof(int32))
1051 				return B_BAD_VALUE;
1052 
1053 			if (*(const int32 *)value) {
1054 				socket->send.timeout = 0;
1055 				socket->receive.timeout = 0;
1056 			} else {
1057 				socket->send.timeout = B_INFINITE_TIMEOUT;
1058 				socket->receive.timeout = B_INFINITE_TIMEOUT;
1059 			}
1060 			return B_OK;
1061 
1062 		case SO_BROADCAST:
1063 		case SO_DEBUG:
1064 		case SO_DONTROUTE:
1065 		case SO_KEEPALIVE:
1066 		case SO_OOBINLINE:
1067 		case SO_REUSEADDR:
1068 		case SO_REUSEPORT:
1069 		case SO_USELOOPBACK:
1070 			if (length != sizeof(int32))
1071 				return B_BAD_VALUE;
1072 
1073 			if (*(const int32 *)value)
1074 				socket->options |= option;
1075 			else
1076 				socket->options &= ~option;
1077 			return B_OK;
1078 
1079 		default:
1080 			break;
1081 	}
1082 
1083 	dprintf("socket_setsockopt: unknown option %d\n", option);
1084 	return ENOPROTOOPT;
1085 }
1086 
1087 
1088 int
1089 socket_setsockopt(net_socket *socket, int level, int option, const void *value,
1090 	int length)
1091 {
1092 	return socket->first_protocol->module->setsockopt(socket->first_protocol,
1093 		level, option, value, length);
1094 }
1095 
1096 
1097 int
1098 socket_shutdown(net_socket *socket, int direction)
1099 {
1100 	return socket->first_info->shutdown(socket->first_protocol, direction);
1101 }
1102 
1103 
1104 //	#pragma mark -
1105 
1106 
1107 static status_t
1108 socket_std_ops(int32 op, ...)
1109 {
1110 	switch (op) {
1111 		case B_MODULE_INIT:
1112 		{
1113 			// TODO: this is currently done in the net_stack driver
1114 			// initialize the main stack if not done so already
1115 			//module_info *module;
1116 			//return get_module(NET_STARTER_MODULE_NAME, &module);
1117 			list_init_etc(&sSocketList, offsetof(net_socket_private, link));
1118 			return benaphore_init(&sSocketLock, "socket list");
1119 		}
1120 		case B_MODULE_UNINIT:
1121 			//return put_module(NET_STARTER_MODULE_NAME);
1122 			benaphore_destroy(&sSocketLock);
1123 			return B_OK;
1124 
1125 		default:
1126 			return B_ERROR;
1127 	}
1128 }
1129 
1130 
// The socket module as exported to the rest of the stack. The initializer is
// positional - presumably the order has to match the declaration of
// net_socket_module_info (not visible in this file; verify before reordering).
net_socket_module_info gNetSocketModule = {
	{
		NET_SOCKET_MODULE_NAME,
		0,
		socket_std_ops
	},
	// open/close/free
	socket_open,
	socket_close,
	socket_free,

	// vectored I/O and control requests
	socket_readv,
	socket_writev,
	socket_control,

	// available data/space
	socket_read_avail,
	socket_send_avail,

	// net_buffer based I/O
	socket_send_data,
	socket_receive_data,

	// SOL_SOCKET level options
	socket_get_option,
	socket_set_option,

	// statistics
	socket_get_next_stat,

	// connections
	socket_spawn_pending,
	socket_delete,
	socket_dequeue_connected,
	socket_count_connected,
	socket_set_max_backlog,
	socket_connected,

	// notifications
	socket_request_notification,
	socket_cancel_notification,
	socket_notify,

	// standard socket API
	socket_accept,
	socket_bind,
	socket_connect,
	socket_getpeername,
	socket_getsockname,
	socket_getsockopt,
	socket_listen,
	socket_receive,
	socket_send,
	socket_setsockopt,
	socket_shutdown,
};
1182 
1183