xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision c7509fce9db782326f159843f1b028b5f5dcb1d2)
1 /*
2  * Copyright 2006-2007, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4_address.h"
11 
12 #include <net_datalink.h>
13 #include <net_protocol.h>
14 #include <net_stack.h>
15 #include <NetBufferUtilities.h>
16 
17 #include <ByteOrder.h>
18 #include <KernelExport.h>
19 #include <util/AutoLock.h>
20 #include <util/list.h>
21 #include <util/khash.h>
22 #include <util/DoublyLinkedList.h>
23 
24 #include <netinet/in.h>
25 #include <netinet/ip.h>
26 #include <new>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 
// Uncomment the next line to send the TRACE() output of this file to dprintf().
//#define TRACE_IPV4
#ifdef TRACE_IPV4
#	define TRACE(x) dprintf x
#else
	// Expand to an expression rather than a bare ';' so that an unbraced
	// "if (...) TRACE((...)); else ..." still parses when tracing is off.
#	define TRACE(x) ((void)0)
#endif
37 
38 struct ipv4_header {
39 #if B_HOST_IS_LENDIAN == 1
40 	uint8		header_length : 4;	// header length in 32-bit words
41 	uint8		version : 4;
42 #else
43 	uint8		version : 4;
44 	uint8		header_length : 4;
45 #endif
46 	uint8		service_type;
47 	uint16		total_length;
48 	uint16		id;
49 	uint16		fragment_offset;
50 	uint8		time_to_live;
51 	uint8		protocol;
52 	uint16		checksum;
53 	in_addr_t	source;
54 	in_addr_t	destination;
55 
56 	uint16 HeaderLength() const { return header_length << 2; }
57 	uint16 TotalLength() const { return ntohs(total_length); }
58 	uint16 FragmentOffset() const { return ntohs(fragment_offset); }
59 } _PACKED;
60 
61 #define IP_VERSION				4
62 
63 // fragment flags
64 #define IP_RESERVED_FLAG		0x8000
65 #define IP_DONT_FRAGMENT		0x4000
66 #define IP_MORE_FRAGMENTS		0x2000
67 #define IP_FRAGMENT_OFFSET_MASK	0x1fff
68 
69 #define MAX_HASH_FRAGMENTS 		64
70 	// slots in the fragment packet's hash
71 #define FRAGMENT_TIMEOUT		60000000LL
72 	// discard fragment after 60 seconds
73 
74 typedef DoublyLinkedList<struct net_buffer,
75 	DoublyLinkedListCLink<struct net_buffer> > FragmentList;
76 
77 typedef NetBufferField<uint16, offsetof(ipv4_header, checksum)> IPChecksumField;
78 
79 struct ipv4_packet_key {
80 	in_addr_t	source;
81 	in_addr_t	destination;
82 	uint16		id;
83 	uint8		protocol;
84 };
85 
86 class FragmentPacket {
87 	public:
88 		FragmentPacket(const ipv4_packet_key &key);
89 		~FragmentPacket();
90 
91 		status_t AddFragment(uint16 start, uint16 end, net_buffer *buffer,
92 					bool lastFragment);
93 		status_t Reassemble(net_buffer *to);
94 
95 		bool IsComplete() const { return fReceivedLastFragment && fBytesLeft == 0; }
96 
97 		static uint32 Hash(void *_packet, const void *_key, uint32 range);
98 		static int Compare(void *_packet, const void *_key);
99 		static int32 NextOffset() { return offsetof(FragmentPacket, fNext); }
100 		static void StaleTimer(struct net_timer *timer, void *data);
101 
102 	private:
103 		FragmentPacket	*fNext;
104 		struct ipv4_packet_key fKey;
105 		bool			fReceivedLastFragment;
106 		int32			fBytesLeft;
107 		FragmentList	fFragments;
108 		net_timer		fTimer;
109 };
110 
111 typedef DoublyLinkedList<class RawSocket> RawSocketList;
112 
113 class RawSocket : public DoublyLinkedListLinkImpl<RawSocket> {
114 	public:
115 		RawSocket(net_socket *socket);
116 		~RawSocket();
117 
118 		status_t InitCheck();
119 
120 		status_t Read(size_t numBytes, uint32 flags, bigtime_t timeout,
121 					net_buffer **_buffer);
122 		ssize_t BytesAvailable();
123 
124 		status_t Write(net_buffer *buffer);
125 
126 	private:
127 		net_socket	*fSocket;
128 		net_fifo	fFifo;
129 };
130 
131 struct ipv4_protocol : net_protocol {
132 	RawSocket	*raw;
133 	uint8		service_type;
134 	uint8		time_to_live;
135 	uint32		flags;
136 };
137 
138 // protocol flags
139 #define IP_FLAG_HEADER_INCLUDED	0x01
140 
141 
142 extern net_protocol_module_info gIPv4Module;
143 	// we need this in ipv4_std_ops() for registering the AF_INET domain
144 
145 static struct net_domain *sDomain;
146 static net_datalink_module_info *sDatalinkModule;
147 static net_stack_module_info *sStackModule;
148 struct net_buffer_module_info *gBufferModule;
149 static int32 sPacketID;
150 static RawSocketList sRawSockets;
151 static benaphore sRawSocketsLock;
152 static benaphore sFragmentLock;
153 static hash_table *sFragmentHash;
154 static net_protocol_module_info *sReceivingProtocol[256];
155 static benaphore sReceivingProtocolLock;
156 
157 
158 RawSocket::RawSocket(net_socket *socket)
159 	:
160 	fSocket(socket)
161 {
162 	status_t status = sStackModule->init_fifo(&fFifo, "ipv4 raw socket", 65536);
163 	if (status < B_OK)
164 		fFifo.notify = status;
165 }
166 
167 
168 RawSocket::~RawSocket()
169 {
170 	if (fFifo.notify >= B_OK)
171 		sStackModule->uninit_fifo(&fFifo);
172 }
173 
174 
175 status_t
176 RawSocket::InitCheck()
177 {
178 	return fFifo.notify >= B_OK ? B_OK : fFifo.notify;
179 }
180 
181 
182 status_t
183 RawSocket::Read(size_t numBytes, uint32 flags, bigtime_t timeout,
184 	net_buffer **_buffer)
185 {
186 	net_buffer *buffer;
187 	status_t status = sStackModule->fifo_dequeue_buffer(&fFifo,
188 		flags, timeout, &buffer);
189 	if (status < B_OK)
190 		return status;
191 
192 	*_buffer = buffer;
193 	return B_OK;
194 }
195 
196 
197 ssize_t
198 RawSocket::BytesAvailable()
199 {
200 	return fFifo.current_bytes;
201 }
202 
203 
204 status_t
205 RawSocket::Write(net_buffer *source)
206 {
207 	return sStackModule->fifo_socket_enqueue_buffer(&fFifo, fSocket,
208 			B_SELECT_READ, source);
209 }
210 
211 
212 //	#pragma mark -
213 
214 
215 FragmentPacket::FragmentPacket(const ipv4_packet_key &key)
216 	:
217 	fKey(key),
218 	fReceivedLastFragment(false),
219 	fBytesLeft(IP_MAXPACKET)
220 {
221 	sStackModule->init_timer(&fTimer, StaleTimer, this);
222 }
223 
224 
225 FragmentPacket::~FragmentPacket()
226 {
227 	// cancel the kill timer
228 	sStackModule->set_timer(&fTimer, -1);
229 
230 	// delete all fragments
231 	net_buffer *buffer;
232 	while ((buffer = fFragments.RemoveHead()) != NULL) {
233 		gBufferModule->free(buffer);
234 	}
235 }
236 
237 
238 status_t
239 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer *buffer,
240 	bool lastFragment)
241 {
242 	// restart the timer
243 	sStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
244 
245 	if (start >= end) {
246 		// invalid fragment
247 		return B_BAD_DATA;
248 	}
249 
250 	// Search for a position in the list to insert the fragment
251 
252 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
253 	net_buffer *previous = NULL;
254 	net_buffer *next = NULL;
255 	while ((previous = iterator.Next()) != NULL) {
256 		if (previous->fragment.start <= start) {
257 			// The new fragment can be inserted after this one
258 			break;
259 		}
260 
261 		next = previous;
262 	}
263 
264 	// See if we already have the fragment's data
265 
266 	if (previous != NULL && previous->fragment.start <= start
267 		&& previous->fragment.end >= end) {
268 		// we do, so we can just drop this fragment
269 		gBufferModule->free(buffer);
270 		return B_OK;
271 	}
272 
273 	TRACE(("    previous: %p, next: %p\n", previous, next));
274 
275 	// If we have parts of the data already, truncate as needed
276 
277 	if (previous != NULL && previous->fragment.end > start) {
278 		TRACE(("    remove header %d bytes\n", previous->fragment.end - start));
279 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
280 		start = previous->fragment.end;
281 	}
282 	if (next != NULL && next->fragment.start < end) {
283 		TRACE(("    remove trailer %d bytes\n", next->fragment.start - end));
284 		gBufferModule->remove_trailer(buffer, next->fragment.start - end);
285 		end = next->fragment.start;
286 	}
287 
288 	// Now try if we can already merge the fragments together
289 
290 	// We will always keep the last buffer received, so that we can still
291 	// report an error (in which case we're not responsible for freeing it)
292 
293 	if (previous != NULL && previous->fragment.end == start) {
294 		fFragments.Remove(previous);
295 
296 		buffer->fragment.start = previous->fragment.start;
297 		buffer->fragment.end = end;
298 
299 		status_t status = gBufferModule->merge(buffer, previous, false);
300 		TRACE(("    merge previous: %s\n", strerror(status)));
301 		if (status < B_OK) {
302 			fFragments.Insert(next, previous);
303 			return status;
304 		}
305 
306 		fFragments.Insert(next, buffer);
307 
308 		// cut down existing hole
309 		fBytesLeft -= end - start;
310 
311 		if (lastFragment && !fReceivedLastFragment) {
312 			fReceivedLastFragment = true;
313 			fBytesLeft -= IP_MAXPACKET - end;
314 		}
315 
316 		TRACE(("    hole length: %d\n", (int)fBytesLeft));
317 
318 		return B_OK;
319 	} else if (next != NULL && next->fragment.start == end) {
320 		fFragments.Remove(next);
321 
322 		buffer->fragment.start = start;
323 		buffer->fragment.end = next->fragment.end;
324 
325 		status_t status = gBufferModule->merge(buffer, next, true);
326 		TRACE(("    merge next: %s\n", strerror(status)));
327 		if (status < B_OK) {
328 			fFragments.Insert((net_buffer *)previous->link.next, next);
329 			return status;
330 		}
331 
332 		fFragments.Insert((net_buffer *)previous->link.next, buffer);
333 
334 		// cut down existing hole
335 		fBytesLeft -= end - start;
336 
337 		if (lastFragment && !fReceivedLastFragment) {
338 			fReceivedLastFragment = true;
339 			fBytesLeft -= IP_MAXPACKET - end;
340 		}
341 
342 		TRACE(("    hole length: %d\n", (int)fBytesLeft));
343 
344 		return B_OK;
345 	}
346 
347 	// We couldn't merge the fragments, so we need to add it as is
348 
349 	TRACE(("    new fragment: %p, bytes %d-%d\n", buffer, start, end));
350 
351 	buffer->fragment.start = start;
352 	buffer->fragment.end = end;
353 	fFragments.Insert(next, buffer);
354 
355 	// update length of the hole, if any
356 	fBytesLeft -= end - start;
357 
358 	if (lastFragment && !fReceivedLastFragment) {
359 		fReceivedLastFragment = true;
360 		fBytesLeft -= IP_MAXPACKET - end;
361 	}
362 
363 	TRACE(("    hole length: %d\n", (int)fBytesLeft));
364 
365 	return B_OK;
366 }
367 
368 
369 /*!
370 	Reassembles the fragments to the specified buffer \a to.
371 	This buffer must have been added via AddFragment() before.
372 */
373 status_t
374 FragmentPacket::Reassemble(net_buffer *to)
375 {
376 	if (!IsComplete())
377 		return NULL;
378 
379 	net_buffer *buffer = NULL;
380 
381 	net_buffer *fragment;
382 	while ((fragment = fFragments.RemoveHead()) != NULL) {
383 		if (buffer != NULL) {
384 			status_t status;
385 			if (to == fragment) {
386 				status = gBufferModule->merge(fragment, buffer, false);
387 				buffer = fragment;
388 			} else
389 				status = gBufferModule->merge(buffer, fragment, true);
390 			if (status < B_OK)
391 				return status;
392 		} else
393 			buffer = fragment;
394 	}
395 
396 	if (buffer != to)
397 		panic("ipv4 packet reassembly did not work correctly.\n");
398 
399 	return B_OK;
400 }
401 
402 
403 int
404 FragmentPacket::Compare(void *_packet, const void *_key)
405 {
406 	const ipv4_packet_key *key = (ipv4_packet_key *)_key;
407 	ipv4_packet_key *packetKey = &((FragmentPacket *)_packet)->fKey;
408 
409 	if (packetKey->id == key->id
410 		&& packetKey->source == key->source
411 		&& packetKey->destination == key->destination
412 		&& packetKey->protocol == key->protocol)
413 		return 0;
414 
415 	return 1;
416 }
417 
418 
419 uint32
420 FragmentPacket::Hash(void *_packet, const void *_key, uint32 range)
421 {
422 	const struct ipv4_packet_key *key = (struct ipv4_packet_key *)_key;
423 	FragmentPacket *packet = (FragmentPacket *)_packet;
424 	if (packet != NULL)
425 		key = &packet->fKey;
426 
427 	return (key->source ^ key->destination ^ key->protocol ^ key->id) % range;
428 }
429 
430 
431 /*static*/ void
432 FragmentPacket::StaleTimer(struct net_timer *timer, void *data)
433 {
434 	FragmentPacket *packet = (FragmentPacket *)data;
435 	TRACE(("Assembling FragmentPacket %p timed out!\n", packet));
436 
437 	BenaphoreLocker locker(&sFragmentLock);
438 
439 	hash_remove(sFragmentHash, packet);
440 	delete packet;
441 }
442 
443 
444 //	#pragma mark -
445 
446 
#if 0
/*!
	Debug helper (currently compiled out) that prints all fields of
	\a header via dprintf().
*/
static void
dump_ipv4_header(ipv4_header &header)
{
	// The addresses are printed byte by byte in memory order; as they are
	// stored the way they came off the wire, this yields the usual dotted
	// notation on any host, without an endian specific overlay.
	const uint8 *src = (const uint8 *)&header.source;
	const uint8 *dst = (const uint8 *)&header.destination;

	dprintf("  version: %d\n", header.version);
	dprintf("  header_length: 4 * %d\n", header.header_length);
	dprintf("  service_type: %d\n", header.service_type);
	dprintf("  total_length: %d\n", header.TotalLength());
	dprintf("  id: %d\n", ntohs(header.id));
	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
		header.FragmentOffset() & IP_FRAGMENT_OFFSET_MASK,
		(header.FragmentOffset() & IP_RESERVED_FLAG) ? 'r' : '-',
		(header.FragmentOffset() & IP_DONT_FRAGMENT) ? 'd' : '-',
		(header.FragmentOffset() & IP_MORE_FRAGMENTS) ? 'm' : '-');
	dprintf("  time_to_live: %d\n", header.time_to_live);
	dprintf("  protocol: %d\n", header.protocol);
	dprintf("  checksum: %d\n", ntohs(header.checksum));
	dprintf("  source: %d.%d.%d.%d\n", src[0], src[1], src[2], src[3]);
	dprintf("  destination: %d.%d.%d.%d\n", dst[0], dst[1], dst[2], dst[3]);
}
#endif
483 
484 
485 /*!
486 	Attempts to re-assemble fragmented packets.
487 	\return B_OK if everything went well; if it could reassemble the packet, \a _buffer
488 		will point to its buffer, otherwise, it will be \c NULL.
489 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
490 */
491 static status_t
492 reassemble_fragments(const ipv4_header &header, net_buffer **_buffer)
493 {
494 	net_buffer *buffer = *_buffer;
495 	status_t status;
496 
497 	struct ipv4_packet_key key;
498 	key.source = (in_addr_t)header.source;
499 	key.destination = (in_addr_t)header.destination;
500 	key.id = header.id;
501 	key.protocol = header.protocol;
502 
503 	// TODO: Make locking finer grained.
504 	BenaphoreLocker locker(&sFragmentLock);
505 
506 	FragmentPacket *packet = (FragmentPacket *)hash_lookup(sFragmentHash, &key);
507 	if (packet == NULL) {
508 		// New fragment packet
509 		packet = new (std::nothrow) FragmentPacket(key);
510 		if (packet == NULL)
511 			return B_NO_MEMORY;
512 
513 		// add packet to hash
514 		status = hash_insert(sFragmentHash, packet);
515 		if (status != B_OK) {
516 			delete packet;
517 			return status;
518 		}
519 	}
520 
521 	uint16 fragmentOffset = header.FragmentOffset();
522 	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
523 	uint16 end = start + header.TotalLength() - header.HeaderLength();
524 	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;
525 
526 	TRACE(("   Received IPv4 %sfragment of size %d, offset %d.\n",
527 		lastFragment ? "last ": "", end - start, start));
528 
529 	// Remove header unless this is the first fragment
530 	if (start != 0)
531 		gBufferModule->remove_header(buffer, header.HeaderLength());
532 
533 	status = packet->AddFragment(start, end, buffer, lastFragment);
534 	if (status != B_OK)
535 		return status;
536 
537 	if (packet->IsComplete()) {
538 		hash_remove(sFragmentHash, packet);
539 			// no matter if reassembling succeeds, we won't need this packet anymore
540 
541 		status = packet->Reassemble(buffer);
542 		delete packet;
543 
544 		// _buffer does not change
545 		return status;
546 	}
547 
548 	// This indicates that the packet is not yet complete
549 	*_buffer = NULL;
550 	return B_OK;
551 }
552 
553 
554 /*!
555 	Fragments the incoming buffer and send all fragments via the specified
556 	\a route.
557 */
558 static status_t
559 send_fragments(ipv4_protocol *protocol, struct net_route *route,
560 	net_buffer *buffer, uint32 mtu)
561 {
562 	TRACE(("ipv4 needs to fragment (size %lu, MTU %lu)...\n",
563 		buffer->size, mtu));
564 
565 	NetBufferHeaderReader<ipv4_header> originalHeader(buffer);
566 	if (originalHeader.Status() < B_OK)
567 		return originalHeader.Status();
568 
569 	uint16 headerLength = originalHeader->HeaderLength();
570 	uint32 bytesLeft = buffer->size - headerLength;
571 	uint32 fragmentOffset = 0;
572 	status_t status = B_OK;
573 
574 	net_buffer *headerBuffer = gBufferModule->split(buffer, headerLength);
575 	if (headerBuffer == NULL)
576 		return B_NO_MEMORY;
577 
578 	// TODO we need to make sure ipv4_header is contiguous or
579 	//      use another construct.
580 	NetBufferHeaderReader<ipv4_header> bufferHeader(headerBuffer);
581 	ipv4_header *header = &bufferHeader.Data();
582 
583 	// adapt MTU to be a multiple of 8 (fragment offsets can only be specified this way)
584 	mtu -= headerLength;
585 	mtu &= ~7;
586 	dprintf("  adjusted MTU to %ld\n", mtu);
587 
588 	dprintf("  bytesLeft = %ld\n", bytesLeft);
589 	while (bytesLeft > 0) {
590 		uint32 fragmentLength = min_c(bytesLeft, mtu);
591 		bytesLeft -= fragmentLength;
592 		bool lastFragment = bytesLeft == 0;
593 
594 		header->total_length = htons(fragmentLength + headerLength);
595 		header->fragment_offset = htons((lastFragment ? 0 : IP_MORE_FRAGMENTS)
596 			| (fragmentOffset >> 3));
597 		header->checksum = 0;
598 		header->checksum = sStackModule->checksum((uint8 *)header, headerLength);
599 			// TODO: compute the checksum only for those parts that changed?
600 
601 		dprintf("  send fragment of %ld bytes (%ld bytes left)\n", fragmentLength, bytesLeft);
602 
603 		net_buffer *fragmentBuffer;
604 		if (!lastFragment) {
605 			fragmentBuffer = gBufferModule->split(buffer, fragmentLength);
606 			fragmentOffset += fragmentLength;
607 		} else
608 			fragmentBuffer = buffer;
609 
610 		if (fragmentBuffer == NULL) {
611 			status = B_NO_MEMORY;
612 			break;
613 		}
614 
615 		// copy header to fragment
616 		status = gBufferModule->prepend(fragmentBuffer, header, headerLength);
617 
618 		// send fragment
619 		if (status == B_OK)
620 			status = sDatalinkModule->send_data(route, fragmentBuffer);
621 
622 		if (lastFragment) {
623 			// we don't own the last buffer, so we don't have to free it
624 			break;
625 		}
626 
627 		if (status < B_OK) {
628 			gBufferModule->free(fragmentBuffer);
629 			break;
630 		}
631 	}
632 
633 	gBufferModule->free(headerBuffer);
634 	return status;
635 }
636 
637 
638 static void
639 raw_receive_data(net_buffer *buffer)
640 {
641 	BenaphoreLocker locker(sRawSocketsLock);
642 
643 	TRACE(("ipv4:raw_receive_data(): protocol %i\n", buffer->protocol));
644 
645 	RawSocketList::Iterator iterator = sRawSockets.GetIterator();
646 
647 	while (iterator.HasNext()) {
648 		RawSocket *raw = iterator.Next();
649 		raw->Write(buffer);
650 	}
651 }
652 
653 
654 static net_protocol_module_info *
655 receiving_protocol(uint8 protocol)
656 {
657 	net_protocol_module_info *module = sReceivingProtocol[protocol];
658 	if (module != NULL)
659 		return module;
660 
661 	BenaphoreLocker locker(sReceivingProtocolLock);
662 
663 	module = sReceivingProtocol[protocol];
664 	if (module != NULL)
665 		return module;
666 
667 	if (sStackModule->get_domain_receiving_protocol(sDomain, protocol, &module) == B_OK)
668 		sReceivingProtocol[protocol] = module;
669 
670 	return module;
671 }
672 
673 
674 //	#pragma mark -
675 
676 
677 net_protocol *
678 ipv4_init_protocol(net_socket *socket)
679 {
680 	ipv4_protocol *protocol = new (std::nothrow) ipv4_protocol;
681 	if (protocol == NULL)
682 		return NULL;
683 
684 	protocol->raw = NULL;
685 	protocol->service_type = 0;
686 	protocol->time_to_live = 254;
687 	protocol->flags = 0;
688 	return protocol;
689 }
690 
691 
692 status_t
693 ipv4_uninit_protocol(net_protocol *_protocol)
694 {
695 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
696 
697 	delete protocol->raw;
698 	delete protocol;
699 	return B_OK;
700 }
701 
702 
703 /*!
704 	Since open() is only called on the top level protocol, when we get here
705 	it means we are on a SOCK_RAW socket.
706 */
707 status_t
708 ipv4_open(net_protocol *_protocol)
709 {
710 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
711 
712 	RawSocket *raw = new (std::nothrow) RawSocket(protocol->socket);
713 	if (raw == NULL)
714 		return B_NO_MEMORY;
715 
716 	status_t status = raw->InitCheck();
717 	if (status < B_OK) {
718 		delete raw;
719 		return status;
720 	}
721 
722 	protocol->raw = raw;
723 
724 	BenaphoreLocker locker(sRawSocketsLock);
725 	sRawSockets.Add(raw);
726 	return B_OK;
727 }
728 
729 
730 status_t
731 ipv4_close(net_protocol *_protocol)
732 {
733 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
734 	RawSocket *raw = protocol->raw;
735 	if (raw == NULL)
736 		return B_ERROR;
737 
738 	BenaphoreLocker locker(sRawSocketsLock);
739 	sRawSockets.Remove(raw);
740 	delete raw;
741 	protocol->raw = NULL;
742 
743 	return B_OK;
744 }
745 
746 
747 status_t
748 ipv4_free(net_protocol *protocol)
749 {
750 	return B_OK;
751 }
752 
753 
754 status_t
755 ipv4_connect(net_protocol *protocol, const struct sockaddr *address)
756 {
757 	return B_ERROR;
758 }
759 
760 
761 status_t
762 ipv4_accept(net_protocol *protocol, struct net_socket **_acceptedSocket)
763 {
764 	return EOPNOTSUPP;
765 }
766 
767 
768 status_t
769 ipv4_control(net_protocol *_protocol, int level, int option, void *value,
770 	size_t *_length)
771 {
772 	if ((level & LEVEL_MASK) != IPPROTO_IP)
773 		return sDatalinkModule->control(sDomain, option, value, _length);
774 
775 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
776 
777 	if (level & LEVEL_GET_OPTION) {
778 		// get options
779 
780 		switch (option) {
781 			case IP_HDRINCL:
782 			{
783 				if (*_length != sizeof(int))
784 					return B_BAD_VALUE;
785 
786 				int headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
787 				return user_memcpy(value, &headerIncluded, sizeof(headerIncluded));
788 			}
789 
790 			case IP_TTL:
791 			{
792 				if (*_length != sizeof(int))
793 					return B_BAD_VALUE;
794 
795 				int timeToLive = protocol->time_to_live;
796 				return user_memcpy(value, &timeToLive, sizeof(timeToLive));
797 			}
798 
799 			case IP_TOS:
800 			{
801 				if (*_length != sizeof(int))
802 					return B_BAD_VALUE;
803 
804 				int serviceType = protocol->service_type;
805 				return user_memcpy(value, &serviceType, sizeof(serviceType));
806 			}
807 
808 			default:
809 				dprintf("IPv4::control(): get unknown option: %d\n", option);
810 				return ENOPROTOOPT;
811 		}
812 	}
813 
814 	// set options
815 
816 	switch (option) {
817 		case IP_HDRINCL:
818 		{
819 			int headerIncluded;
820 			if (*_length != sizeof(int))
821 				return B_BAD_VALUE;
822 			if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded)) < B_OK)
823 				return B_BAD_ADDRESS;
824 
825 			if (headerIncluded)
826 				protocol->flags |= IP_FLAG_HEADER_INCLUDED;
827 			else
828 				protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
829 			return B_OK;
830 		}
831 
832 		case IP_TTL:
833 		{
834 			int timeToLive;
835 			if (*_length != sizeof(int))
836 				return B_BAD_VALUE;
837 			if (user_memcpy(&timeToLive, value, sizeof(timeToLive)) < B_OK)
838 				return B_BAD_ADDRESS;
839 
840 			protocol->time_to_live = timeToLive;
841 			return B_OK;
842 		}
843 
844 		case IP_TOS:
845 		{
846 			int serviceType;
847 			if (*_length != sizeof(int))
848 				return B_BAD_VALUE;
849 			if (user_memcpy(&serviceType, value, sizeof(serviceType)) < B_OK)
850 				return B_BAD_ADDRESS;
851 
852 			protocol->service_type = serviceType;
853 			return B_OK;
854 		}
855 
856 		default:
857 			dprintf("IPv4::control(): set unknown option: %d\n", option);
858 			return ENOPROTOOPT;
859 	}
860 
861 	// never gets here
862 	return B_BAD_VALUE;
863 }
864 
865 
866 status_t
867 ipv4_bind(net_protocol *protocol, struct sockaddr *address)
868 {
869 	if (address->sa_family != AF_INET)
870 		return EAFNOSUPPORT;
871 
872 	// only INADDR_ANY and addresses of local interfaces are accepted:
873 	if (((sockaddr_in *)address)->sin_addr.s_addr == INADDR_ANY
874 		|| sDatalinkModule->is_local_address(sDomain, address, NULL, NULL)) {
875 		memcpy(&protocol->socket->address, address, sizeof(struct sockaddr_in));
876 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
877 			// explicitly set length, as our callers can't be trusted to
878 			// always provide the correct length!
879 		return B_OK;
880 	}
881 
882 	return B_ERROR;
883 		// address is unknown on this host
884 }
885 
886 
887 status_t
888 ipv4_unbind(net_protocol *protocol, struct sockaddr *address)
889 {
890 	// nothing to do here
891 	return B_OK;
892 }
893 
894 
895 status_t
896 ipv4_listen(net_protocol *protocol, int count)
897 {
898 	return EOPNOTSUPP;
899 }
900 
901 
902 status_t
903 ipv4_shutdown(net_protocol *protocol, int direction)
904 {
905 	return EOPNOTSUPP;
906 }
907 
908 
909 status_t
910 ipv4_send_routed_data(net_protocol *_protocol, struct net_route *route,
911 	net_buffer *buffer)
912 {
913 	if (route == NULL)
914 		return B_BAD_VALUE;
915 
916 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
917 	net_interface *interface = route->interface;
918 
919 	TRACE(("someone tries to send some actual routed data!\n"));
920 
921 	sockaddr_in &source = *(sockaddr_in *)&buffer->source;
922 	sockaddr_in &destination = *(sockaddr_in *)&buffer->destination;
923 
924 	bool headerIncluded = false, checksumNeeded = true;
925 	if (protocol != NULL)
926 		headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
927 
928 	// Add IP header (if needed)
929 
930 	if (!headerIncluded) {
931 		NetBufferPrepend<ipv4_header> header(buffer);
932 		if (header.Status() < B_OK)
933 			return header.Status();
934 
935 		header->version = IP_VERSION;
936 		header->header_length = sizeof(ipv4_header) / 4;
937 		header->service_type = protocol ? protocol->service_type : 0;
938 		header->total_length = htons(buffer->size);
939 		header->id = htons(atomic_add(&sPacketID, 1));
940 		header->fragment_offset = 0;
941 		header->time_to_live = protocol ? protocol->time_to_live : 254;
942 		header->protocol = protocol ? protocol->socket->protocol : buffer->protocol;
943 		header->checksum = 0;
944 
945 		header->source = source.sin_addr.s_addr;
946 		header->destination = destination.sin_addr.s_addr;
947 	} else {
948 		// if IP_HDRINCL, check if the source address is set
949 		NetBufferHeaderReader<ipv4_header> header(buffer);
950 		if (header.Status() < B_OK)
951 			return header.Status();
952 
953 		if (header->source == 0) {
954 			header->source = source.sin_addr.s_addr;
955 			header->checksum = 0;
956 			header.Sync();
957 		} else
958 			checksumNeeded = false;
959 	}
960 
961 	if (buffer->size > 0xffff)
962 		return EMSGSIZE;
963 
964 	if (checksumNeeded)
965 		*IPChecksumField(buffer) = gBufferModule->checksum(buffer, 0,
966 			sizeof(ipv4_header), true);
967 
968 	TRACE(("header chksum: %ld, buffer checksum: %ld\n",
969 		gBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
970 		gBufferModule->checksum(buffer, 0, buffer->size, true)));
971 
972 	TRACE(("destination-IP: buffer=%p addr=%p %08lx\n", buffer, &buffer->destination,
973 		ntohl(destination->sin_addr.s_addr)));
974 
975 	uint32 mtu = route->mtu ? route->mtu : interface->mtu;
976 	if (buffer->size > mtu) {
977 		// we need to fragment the packet
978 		return send_fragments(protocol, route, buffer, mtu);
979 	}
980 
981 	return sDatalinkModule->send_data(route, buffer);
982 }
983 
984 
985 status_t
986 ipv4_send_data(net_protocol *protocol, net_buffer *buffer)
987 {
988 	TRACE(("someone tries to send some actual data!\n"));
989 
990 	net_route *route = NULL;
991 	status_t status = sDatalinkModule->get_buffer_route(sDomain, buffer,
992 		&route);
993 	if (status >= B_OK) {
994 		status = ipv4_send_routed_data(protocol, route, buffer);
995 		sDatalinkModule->put_route(sDomain, route);
996 	}
997 
998 	return status;
999 }
1000 
1001 
1002 ssize_t
1003 ipv4_send_avail(net_protocol *protocol)
1004 {
1005 	return B_ERROR;
1006 }
1007 
1008 
1009 status_t
1010 ipv4_read_data(net_protocol *_protocol, size_t numBytes, uint32 flags,
1011 	net_buffer **_buffer)
1012 {
1013 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1014 	RawSocket *raw = protocol->raw;
1015 	if (raw == NULL)
1016 		return B_ERROR;
1017 
1018 	TRACE(("read is waiting for data...\n"));
1019 	return raw->Read(numBytes, flags, protocol->socket->receive.timeout, _buffer);
1020 }
1021 
1022 
1023 ssize_t
1024 ipv4_read_avail(net_protocol *_protocol)
1025 {
1026 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1027 	RawSocket *raw = protocol->raw;
1028 	if (raw == NULL)
1029 		return B_ERROR;
1030 
1031 	return raw->BytesAvailable();
1032 }
1033 
1034 
1035 struct net_domain *
1036 ipv4_get_domain(net_protocol *protocol)
1037 {
1038 	return sDomain;
1039 }
1040 
1041 
1042 size_t
1043 ipv4_get_mtu(net_protocol *protocol, const struct sockaddr *address)
1044 {
1045 	net_route *route = sDatalinkModule->get_route(sDomain, address);
1046 	if (route == NULL)
1047 		return 0;
1048 
1049 	size_t mtu;
1050 	if (route->mtu != 0)
1051 		mtu = route->mtu;
1052 	else
1053 		mtu = route->interface->mtu;
1054 
1055 	sDatalinkModule->put_route(sDomain, route);
1056 	return mtu - sizeof(ipv4_header);
1057 }
1058 
1059 
1060 status_t
1061 ipv4_receive_data(net_buffer *buffer)
1062 {
1063 	TRACE(("IPv4 received a packet (%p) of %ld size!\n", buffer, buffer->size));
1064 
1065 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1066 	if (bufferHeader.Status() < B_OK)
1067 		return bufferHeader.Status();
1068 
1069 	ipv4_header &header = bufferHeader.Data();
1070 	//dump_ipv4_header(header);
1071 
1072 	if (header.version != IP_VERSION)
1073 		return B_BAD_TYPE;
1074 
1075 	uint16 packetLength = header.TotalLength();
1076 	uint16 headerLength = header.HeaderLength();
1077 	if (packetLength > buffer->size
1078 		|| headerLength < sizeof(ipv4_header))
1079 		return B_BAD_DATA;
1080 
1081 	// TODO: would be nice to have a direct checksum function somewhere
1082 	if (gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1083 		return B_BAD_DATA;
1084 
1085 	struct sockaddr_in &source = *(struct sockaddr_in *)&buffer->source;
1086 	struct sockaddr_in &destination = *(struct sockaddr_in *)&buffer->destination;
1087 
1088 	source.sin_len = sizeof(sockaddr_in);
1089 	source.sin_family = AF_INET;
1090 	source.sin_addr.s_addr = header.source;
1091 
1092 	destination.sin_len = sizeof(sockaddr_in);
1093 	destination.sin_family = AF_INET;
1094 	destination.sin_addr.s_addr = header.destination;
1095 
1096 	// test if the packet is really for us
1097 	uint32 matchedAddressType;
1098 	if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1099 		&buffer->interface, &matchedAddressType)) {
1100 		TRACE(("this packet was not for us %lx -> %lx\n",
1101 			ntohl(header.source), ntohl(header.destination)));
1102 		return B_ERROR;
1103 	}
1104 	if (matchedAddressType != 0) {
1105 		// copy over special address types (MSG_BCAST or MSG_MCAST):
1106 		buffer->flags |= matchedAddressType;
1107 	}
1108 
1109 	uint8 protocol = buffer->protocol = header.protocol;
1110 
1111 	// remove any trailing/padding data
1112 	status_t status = gBufferModule->trim(buffer, packetLength);
1113 	if (status < B_OK)
1114 		return status;
1115 
1116 	// check for fragmentation
1117 	uint16 fragmentOffset = ntohs(header.fragment_offset);
1118 	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
1119 		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
1120 		// this is a fragment
1121 		TRACE(("   Found a Fragment!\n"));
1122 		status = reassemble_fragments(header, &buffer);
1123 		TRACE(("   -> %s!\n", strerror(status)));
1124 		if (status != B_OK)
1125 			return status;
1126 
1127 		if (buffer == NULL) {
1128 			// buffer was put into fragment packet
1129 			TRACE(("   Not yet assembled...\n"));
1130 			return B_OK;
1131 		}
1132 	}
1133 
1134 	// Since the buffer might have been changed (reassembled fragment)
1135 	// we must no longer access bufferHeader or header anymore after
1136 	// this point
1137 
1138 	if (protocol != IPPROTO_TCP && protocol != IPPROTO_UDP) {
1139 		// SOCK_RAW doesn't get all packets
1140 		raw_receive_data(buffer);
1141 	}
1142 
1143 	gBufferModule->remove_header(buffer, headerLength);
1144 		// the header is of variable size and may include IP options
1145 		// (that we ignore for now)
1146 
1147 	net_protocol_module_info *module = receiving_protocol(protocol);
1148 	if (module == NULL) {
1149 		// no handler for this packet
1150 		return EAFNOSUPPORT;
1151 	}
1152 
1153 	return module->receive_data(buffer);
1154 }
1155 
1156 
1157 status_t
1158 ipv4_error(uint32 code, net_buffer *data)
1159 {
1160 	return B_ERROR;
1161 }
1162 
1163 
1164 status_t
1165 ipv4_error_reply(net_protocol *protocol, net_buffer *causedError, uint32 code,
1166 	void *errorData)
1167 {
1168 	return B_ERROR;
1169 }
1170 
1171 
1172 //	#pragma mark -
1173 
1174 
1175 status_t
1176 init_ipv4()
1177 {
1178 	status_t status = get_module(NET_STACK_MODULE_NAME, (module_info **)&sStackModule);
1179 	if (status < B_OK)
1180 		return status;
1181 	status = get_module(NET_BUFFER_MODULE_NAME, (module_info **)&gBufferModule);
1182 	if (status < B_OK)
1183 		goto err1;
1184 	status = get_module(NET_DATALINK_MODULE_NAME, (module_info **)&sDatalinkModule);
1185 	if (status < B_OK)
1186 		goto err2;
1187 
1188 	sPacketID = (int32)system_time();
1189 
1190 	status = benaphore_init(&sRawSocketsLock, "raw sockets");
1191 	if (status < B_OK)
1192 		goto err3;
1193 
1194 	status = benaphore_init(&sFragmentLock, "IPv4 Fragments");
1195 	if (status < B_OK)
1196 		goto err4;
1197 
1198 	status = benaphore_init(&sReceivingProtocolLock, "IPv4 receiving protocols");
1199 	if (status < B_OK)
1200 		goto err5;
1201 
1202 	sFragmentHash = hash_init(MAX_HASH_FRAGMENTS, FragmentPacket::NextOffset(),
1203 		&FragmentPacket::Compare, &FragmentPacket::Hash);
1204 	if (sFragmentHash == NULL)
1205 		goto err6;
1206 
1207 	new (&sRawSockets) RawSocketList;
1208 		// static initializers do not work in the kernel,
1209 		// so we have to do it here, manually
1210 		// TODO: for modules, this shouldn't be required
1211 
1212 	status = sStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
1213 		"network/protocols/ipv4/v1", NULL);
1214 	if (status < B_OK)
1215 		goto err7;
1216 
1217 	status = sStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
1218 		&gIPv4AddressModule, &sDomain);
1219 	if (status < B_OK)
1220 		goto err7;
1221 
1222 	return B_OK;
1223 
1224 err7:
1225 	hash_uninit(sFragmentHash);
1226 err6:
1227 	benaphore_destroy(&sReceivingProtocolLock);
1228 err5:
1229 	benaphore_destroy(&sFragmentLock);
1230 err4:
1231 	benaphore_destroy(&sRawSocketsLock);
1232 err3:
1233 	put_module(NET_DATALINK_MODULE_NAME);
1234 err2:
1235 	put_module(NET_BUFFER_MODULE_NAME);
1236 err1:
1237 	put_module(NET_STACK_MODULE_NAME);
1238 	return status;
1239 }
1240 
1241 
1242 status_t
1243 uninit_ipv4()
1244 {
1245 	benaphore_lock(&sReceivingProtocolLock);
1246 
1247 	// put all the domain receiving protocols we gathered so far
1248 	for (uint32 i = 0; i < 256; i++) {
1249 		if (sReceivingProtocol[i] != NULL)
1250 			sStackModule->put_domain_receiving_protocol(sDomain, i);
1251 	}
1252 
1253 	sStackModule->unregister_domain(sDomain);
1254 	benaphore_unlock(&sReceivingProtocolLock);
1255 
1256 	hash_uninit(sFragmentHash);
1257 
1258 	benaphore_destroy(&sFragmentLock);
1259 	benaphore_destroy(&sRawSocketsLock);
1260 	benaphore_destroy(&sReceivingProtocolLock);
1261 
1262 	put_module(NET_DATALINK_MODULE_NAME);
1263 	put_module(NET_BUFFER_MODULE_NAME);
1264 	put_module(NET_STACK_MODULE_NAME);
1265 	return B_OK;
1266 }
1267 
1268 
1269 static status_t
1270 ipv4_std_ops(int32 op, ...)
1271 {
1272 	switch (op) {
1273 		case B_MODULE_INIT:
1274 			return init_ipv4();
1275 		case B_MODULE_UNINIT:
1276 			return uninit_ipv4();
1277 
1278 		default:
1279 			return B_ERROR;
1280 	}
1281 }
1282 
1283 
// The IPv4 protocol module description, exported via modules[] below.
// The initializer is positional: the entries must stay in exactly the order
// of the members of net_protocol_module_info (presumably declared in
// <net_protocol.h> - verify there before adding or moving a hook).
net_protocol_module_info gIPv4Module = {
	// module header
	{
		"network/protocols/ipv4/v1",
			// the same name init_ipv4() passes to
			// register_domain_protocols()
		0,
		ipv4_std_ops
			// handles B_MODULE_INIT/B_MODULE_UNINIT
	},
	// protocol hooks
	ipv4_init_protocol,
	ipv4_uninit_protocol,
	ipv4_open,
	ipv4_close,
	ipv4_free,
	ipv4_connect,
	ipv4_accept,
	ipv4_control,
	ipv4_bind,
	ipv4_unbind,
	ipv4_listen,
	ipv4_shutdown,
	ipv4_send_data,
	ipv4_send_routed_data,
	ipv4_send_avail,
	ipv4_read_data,
	ipv4_read_avail,
	ipv4_get_domain,
	ipv4_get_mtu,
	ipv4_receive_data,
	ipv4_error,
		// stub, always returns B_ERROR
	ipv4_error_reply,
		// stub, always returns B_ERROR
};
1313 
1314 module_info *modules[] = {
1315 	(module_info *)&gIPv4Module,
1316 	NULL
1317 };
1318