xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision 9ecf9d1c1d4888d341a6eac72112c72d1ae3a4cb)
1 /*
2  * Copyright 2006, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4_address.h"
11 
12 #include <net_datalink.h>
13 #include <net_protocol.h>
14 #include <net_stack.h>
15 #include <NetBufferUtilities.h>
16 
17 #include <ByteOrder.h>
18 #include <KernelExport.h>
19 #include <util/AutoLock.h>
20 #include <util/list.h>
21 #include <util/khash.h>
22 #include <util/DoublyLinkedList.h>
23 
24 #include <netinet/in.h>
25 #include <netinet/ip.h>
26 #include <new>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 
// Debug tracing switch: drop the TRACE_IPV4 definition to compile out all
// TRACE() output of this file.
#define TRACE_IPV4
#if defined(TRACE_IPV4)
	// TRACE() pastes its (parenthesized) argument list right behind
	// dprintf, which is why call sites use double parentheses.
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif
37 
38 struct ipv4_header {
39 #if B_HOST_IS_LENDIAN == 1
40 	uint8		header_length : 4;	// header length in 32-bit words
41 	uint8		version : 4;
42 #else
43 	uint8		version : 4;
44 	uint8		header_length : 4;
45 #endif
46 	uint8		service_type;
47 	uint16		total_length;
48 	uint16		id;
49 	uint16		fragment_offset;
50 	uint8		time_to_live;
51 	uint8		protocol;
52 	uint16		checksum;
53 	in_addr_t	source;
54 	in_addr_t	destination;
55 
56 	uint16 HeaderLength() const { return header_length << 2; }
57 	uint16 TotalLength() const { return ntohs(total_length); }
58 	uint16 FragmentOffset() const { return ntohs(fragment_offset); }
59 } _PACKED;
60 
// the only IP version accepted and produced by this module
#define IP_VERSION				4

// Flag bits and offset mask to be applied to the host byte order value of
// ipv4_header::fragment_offset (see ipv4_header::FragmentOffset()).
#define IP_RESERVED_FLAG		0x8000
#define IP_DONT_FRAGMENT		0x4000
#define IP_MORE_FRAGMENTS		0x2000
#define IP_FRAGMENT_OFFSET_MASK	0x1fff

#define MAX_HASH_FRAGMENTS 		64
	// slots in the fragment packet's hash
#define FRAGMENT_TIMEOUT		60000000LL
	// discard fragment after 60 seconds
73 
74 struct ipv4_fragment : DoublyLinkedListLinkImpl<ipv4_fragment> {
75 	uint16		start;
76 	uint16		end;
77 	net_buffer	*buffer;
78 };
79 
80 typedef DoublyLinkedList<struct ipv4_fragment> FragmentList;
81 
82 struct ipv4_packet_key {
83 	in_addr_t	source;
84 	in_addr_t	destination;
85 	uint16		id;
86 	uint8		protocol;
87 };
88 
89 class FragmentPacket {
90 	public:
91 		FragmentPacket(const ipv4_packet_key &key);
92 		~FragmentPacket();
93 
94 		status_t AddFragment(uint16 start, uint16 end, net_buffer *buffer,
95 					bool lastFragment);
96 		status_t Reassemble(net_buffer *to);
97 
98 		bool IsComplete() const { return fReceivedLastFragment && fBytesLeft == 0; }
99 
100 		static uint32 Hash(void *_packet, const void *_key, uint32 range);
101 		static int Compare(void *_packet, const void *_key);
102 		static int32 NextOffset() { return offsetof(FragmentPacket, fNext); }
103 		static void StaleTimer(struct net_timer *timer, void *data);
104 
105 	private:
106 		FragmentPacket	*fNext;
107 		struct ipv4_packet_key fKey;
108 		bool			fReceivedLastFragment;
109 		int32			fBytesLeft;
110 		FragmentList	fFragments;
111 		net_timer		fTimer;
112 };
113 
114 typedef DoublyLinkedList<class RawSocket> RawSocketList;
115 
116 class RawSocket : public DoublyLinkedListLinkImpl<RawSocket> {
117 	public:
118 		RawSocket();
119 		~RawSocket();
120 
121 		status_t InitCheck();
122 
123 		status_t Read(size_t numBytes, uint32 flags, bigtime_t timeout,
124 					net_buffer **_buffer);
125 		ssize_t BytesAvailable();
126 
127 		status_t Write(net_buffer *buffer);
128 
129 	private:
130 		net_fifo	fFifo;
131 };
132 
133 struct ipv4_protocol : net_protocol {
134 	RawSocket	*raw;
135 	uint32		flags;
136 };
137 
138 // protocol flags
139 #define IP_FLAG_HEADER_INCLUDED	0x01
140 
141 
142 extern net_protocol_module_info gIPv4Module;
143 	// we need this in ipv4_std_ops() for registering the AF_INET domain
144 
145 static struct net_domain *sDomain;
146 static net_datalink_module_info *sDatalinkModule;
147 static net_stack_module_info *sStackModule;
148 struct net_buffer_module_info *sBufferModule;
149 static int32 sPacketID;
150 static RawSocketList sRawSockets;
151 static benaphore sRawSocketsLock;
152 static benaphore sFragmentLock;
153 static hash_table *sFragmentHash;
154 
155 
156 RawSocket::RawSocket()
157 {
158 	status_t status = sStackModule->init_fifo(&fFifo, "ipv4 raw socket", 65536);
159 	if (status < B_OK)
160 		fFifo.notify = status;
161 }
162 
163 
164 RawSocket::~RawSocket()
165 {
166 	if (fFifo.notify >= B_OK)
167 		sStackModule->uninit_fifo(&fFifo);
168 }
169 
170 
171 status_t
172 RawSocket::InitCheck()
173 {
174 	return fFifo.notify >= B_OK ? B_OK : fFifo.notify;
175 }
176 
177 
178 status_t
179 RawSocket::Read(size_t numBytes, uint32 flags, bigtime_t timeout,
180 	net_buffer **_buffer)
181 {
182 	net_buffer *buffer;
183 	status_t status = sStackModule->fifo_dequeue_buffer(&fFifo,
184 		flags, timeout, &buffer);
185 	if (status < B_OK)
186 		return status;
187 
188 	if (numBytes < buffer->size) {
189 		// discard any data behind the amount requested
190 		sBufferModule->trim(buffer, numBytes);
191 	}
192 
193 	*_buffer = buffer;
194 	return B_OK;
195 }
196 
197 
198 ssize_t
199 RawSocket::BytesAvailable()
200 {
201 	return fFifo.current_bytes;
202 }
203 
204 
205 status_t
206 RawSocket::Write(net_buffer *source)
207 {
208 	// we need to make a clone for that buffer and pass it to the socket
209 	net_buffer *buffer = sBufferModule->clone(source, false);
210 	TRACE(("ipv4::RawSocket::Write(): cloned buffer %p\n", buffer));
211 	if (buffer == NULL)
212 		return B_NO_MEMORY;
213 
214 	return sStackModule->fifo_enqueue_buffer(&fFifo, buffer);
215 }
216 
217 
218 //	#pragma mark -
219 
220 
221 FragmentPacket::FragmentPacket(const ipv4_packet_key &key)
222 	:
223 	fKey(key),
224 	fReceivedLastFragment(false),
225 	fBytesLeft(IP_MAXPACKET)
226 {
227 	sStackModule->init_timer(&fTimer, StaleTimer, this);
228 }
229 
230 
231 FragmentPacket::~FragmentPacket()
232 {
233 	// cancel the kill timer
234 	sStackModule->set_timer(&fTimer, -1);
235 
236 	// delete all fragments
237 	ipv4_fragment *fragment;
238 	while ((fragment = fFragments.RemoveHead()) != NULL) {
239 		if (fragment->buffer != NULL)
240 			sBufferModule->free(fragment->buffer);
241 		delete fragment;
242 	}
243 }
244 
245 
246 status_t
247 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer *buffer,
248 	bool lastFragment)
249 {
250 	// restart the timer
251 	sStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
252 
253 	if (start >= end) {
254 		// invalid fragment
255 		return B_BAD_DATA;
256 	}
257 
258 	// Search for a position in the list to insert the fragment
259 
260 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
261 	ipv4_fragment *previous = NULL;
262 	ipv4_fragment *next = NULL;
263 	while ((previous = iterator.Next()) != NULL) {
264 
265 		if (previous->start <= start) {
266 			// The new fragment can be inserted after this one
267 			break;
268 		}
269 
270 		next = previous;
271 	}
272 
273 	// See if we already have the fragment's data
274 
275 	if (previous != NULL && previous->start <= start && previous->end >= end) {
276 		// we do, so we can just drop this fragment
277 		sBufferModule->free(buffer);
278 		return B_OK;
279 	}
280 
281 	TRACE(("    previous: %p, next: %p\n", previous, next));
282 
283 	// If we have parts of the data already, truncate as needed
284 
285 	if (previous != NULL && previous->end > start) {
286 		TRACE(("    remove header %d bytes\n", previous->end - start));
287 		sBufferModule->remove_header(buffer, previous->end - start);
288 		start = previous->end;
289 	}
290 	if (next != NULL && next->start < end) {
291 		TRACE(("    remove trailer %d bytes\n", next->start - end));
292 		sBufferModule->remove_trailer(buffer, next->start - end);
293 		end = next->start;
294 	}
295 
296 	// Now try if we can already merge the fragments together
297 
298 	// We will always keep the last buffer received, so that we can still
299 	// report an error (in which case we're not responsible for freeing it)
300 
301 	if (previous != NULL && previous->end == start) {
302 		status_t status = sBufferModule->merge(buffer, previous->buffer, false);
303 		TRACE(("    merge previous: %s\n", strerror(status)));
304 		if (status < B_OK)
305 			return status;
306 
307 		previous->buffer = buffer;
308 		previous->end = end;
309 
310 		// cut down existing hole
311 		fBytesLeft -= end - start;
312 
313 		if (lastFragment && !fReceivedLastFragment) {
314 			fReceivedLastFragment = true;
315 			fBytesLeft -= IP_MAXPACKET - end;
316 		}
317 
318 		TRACE(("    hole length: %d\n", (int)fBytesLeft));
319 
320 		return B_OK;
321 	} else if (next != NULL && next->start == end) {
322 		status_t status = sBufferModule->merge(buffer, next->buffer, true);
323 		TRACE(("    merge next: %s\n", strerror(status)));
324 		if (status < B_OK)
325 			return status;
326 
327 		next->buffer = buffer;
328 		next->start = start;
329 
330 		// cut down existing hole
331 		fBytesLeft -= end - start;
332 
333 		if (lastFragment && !fReceivedLastFragment) {
334 			fReceivedLastFragment = true;
335 			fBytesLeft -= IP_MAXPACKET - end;
336 		}
337 
338 		TRACE(("    hole length: %d\n", (int)fBytesLeft));
339 
340 		return B_OK;
341 	}
342 
343 	// We couldn't merge the fragments, so we need to add a new fragment
344 
345 	ipv4_fragment *fragment = new (std::nothrow) ipv4_fragment;
346 	TRACE(("    new fragment: %p, bytes %d-%d\n", fragment, start, end));
347 	if (fragment == NULL)
348 		return B_NO_MEMORY;
349 
350 	fragment->start = start;
351 	fragment->end = end;
352 	fragment->buffer = buffer;
353 	fFragments.Insert(next, fragment);
354 
355 	// update length of the hole, if any
356 	fBytesLeft -= end - start;
357 
358 	if (lastFragment && !fReceivedLastFragment) {
359 		fReceivedLastFragment = true;
360 		fBytesLeft -= IP_MAXPACKET - end;
361 	}
362 
363 	TRACE(("    hole length: %d\n", (int)fBytesLeft));
364 
365 	return B_OK;
366 }
367 
368 
369 /*!
370 	Reassembles the fragments to the specified buffer \a to.
371 	This buffer must have been added via AddFragment() before.
372 */
373 status_t
374 FragmentPacket::Reassemble(net_buffer *to)
375 {
376 	if (!IsComplete())
377 		return NULL;
378 
379 	net_buffer *buffer = NULL;
380 
381 	ipv4_fragment *fragment;
382 	while ((fragment = fFragments.RemoveHead()) != NULL) {
383 		if (buffer != NULL) {
384 			status_t status;
385 			if (to == fragment->buffer) {
386 				status = sBufferModule->merge(fragment->buffer, buffer, false);
387 				buffer = fragment->buffer;
388 			} else
389 				status = sBufferModule->merge(buffer, fragment->buffer, true);
390 			if (status < B_OK)
391 				return status;
392 		} else
393 			buffer = fragment->buffer;
394 
395 		delete fragment;
396 	}
397 
398 	if (buffer != to)
399 		panic("ipv4 packet reassembly did not work correctly.\n");
400 
401 	return B_OK;
402 }
403 
404 
405 int
406 FragmentPacket::Compare(void *_packet, const void *_key)
407 {
408 	const ipv4_packet_key *key = (ipv4_packet_key *)_key;
409 	ipv4_packet_key *packetKey = &((FragmentPacket *)_packet)->fKey;
410 
411 	if (packetKey->id == key->id
412 		&& packetKey->source == key->source
413 		&& packetKey->destination == key->destination
414 		&& packetKey->protocol == key->protocol)
415 		return 0;
416 
417 	return 1;
418 }
419 
420 
421 uint32
422 FragmentPacket::Hash(void *_packet, const void *_key, uint32 range)
423 {
424 	const struct ipv4_packet_key *key = (struct ipv4_packet_key *)_key;
425 	FragmentPacket *packet = (FragmentPacket *)_packet;
426 	if (packet != NULL)
427 		key = &packet->fKey;
428 
429 	return (key->source ^ key->destination ^ key->protocol ^ key->id) % range;
430 }
431 
432 
433 void
434 FragmentPacket::StaleTimer(struct net_timer *timer, void *data)
435 {
436 	BenaphoreLocker locker(&sFragmentLock);
437 	hash_remove(sFragmentHash, (FragmentPacket *)data);
438 
439 	TRACE(("Assembling FragmentPacket timed out!\n"));
440 	delete (FragmentPacket *)data;
441 }
442 
443 //	#pragma mark -
444 
445 
446 static void
447 dump_ipv4_header(ipv4_header &header)
448 {
449 	struct pretty_ipv4 {
450 	#if B_HOST_IS_LENDIAN == 1
451 		uint8 a;
452 		uint8 b;
453 		uint8 c;
454 		uint8 d;
455 	#else
456 		uint8 d;
457 		uint8 c;
458 		uint8 b;
459 		uint8 a;
460 	#endif
461 	};
462 	struct pretty_ipv4 *src = (struct pretty_ipv4 *)&header.source;
463 	struct pretty_ipv4 *dst = (struct pretty_ipv4 *)&header.destination;
464 	dprintf("  version: %d\n", header.version);
465 	dprintf("  header_length: 4 * %d\n", header.header_length);
466 	dprintf("  service_type: %d\n", header.service_type);
467 	dprintf("  total_length: %d\n", header.TotalLength());
468 	dprintf("  id: %d\n", ntohs(header.id));
469 	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
470 		header.FragmentOffset() & IP_FRAGMENT_OFFSET_MASK,
471 		(header.FragmentOffset() & IP_RESERVED_FLAG) ? 'r' : '-',
472 		(header.FragmentOffset() & IP_DONT_FRAGMENT) ? 'd' : '-',
473 		(header.FragmentOffset() & IP_MORE_FRAGMENTS) ? 'm' : '-');
474 	dprintf("  time_to_live: %d\n", header.time_to_live);
475 	dprintf("  protocol: %d\n", header.protocol);
476 	dprintf("  checksum: %d\n", ntohs(header.checksum));
477 	dprintf("  source: %d.%d.%d.%d\n", src->a, src->b, src->c, src->d);
478 	dprintf("  destination: %d.%d.%d.%d\n", dst->a, dst->b, dst->c, dst->d);
479 }
480 
481 
482 /*!
483 	Attempts to re-assemble fragmented packets.
484 	\return B_OK if everything went well; if it could reassemble the packet, \a _buffer
485 		will point to its buffer, otherwise, it will be \c NULL.
486 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
487 
488 	TODO: Implement packet aging
489 */
490 static status_t
491 reassemble_fragments(const ipv4_header &header, net_buffer **_buffer)
492 {
493 	net_buffer *buffer = *_buffer;
494 	status_t status;
495 
496 	struct ipv4_packet_key key;
497 	key.source = (in_addr_t)header.source;
498 	key.destination = (in_addr_t)header.destination;
499 	key.id = header.id;
500 	key.protocol = header.protocol;
501 
502 	// TODO: Make locking finer grained.
503 	BenaphoreLocker locker(&sFragmentLock);
504 
505 	FragmentPacket *packet = (FragmentPacket *)hash_lookup(sFragmentHash, &key);
506 	if (packet == NULL) {
507 		// New fragment packet
508 		packet = new (std::nothrow) FragmentPacket(key);
509 		if (packet == NULL)
510 			return B_NO_MEMORY;
511 
512 		// add packet to hash
513 		status = hash_insert(sFragmentHash, packet);
514 		if (status != B_OK) {
515 			delete packet;
516 			return status;
517 		}
518 	}
519 
520 	uint16 fragmentOffset = header.FragmentOffset();
521 	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
522 	uint16 end = start + header.TotalLength() - header.HeaderLength();
523 	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;
524 
525 	TRACE(("   Received IPv4 %sfragment of size %d, offset %d.\n",
526 		lastFragment ? "last ": "", end - start, start));
527 
528 	// Remove header unless this is the first fragment
529 	if (start != 0)
530 		sBufferModule->remove_header(buffer, header.HeaderLength());
531 
532 	status = packet->AddFragment(start, end, buffer, lastFragment);
533 	if (status != B_OK)
534 		return status;
535 
536 	if (packet->IsComplete()) {
537 		hash_remove(sFragmentHash, packet);
538 			// no matter if reassembling succeeds, we won't need this packet anymore
539 
540 		status = packet->Reassemble(buffer);
541 		delete packet;
542 
543 		// _buffer does not change
544 		return status;
545 	}
546 
547 	// This indicates that the packet is not yet complete
548 	*_buffer = NULL;
549 	return B_OK;
550 }
551 
552 
553 static void
554 raw_receive_data(net_buffer *buffer)
555 {
556 	BenaphoreLocker locker(sRawSocketsLock);
557 	RawSocketList::Iterator iterator = sRawSockets.GetIterator();
558 
559 	while (iterator.HasNext()) {
560 		RawSocket *raw = iterator.Next();
561 		raw->Write(buffer);
562 	}
563 }
564 
565 
566 //	#pragma mark -
567 
568 
569 net_protocol *
570 ipv4_init_protocol(net_socket *socket)
571 {
572 	ipv4_protocol *protocol = new (std::nothrow) ipv4_protocol;
573 	if (protocol == NULL)
574 		return NULL;
575 
576 	protocol->raw = NULL;
577 	protocol->flags = 0;
578 	return protocol;
579 }
580 
581 
582 status_t
583 ipv4_uninit_protocol(net_protocol *_protocol)
584 {
585 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
586 
587 	delete protocol->raw;
588 	delete protocol;
589 	return B_OK;
590 }
591 
592 
593 /*!
594 	Since open() is only called on the top level protocol, when we get here
595 	it means we are on a SOCK_RAW socket.
596 */
597 status_t
598 ipv4_open(net_protocol *_protocol)
599 {
600 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
601 
602 	RawSocket *raw = new (std::nothrow) RawSocket;
603 	if (raw == NULL)
604 		return B_NO_MEMORY;
605 
606 	status_t status = raw->InitCheck();
607 	if (status < B_OK) {
608 		delete raw;
609 		return status;
610 	}
611 
612 	protocol->raw = raw;
613 
614 	BenaphoreLocker locker(sRawSocketsLock);
615 	sRawSockets.Add(raw);
616 	return B_OK;
617 }
618 
619 
620 status_t
621 ipv4_close(net_protocol *_protocol)
622 {
623 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
624 	RawSocket *raw = protocol->raw;
625 	if (raw == NULL)
626 		return B_ERROR;
627 
628 	BenaphoreLocker locker(sRawSocketsLock);
629 	sRawSockets.Remove(raw);
630 	delete raw;
631 	protocol->raw = NULL;
632 
633 	return B_OK;
634 }
635 
636 
637 status_t
638 ipv4_free(net_protocol *protocol)
639 {
640 	return B_OK;
641 }
642 
643 
644 status_t
645 ipv4_connect(net_protocol *protocol, const struct sockaddr *address)
646 {
647 	return B_ERROR;
648 }
649 
650 
651 status_t
652 ipv4_accept(net_protocol *protocol, struct net_socket **_acceptedSocket)
653 {
654 	return EOPNOTSUPP;
655 }
656 
657 
658 status_t
659 ipv4_control(net_protocol *_protocol, int level, int option, void *value,
660 	size_t *_length)
661 {
662 	if ((level & LEVEL_MASK) != IPPROTO_IP)
663 		return sDatalinkModule->control(sDomain, option, value, _length);
664 
665 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
666 
667 	if (level & LEVEL_GET_OPTION) {
668 		// get options
669 
670 		switch (option) {
671 			case IP_HDRINCL:
672 			{
673 				if (*_length != sizeof(int))
674 					return B_BAD_VALUE;
675 
676 				int headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
677 				return user_memcpy(value, &headerIncluded, sizeof(headerIncluded));
678 			}
679 
680 			default:
681 				return ENOPROTOOPT;
682 		}
683 	} else {
684 		// set options
685 
686 		switch (option) {
687 			case IP_HDRINCL:
688 			{
689 				int headerIncluded;
690 				if (*_length != sizeof(int))
691 					return B_BAD_VALUE;
692 				if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded)) < B_OK)
693 					return B_BAD_ADDRESS;
694 
695 				if (headerIncluded)
696 					protocol->flags |= IP_FLAG_HEADER_INCLUDED;
697 				else
698 					protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
699 				break;
700 			}
701 
702 			default:
703 				return ENOPROTOOPT;
704 		}
705 	}
706 
707 	return B_BAD_VALUE;
708 }
709 
710 
711 status_t
712 ipv4_bind(net_protocol *protocol, struct sockaddr *address)
713 {
714 	if (address->sa_family != AF_INET)
715 		return EAFNOSUPPORT;
716 
717 	// only INADDR_ANY and addresses of local interfaces are accepted:
718 	if (((sockaddr_in *)address)->sin_addr.s_addr == INADDR_ANY
719 		|| sDatalinkModule->is_local_address(sDomain, address)) {
720 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
721 			// explicitly set length, as our callers can't be trusted to
722 			// always provide the correct length!
723 		return B_OK;
724 	}
725 
726 	return B_ERROR;
727 		// address is unknown on this host
728 }
729 
730 
731 status_t
732 ipv4_unbind(net_protocol *protocol, struct sockaddr *address)
733 {
734 	// nothing to do here
735 	return B_OK;
736 }
737 
738 
739 status_t
740 ipv4_listen(net_protocol *protocol, int count)
741 {
742 	return EOPNOTSUPP;
743 }
744 
745 
746 status_t
747 ipv4_shutdown(net_protocol *protocol, int direction)
748 {
749 	return EOPNOTSUPP;
750 }
751 
752 
753 status_t
754 ipv4_send_routed_data(net_protocol *_protocol, struct net_route *route,
755 	net_buffer *buffer)
756 {
757 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
758 	net_interface *interface = route->interface;
759 
760 	TRACE(("someone tries to send some actual routed data!\n"));
761 
762 	sockaddr_in &source = *(sockaddr_in *)&buffer->source;
763 	if (source.sin_addr.s_addr == INADDR_ANY) {
764 		// replace an unbound source address with the address of the interface
765 		// TODO: couldn't we replace all addresses here?
766 		source.sin_addr.s_addr = ((sockaddr_in *)route->interface->address)->sin_addr.s_addr;
767 	}
768 
769 	// Add IP header (if needed)
770 
771 	if (protocol == NULL || (protocol->flags & IP_FLAG_HEADER_INCLUDED) == 0) {
772 		NetBufferPrepend<ipv4_header> bufferHeader(buffer);
773 		if (bufferHeader.Status() < B_OK)
774 			return bufferHeader.Status();
775 
776 		ipv4_header &header = bufferHeader.Data();
777 
778 		header.version = IP_VERSION;
779 		header.header_length = sizeof(ipv4_header) >> 2;
780 		header.service_type = 0;
781 		header.total_length = htons(buffer->size);
782 		header.id = htons(atomic_add(&sPacketID, 1));
783 		header.fragment_offset = 0;
784 		header.time_to_live = 254;
785 		header.protocol = protocol ? protocol->socket->protocol : buffer->protocol;
786 		header.checksum = 0;
787 		header.source = ((sockaddr_in *)route->interface->address)->sin_addr.s_addr;
788 			// always use the actual used source address
789 		header.destination = ((sockaddr_in *)&buffer->destination)->sin_addr.s_addr;
790 
791 		header.checksum = sBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true);
792 		dump_ipv4_header(header);
793 
794 		bufferHeader.Detach();
795 			// make sure the IP-header is already written to the buffer at this point
796 	}
797 
798 	TRACE(("header chksum: %ld, buffer checksum: %ld\n",
799 		sBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
800 		sBufferModule->checksum(buffer, 0, buffer->size, true)));
801 
802 	uint32 mtu = route->mtu ? route->mtu : interface->mtu;
803 	if (buffer->size > mtu) {
804 		// we need to fragment the packet
805 		dprintf("ipv4 needs to fragment (size %lu, MTU %lu), but that's not yet implemented...\n", buffer->size, mtu);
806 		return B_ERROR;
807 	}
808 
809 	TRACE(("destination-IP: buffer=%p addr=%p %08lx\n", buffer, &buffer->destination,
810 		ntohl(((sockaddr_in *)&buffer->destination)->sin_addr.s_addr)));
811 
812 	return sDatalinkModule->send_data(route, buffer);
813 }
814 
815 
816 status_t
817 ipv4_send_data(net_protocol *protocol, net_buffer *buffer)
818 {
819 	TRACE(("someone tries to send some actual data!\n"));
820 
821 	// find route
822 	struct net_route *route = sDatalinkModule->get_route(sDomain,
823 		(sockaddr *)&buffer->destination);
824 	if (route == NULL)
825 		return ENETUNREACH;
826 
827 	status_t status = ipv4_send_routed_data(protocol, route, buffer);
828 	sDatalinkModule->put_route(sDomain, route);
829 
830 	return status;
831 }
832 
833 
834 ssize_t
835 ipv4_send_avail(net_protocol *protocol)
836 {
837 	return B_ERROR;
838 }
839 
840 
841 status_t
842 ipv4_read_data(net_protocol *_protocol, size_t numBytes, uint32 flags,
843 	net_buffer **_buffer)
844 {
845 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
846 	RawSocket *raw = protocol->raw;
847 	if (raw == NULL)
848 		return B_ERROR;
849 
850 	TRACE(("read is waiting for data...\n"));
851 	return raw->Read(numBytes, flags, protocol->socket->receive.timeout, _buffer);
852 }
853 
854 
855 ssize_t
856 ipv4_read_avail(net_protocol *_protocol)
857 {
858 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
859 	RawSocket *raw = protocol->raw;
860 	if (raw == NULL)
861 		return B_ERROR;
862 
863 	return raw->BytesAvailable();
864 }
865 
866 
867 struct net_domain *
868 ipv4_get_domain(net_protocol *protocol)
869 {
870 	return sDomain;
871 }
872 
873 
874 size_t
875 ipv4_get_mtu(net_protocol *protocol, const struct sockaddr *address)
876 {
877 	net_route *route = sDatalinkModule->get_route(sDomain, address);
878 	if (route == NULL)
879 		return 0;
880 
881 	size_t mtu;
882 	if (route->mtu != 0)
883 		mtu = route->mtu;
884 	else
885 		mtu = route->interface->mtu;
886 
887 	sDatalinkModule->put_route(sDomain, route);
888 	return mtu - sizeof(ipv4_header);
889 }
890 
891 
892 status_t
893 ipv4_receive_data(net_buffer *buffer)
894 {
895 	TRACE(("IPv4 received a packet of %ld size!\n", buffer->size));
896 
897 	NetBufferHeader<ipv4_header> bufferHeader(buffer);
898 	if (bufferHeader.Status() < B_OK)
899 		return bufferHeader.Status();
900 
901 	ipv4_header &header = bufferHeader.Data();
902 	bufferHeader.Detach();
903 	dump_ipv4_header(header);
904 
905 	if (header.version != IP_VERSION)
906 		return B_BAD_TYPE;
907 
908 	uint16 packetLength = header.TotalLength();
909 	uint16 headerLength = header.HeaderLength();
910 	if (packetLength > buffer->size
911 		|| headerLength < sizeof(ipv4_header))
912 		return B_BAD_DATA;
913 
914 	// TODO: would be nice to have a direct checksum function somewhere
915 	if (sBufferModule->checksum(buffer, 0, headerLength, true) != 0)
916 		return B_BAD_DATA;
917 
918 	struct sockaddr_in &source = *(struct sockaddr_in *)&buffer->source;
919 	struct sockaddr_in &destination = *(struct sockaddr_in *)&buffer->destination;
920 
921 	source.sin_len = sizeof(sockaddr_in);
922 	source.sin_family = AF_INET;
923 	source.sin_addr.s_addr = header.source;
924 
925 	destination.sin_len = sizeof(sockaddr_in);
926 	destination.sin_family = AF_INET;
927 	destination.sin_addr.s_addr = header.destination;
928 
929 	// test if the packet is really for us
930 	uint32 matchedAddressType;
931 	if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
932 		&buffer->interface, &matchedAddressType)) {
933 		TRACE(("this packet was not for us\n"));
934 		return B_ERROR;
935 	}
936 	if (matchedAddressType != 0) {
937 		// copy over special address types (MSG_BCAST or MSG_MCAST):
938 		buffer->flags |= matchedAddressType;
939 	}
940 
941 	uint8 protocol = buffer->protocol = header.protocol;
942 
943 	// remove any trailing/padding data
944 	status_t status = sBufferModule->trim(buffer, packetLength);
945 	if (status < B_OK)
946 		return status;
947 
948 	// check for fragmentation
949 	uint16 fragmentOffset = ntohs(header.fragment_offset);
950 	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
951 		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
952 		// this is a fragment
953 		TRACE(("   Found a Fragment!\n"));
954 		status = reassemble_fragments(header, &buffer);
955 		TRACE(("   -> %s!\n", strerror(status)));
956 		if (status != B_OK)
957 			return status;
958 
959 		if (buffer == NULL) {
960 			// buffer was put into fragment packet
961 			TRACE(("   Not yet assembled...\n"));
962 			return B_OK;
963 		}
964 	}
965 
966 	// Since the buffer might have been changed (reassembled fragment)
967 	// we must no longer access bufferHeader or header anymore after
968 	// this point
969 
970 	if (protocol != IPPROTO_TCP && protocol != IPPROTO_UDP) {
971 		// SOCK_RAW doesn't get all packets
972 		raw_receive_data(buffer);
973 	}
974 
975 	sBufferModule->remove_header(buffer, headerLength);
976 		// the header is of variable size and may include IP options
977 		// (that we ignore for now)
978 
979 	// TODO: since we'll doing this for every packet, we may want to cache the module
980 	//	(and only put them when we're about to be unloaded)
981 	net_protocol_module_info *module;
982 	status = sStackModule->get_domain_receiving_protocol(sDomain, protocol, &module);
983 	if (status < B_OK) {
984 		// no handler for this packet
985 		return status;
986 	}
987 
988 	status = module->receive_data(buffer);
989 	sStackModule->put_domain_receiving_protocol(sDomain, protocol);
990 
991 	return status;
992 }
993 
994 
995 status_t
996 ipv4_error(uint32 code, net_buffer *data)
997 {
998 	return B_ERROR;
999 }
1000 
1001 
1002 status_t
1003 ipv4_error_reply(net_protocol *protocol, net_buffer *causedError, uint32 code,
1004 	void *errorData)
1005 {
1006 	return B_ERROR;
1007 }
1008 
1009 
1010 //	#pragma mark -
1011 
1012 
1013 status_t
1014 init_ipv4()
1015 {
1016 	status_t status = get_module(NET_STACK_MODULE_NAME, (module_info **)&sStackModule);
1017 	if (status < B_OK)
1018 		return status;
1019 	status = get_module(NET_BUFFER_MODULE_NAME, (module_info **)&sBufferModule);
1020 	if (status < B_OK)
1021 		goto err1;
1022 	status = get_module(NET_DATALINK_MODULE_NAME, (module_info **)&sDatalinkModule);
1023 	if (status < B_OK)
1024 		goto err2;
1025 
1026 	sPacketID = (int32)system_time();
1027 
1028 	status = benaphore_init(&sRawSocketsLock, "raw sockets");
1029 	if (status < B_OK)
1030 		goto err3;
1031 
1032 	status = benaphore_init(&sFragmentLock, "IPv4 Fragments");
1033 	if (status < B_OK)
1034 		goto err4;
1035 
1036 	sFragmentHash = hash_init(MAX_HASH_FRAGMENTS, FragmentPacket::NextOffset(),
1037 		&FragmentPacket::Compare, &FragmentPacket::Hash);
1038 	if (sFragmentHash == NULL)
1039 		goto err5;
1040 
1041 	new (&sRawSockets) RawSocketList;
1042 		// static initializers do not work in the kernel,
1043 		// so we have to do it here, manually
1044 		// TODO: for modules, this shouldn't be required
1045 
1046 	status = sStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
1047 		"network/protocols/ipv4/v1", NULL);
1048 	if (status < B_OK)
1049 		goto err6;
1050 
1051 	status = sStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
1052 		&gIPv4AddressModule, &sDomain);
1053 	if (status < B_OK)
1054 		goto err6;
1055 
1056 	return B_OK;
1057 
1058 err6:
1059 	hash_uninit(sFragmentHash);
1060 err5:
1061 	benaphore_destroy(&sFragmentLock);
1062 err4:
1063 	benaphore_destroy(&sRawSocketsLock);
1064 err3:
1065 	put_module(NET_DATALINK_MODULE_NAME);
1066 err2:
1067 	put_module(NET_BUFFER_MODULE_NAME);
1068 err1:
1069 	put_module(NET_STACK_MODULE_NAME);
1070 	return status;
1071 }
1072 
1073 
1074 status_t
1075 uninit_ipv4()
1076 {
1077 	hash_uninit(sFragmentHash);
1078 
1079 	benaphore_destroy(&sFragmentLock);
1080 	benaphore_destroy(&sRawSocketsLock);
1081 
1082 	sStackModule->unregister_domain(sDomain);
1083 	put_module(NET_DATALINK_MODULE_NAME);
1084 	put_module(NET_BUFFER_MODULE_NAME);
1085 	put_module(NET_STACK_MODULE_NAME);
1086 	return B_OK;
1087 }
1088 
1089 
1090 static status_t
1091 ipv4_std_ops(int32 op, ...)
1092 {
1093 	switch (op) {
1094 		case B_MODULE_INIT:
1095 			return init_ipv4();
1096 		case B_MODULE_UNINIT:
1097 			return uninit_ipv4();
1098 
1099 		default:
1100 			return B_ERROR;
1101 	}
1102 }
1103 
1104 
1105 net_protocol_module_info gIPv4Module = {
1106 	{
1107 		"network/protocols/ipv4/v1",
1108 		0,
1109 		ipv4_std_ops
1110 	},
1111 	ipv4_init_protocol,
1112 	ipv4_uninit_protocol,
1113 	ipv4_open,
1114 	ipv4_close,
1115 	ipv4_free,
1116 	ipv4_connect,
1117 	ipv4_accept,
1118 	ipv4_control,
1119 	ipv4_bind,
1120 	ipv4_unbind,
1121 	ipv4_listen,
1122 	ipv4_shutdown,
1123 	ipv4_send_data,
1124 	ipv4_send_routed_data,
1125 	ipv4_send_avail,
1126 	ipv4_read_data,
1127 	ipv4_read_avail,
1128 	ipv4_get_domain,
1129 	ipv4_get_mtu,
1130 	ipv4_receive_data,
1131 	ipv4_error,
1132 	ipv4_error_reply,
1133 };
1134 
1135 module_info *modules[] = {
1136 	(module_info *)&gIPv4Module,
1137 	NULL
1138 };
1139