xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision 981f1b1135291a4fca290fbdf69910dc2f24abdd)
1 /*
2  * Copyright 2006-2009, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4_address.h"
11 #include "multicast.h"
12 
13 #include <net_datalink.h>
14 #include <net_datalink_protocol.h>
15 #include <net_device.h>
16 #include <net_protocol.h>
17 #include <net_stack.h>
18 #include <NetBufferUtilities.h>
19 #include <ProtocolUtilities.h>
20 
21 #include <ByteOrder.h>
22 #include <KernelExport.h>
23 #include <util/AutoLock.h>
24 #include <util/list.h>
25 #include <util/khash.h>
26 #include <util/DoublyLinkedList.h>
27 #include <util/MultiHashTable.h>
28 
29 #include <netinet/in.h>
30 #include <netinet/ip.h>
31 #include <new>
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <utility>
36 
37 
// Uncomment the following line to enable the debug output of the TRACE()
// and TRACE_SK() macros below.
//#define TRACE_IPV4
#ifdef TRACE_IPV4
	// Both macros print via dprintf() and prefix every message with "IPv4"
	// and the current system_time(); TRACE_SK() additionally prints the
	// pointer passed as its first argument.
#	define TRACE(format, args...) \
		dprintf("IPv4 [%llu] " format "\n", system_time() , ##args)
#	define TRACE_SK(protocol, format, args...) \
		dprintf("IPv4 [%llu] %p " format "\n", system_time(), \
			protocol , ##args)
#else
	// Tracing is disabled: both macros expand to an empty statement.
#	define TRACE(args...)		do { } while (0)
#	define TRACE_SK(args...)	do { } while (0)
#endif
49 
/*!	Packed in-memory representation of the fixed part of an IPv4 header.
	The multi-byte fields are kept as found in the packet; the accessor
	methods below convert them for use by the host.
*/
struct ipv4_header {
	// version and header_length share a single byte; the order of the two
	// bit fields depends on the host's endianness.
#if B_HOST_IS_LENDIAN == 1
	uint8		header_length : 4;	// header length in 32-bit words
	uint8		version : 4;
#else
	uint8		version : 4;
	uint8		header_length : 4;
#endif
	uint8		service_type;
	uint16		total_length;
	uint16		id;
	uint16		fragment_offset;
		// carries the fragment flags (IP_*_FLAG/IP_*_FRAGMENT*) as well as
		// the offset (IP_FRAGMENT_OFFSET_MASK)
	uint8		time_to_live;
	uint8		protocol;
	uint16		checksum;
	in_addr_t	source;
	in_addr_t	destination;

	// Header length in bytes (header_length counts 32-bit words).
	uint16 HeaderLength() const { return header_length << 2; }
	// Total length converted with ntohs().
	uint16 TotalLength() const { return ntohs(total_length); }
	// Flags and fragment offset converted with ntohs().
	uint16 FragmentOffset() const { return ntohs(fragment_offset); }
} _PACKED;
72 
#define IP_VERSION				4

// fragment flags
// (bit masks to be applied to the value returned by
// ipv4_header::FragmentOffset())
#define IP_RESERVED_FLAG		0x8000
#define IP_DONT_FRAGMENT		0x4000
#define IP_MORE_FRAGMENTS		0x2000
#define IP_FRAGMENT_OFFSET_MASK	0x1fff
	// the masked offset is counted in units of 8 bytes

#define MAX_HASH_FRAGMENTS 		64
	// slots in the fragment packet's hash
#define FRAGMENT_TIMEOUT		60000000LL
	// discard fragment after 60 seconds

// List of the buffers that make up a (partially received) fragmented packet;
// the buffers are linked through their own "link" member.
typedef DoublyLinkedList<struct net_buffer,
	DoublyLinkedListCLink<struct net_buffer> > FragmentList;

// Accessor for the checksum field of an IPv4 header stored in a net_buffer.
typedef NetBufferField<uint16, offsetof(ipv4_header, checksum)> IPChecksumField;
90 
/*!	Key under which a FragmentPacket is stored in the fragment hash.
	reassemble_fragments() fills it with the respective fields of the
	IPv4 header, taken over unchanged.
*/
struct ipv4_packet_key {
	in_addr_t	source;
	in_addr_t	destination;
	uint16		id;
	uint8		protocol;
};
97 
/*!	Collects the fragments of a single IPv4 packet until all of its data
	has arrived. Objects of this class are stored in the fragment hash
	(the Hash(), Compare() and NextOffset() methods are the hooks used for
	that), and remove themselves from it via StaleTimer() when the timer
	expires.
*/
class FragmentPacket {
public:
							FragmentPacket(const ipv4_packet_key &key);
							~FragmentPacket();

			// Adds the data range [start, end) contained in \a buffer.
			status_t		AddFragment(uint16 start, uint16 end,
								net_buffer* buffer, bool lastFragment);
			// Merges all fragments into \a to.
			status_t		Reassemble(net_buffer* to);

			// The packet is complete once its last fragment has been
			// seen and no hole is left.
			bool			IsComplete() const
								{ return fReceivedLastFragment
									&& fBytesLeft == 0; }

	static	uint32			Hash(void* _packet, const void* _key, uint32 range);
	static	int				Compare(void* _packet, const void* _key);
	static	int32			NextOffset()
								{ return offsetof(FragmentPacket, fNext); }
	static	void			StaleTimer(struct net_timer* timer, void* data);

private:
			FragmentPacket	*fNext;
				// hash chain link, see NextOffset()
			struct ipv4_packet_key fKey;
			bool			fReceivedLastFragment;
			int32			fBytesLeft;
				// starts at IP_MAXPACKET and is reduced by every range added
			FragmentList	fFragments;
			net_timer		fTimer;
				// restarted by AddFragment(), calls StaleTimer() on expiry
};
125 
126 
/*!	Datagram socket backing a raw IPv4 socket. It carries a list link so
	that it can be a member of a RawSocketList.
*/
class RawSocket
	: public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
public:
							RawSocket(net_socket* socket);
};
132 
// List type holding all open raw sockets.
typedef DoublyLinkedList<RawSocket> RawSocketList;

// IPv4 instantiations of the generic multicast templates.
typedef MulticastGroupInterface<IPv4Multicast> IPv4GroupInterface;
typedef MulticastFilter<IPv4Multicast> IPv4MulticastFilter;
137 
138 struct MulticastStateHash {
139 	typedef std::pair<const in_addr* , uint32> KeyType;
140 	typedef IPv4GroupInterface ValueType;
141 
142 	size_t HashKey(const KeyType &key) const
143 		{ return key.first->s_addr ^ key.second; }
144 	size_t Hash(ValueType* value) const
145 		{ return HashKey(std::make_pair(&value->Address(),
146 			value->Interface()->index)); }
147 	bool Compare(const KeyType &key, ValueType* value) const
148 		{ return value->Interface()->index == key.second
149 			&& value->Address().s_addr == key.first->s_addr; }
150 	bool CompareValues(ValueType* value1, ValueType* value2) const
151 		{ return value1->Interface()->index == value2->Interface()->index
152 			&& value1->Address().s_addr == value2->Address().s_addr; }
153 	HashTableLink<ValueType>* GetLink(ValueType* value) const { return value; }
154 };
155 
156 
/*!	Per socket state of the IPv4 protocol.
	The constructor only sets up the multicast filter; the remaining members
	are initialized by ipv4_init_protocol().
*/
struct ipv4_protocol : net_protocol {
	ipv4_protocol()
		:
		multicast_filter(this)
	{
	}

	RawSocket	*raw;
		// only set while the socket is open as a raw socket, see
		// ipv4_open()/ipv4_close()
	uint8		service_type;
	uint8		time_to_live;
	uint8		multicast_time_to_live;
	uint32		flags;
		// combination of the IP_FLAG_* protocol flags

	IPv4MulticastFilter multicast_filter;
};
172 
// protocol flags
// (stored in ipv4_protocol::flags)
#define IP_FLAG_HEADER_INCLUDED	0x01
	// reflects the IP_HDRINCL socket option


// Values ipv4_init_protocol() assigns to new protocol objects
static const int kDefaultTTL = 254;
static const int kDefaultMulticastTTL = 1;


extern net_protocol_module_info gIPv4Module;
	// we need this in ipv4_std_ops() for registering the AF_INET domain

net_stack_module_info* gStackModule;
net_buffer_module_info* gBufferModule;

static struct net_domain* sDomain;
static net_datalink_module_info* sDatalinkModule;
static net_socket_module_info* sSocketModule;
static int32 sPacketID;
static RawSocketList sRawSockets;
static mutex sRawSocketsLock;
	// held while accessing sRawSockets
static mutex sFragmentLock;
	// held while accessing sFragmentHash and the packets stored in it
static hash_table* sFragmentHash;
static mutex sMulticastGroupsLock;
	// held while accessing sMulticastState

typedef MultiHashTable<MulticastStateHash> MulticastState;
static MulticastState* sMulticastState;

// Cache of the receiving protocol modules, indexed by protocol number; it is
// filled on demand by receiving_protocol().
static net_protocol_module_info* sReceivingProtocol[256];
static mutex sReceivingProtocolLock;
202 
203 
/*!	Writes the dotted decimal representation of \a address into \a buf,
	which is \a bufLen bytes large.
	\return \a buf, so that the call can directly be used as an argument.
*/
static const char*
print_address(const in_addr* address, char* buf, size_t bufLen)
{
	// The address is stored in network byte order; convert it first, so
	// that the most significant byte is the first octet to print.
	const unsigned int hostOrder = ntohl(address->s_addr);

	const unsigned int first = (hostOrder >> 24) & 0xff;
	const unsigned int second = (hostOrder >> 16) & 0xff;
	const unsigned int third = (hostOrder >> 8) & 0xff;
	const unsigned int fourth = hostOrder & 0xff;

	snprintf(buf, bufLen, "%u.%u.%u.%u", first, second, third, fourth);
	return buf;
}
214 
215 
216 RawSocket::RawSocket(net_socket* socket)
217 	: DatagramSocket<>("ipv4 raw socket", socket)
218 {
219 }
220 
221 
222 //	#pragma mark -
223 
224 
225 FragmentPacket::FragmentPacket(const ipv4_packet_key &key)
226 	:
227 	fKey(key),
228 	fReceivedLastFragment(false),
229 	fBytesLeft(IP_MAXPACKET)
230 {
231 	gStackModule->init_timer(&fTimer, StaleTimer, this);
232 }
233 
234 
235 FragmentPacket::~FragmentPacket()
236 {
237 	// cancel the kill timer
238 	gStackModule->set_timer(&fTimer, -1);
239 
240 	// delete all fragments
241 	net_buffer* buffer;
242 	while ((buffer = fFragments.RemoveHead()) != NULL) {
243 		gBufferModule->free(buffer);
244 	}
245 }
246 
247 
248 status_t
249 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer* buffer,
250 	bool lastFragment)
251 {
252 	// restart the timer
253 	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
254 
255 	if (start >= end) {
256 		// invalid fragment
257 		return B_BAD_DATA;
258 	}
259 
260 	// Search for a position in the list to insert the fragment
261 
262 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
263 	net_buffer* previous = NULL;
264 	net_buffer* next = NULL;
265 	while ((previous = iterator.Next()) != NULL) {
266 		if (previous->fragment.start <= start) {
267 			// The new fragment can be inserted after this one
268 			break;
269 		}
270 
271 		next = previous;
272 	}
273 
274 	// See if we already have the fragment's data
275 
276 	if (previous != NULL && previous->fragment.start <= start
277 		&& previous->fragment.end >= end) {
278 		// we do, so we can just drop this fragment
279 		gBufferModule->free(buffer);
280 		return B_OK;
281 	}
282 
283 	TRACE("    previous: %p, next: %p", previous, next);
284 
285 	// If we have parts of the data already, truncate as needed
286 
287 	if (previous != NULL && previous->fragment.end > start) {
288 		TRACE("    remove header %d bytes", previous->fragment.end - start);
289 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
290 		start = previous->fragment.end;
291 	}
292 	if (next != NULL && next->fragment.start < end) {
293 		TRACE("    remove trailer %d bytes", next->fragment.start - end);
294 		gBufferModule->remove_trailer(buffer, next->fragment.start - end);
295 		end = next->fragment.start;
296 	}
297 
298 	// Now try if we can already merge the fragments together
299 
300 	// We will always keep the last buffer received, so that we can still
301 	// report an error (in which case we're not responsible for freeing it)
302 
303 	if (previous != NULL && previous->fragment.end == start) {
304 		fFragments.Remove(previous);
305 
306 		buffer->fragment.start = previous->fragment.start;
307 		buffer->fragment.end = end;
308 
309 		status_t status = gBufferModule->merge(buffer, previous, false);
310 		TRACE("    merge previous: %s", strerror(status));
311 		if (status != B_OK) {
312 			fFragments.Insert(next, previous);
313 			return status;
314 		}
315 
316 		fFragments.Insert(next, buffer);
317 
318 		// cut down existing hole
319 		fBytesLeft -= end - start;
320 
321 		if (lastFragment && !fReceivedLastFragment) {
322 			fReceivedLastFragment = true;
323 			fBytesLeft -= IP_MAXPACKET - end;
324 		}
325 
326 		TRACE("    hole length: %d", (int)fBytesLeft);
327 
328 		return B_OK;
329 	} else if (next != NULL && next->fragment.start == end) {
330 		fFragments.Remove(next);
331 
332 		buffer->fragment.start = start;
333 		buffer->fragment.end = next->fragment.end;
334 
335 		status_t status = gBufferModule->merge(buffer, next, true);
336 		TRACE("    merge next: %s", strerror(status));
337 		if (status != B_OK) {
338 			fFragments.Insert((net_buffer*)previous->link.next, next);
339 			return status;
340 		}
341 
342 		fFragments.Insert((net_buffer*)previous->link.next, buffer);
343 
344 		// cut down existing hole
345 		fBytesLeft -= end - start;
346 
347 		if (lastFragment && !fReceivedLastFragment) {
348 			fReceivedLastFragment = true;
349 			fBytesLeft -= IP_MAXPACKET - end;
350 		}
351 
352 		TRACE("    hole length: %d", (int)fBytesLeft);
353 
354 		return B_OK;
355 	}
356 
357 	// We couldn't merge the fragments, so we need to add it as is
358 
359 	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);
360 
361 	buffer->fragment.start = start;
362 	buffer->fragment.end = end;
363 	fFragments.Insert(next, buffer);
364 
365 	// update length of the hole, if any
366 	fBytesLeft -= end - start;
367 
368 	if (lastFragment && !fReceivedLastFragment) {
369 		fReceivedLastFragment = true;
370 		fBytesLeft -= IP_MAXPACKET - end;
371 	}
372 
373 	TRACE("    hole length: %d", (int)fBytesLeft);
374 
375 	return B_OK;
376 }
377 
378 
379 /*!	Reassembles the fragments to the specified buffer \a to.
380 	This buffer must have been added via AddFragment() before.
381 */
382 status_t
383 FragmentPacket::Reassemble(net_buffer* to)
384 {
385 	if (!IsComplete())
386 		return B_ERROR;
387 
388 	net_buffer* buffer = NULL;
389 
390 	net_buffer* fragment;
391 	while ((fragment = fFragments.RemoveHead()) != NULL) {
392 		if (buffer != NULL) {
393 			status_t status;
394 			if (to == fragment) {
395 				status = gBufferModule->merge(fragment, buffer, false);
396 				buffer = fragment;
397 			} else
398 				status = gBufferModule->merge(buffer, fragment, true);
399 			if (status != B_OK)
400 				return status;
401 		} else
402 			buffer = fragment;
403 	}
404 
405 	if (buffer != to)
406 		panic("ipv4 packet reassembly did not work correctly.\n");
407 
408 	return B_OK;
409 }
410 
411 
412 int
413 FragmentPacket::Compare(void* _packet, const void* _key)
414 {
415 	const ipv4_packet_key* key = (ipv4_packet_key*)_key;
416 	ipv4_packet_key* packetKey = &((FragmentPacket*)_packet)->fKey;
417 
418 	if (packetKey->id == key->id
419 		&& packetKey->source == key->source
420 		&& packetKey->destination == key->destination
421 		&& packetKey->protocol == key->protocol)
422 		return 0;
423 
424 	return 1;
425 }
426 
427 
428 uint32
429 FragmentPacket::Hash(void* _packet, const void* _key, uint32 range)
430 {
431 	const struct ipv4_packet_key* key = (struct ipv4_packet_key*)_key;
432 	FragmentPacket* packet = (FragmentPacket*)_packet;
433 	if (packet != NULL)
434 		key = &packet->fKey;
435 
436 	return (key->source ^ key->destination ^ key->protocol ^ key->id) % range;
437 }
438 
439 
440 /*static*/ void
441 FragmentPacket::StaleTimer(struct net_timer* timer, void* data)
442 {
443 	FragmentPacket* packet = (FragmentPacket*)data;
444 	TRACE("Assembling FragmentPacket %p timed out!", packet);
445 
446 	MutexLocker locker(&sFragmentLock);
447 
448 	hash_remove(sFragmentHash, packet);
449 	delete packet;
450 }
451 
452 
453 //	#pragma mark -
454 
455 
// Debug helper that is currently compiled out.
#if 0
/*!	Prints all fields of \a header via dprintf(). */
static void
dump_ipv4_header(ipv4_header &header)
{
	// Overlay used to print the individual bytes of the addresses.
	struct pretty_ipv4 {
	#if B_HOST_IS_LENDIAN == 1
		uint8 a;
		uint8 b;
		uint8 c;
		uint8 d;
	#else
		uint8 d;
		uint8 c;
		uint8 b;
		uint8 a;
	#endif
	};
	struct pretty_ipv4* src = (struct pretty_ipv4*)&header.source;
	struct pretty_ipv4* dst = (struct pretty_ipv4*)&header.destination;
	dprintf("  version: %d\n", header.version);
	dprintf("  header_length: 4 * %d\n", header.header_length);
	dprintf("  service_type: %d\n", header.service_type);
	dprintf("  total_length: %d\n", header.TotalLength());
	dprintf("  id: %d\n", ntohs(header.id));
	// the offset is printed without the flags, those are shown as r/d/m
	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
		header.FragmentOffset() & IP_FRAGMENT_OFFSET_MASK,
		(header.FragmentOffset() & IP_RESERVED_FLAG) ? 'r' : '-',
		(header.FragmentOffset() & IP_DONT_FRAGMENT) ? 'd' : '-',
		(header.FragmentOffset() & IP_MORE_FRAGMENTS) ? 'm' : '-');
	dprintf("  time_to_live: %d\n", header.time_to_live);
	dprintf("  protocol: %d\n", header.protocol);
	dprintf("  checksum: %d\n", ntohs(header.checksum));
	dprintf("  source: %d.%d.%d.%d\n", src->a, src->b, src->c, src->d);
	dprintf("  destination: %d.%d.%d.%d\n", dst->a, dst->b, dst->c, dst->d);
}
#endif
492 
493 
494 static int
495 dump_ipv4_multicast(int argc, char** argv)
496 {
497 	MulticastState::Iterator it = sMulticastState->GetIterator();
498 
499 	while (it.HasNext()) {
500 		IPv4GroupInterface* state = it.Next();
501 
502 		char addressBuffer[64];
503 
504 		kprintf("%p: group <%s, %s, %s {", state, state->Interface()->name,
505 			print_address(&state->Address(), addressBuffer,
506 			sizeof(addressBuffer)),
507 			state->Mode() == IPv4GroupInterface::kExclude
508 				? "Exclude" : "Include");
509 
510 		int count = 0;
511 		IPv4GroupInterface::AddressSet::Iterator it
512 			= state->Sources().GetIterator();
513 		while (it.HasNext()) {
514 			kprintf("%s%s", count > 0 ? ", " : "", print_address(&it.Next(),
515 				addressBuffer, sizeof(addressBuffer)));
516 			count++;
517 		}
518 
519 		kprintf("}> sock %p\n", state->Parent()->Socket());
520 	}
521 
522 	return 0;
523 }
524 
525 
526 /*!	Attempts to re-assemble fragmented packets.
527 	\return B_OK if everything went well; if it could reassemble the packet, \a _buffer
528 		will point to its buffer, otherwise, it will be \c NULL.
529 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
530 */
531 static status_t
532 reassemble_fragments(const ipv4_header &header, net_buffer** _buffer)
533 {
534 	net_buffer* buffer = *_buffer;
535 	status_t status;
536 
537 	struct ipv4_packet_key key;
538 	key.source = (in_addr_t)header.source;
539 	key.destination = (in_addr_t)header.destination;
540 	key.id = header.id;
541 	key.protocol = header.protocol;
542 
543 	// TODO: Make locking finer grained.
544 	MutexLocker locker(&sFragmentLock);
545 
546 	FragmentPacket* packet = (FragmentPacket*)hash_lookup(sFragmentHash, &key);
547 	if (packet == NULL) {
548 		// New fragment packet
549 		packet = new (std::nothrow) FragmentPacket(key);
550 		if (packet == NULL)
551 			return B_NO_MEMORY;
552 
553 		// add packet to hash
554 		status = hash_insert(sFragmentHash, packet);
555 		if (status != B_OK) {
556 			delete packet;
557 			return status;
558 		}
559 	}
560 
561 	uint16 fragmentOffset = header.FragmentOffset();
562 	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
563 	uint16 end = start + header.TotalLength() - header.HeaderLength();
564 	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;
565 
566 	TRACE("   Received IPv4 %sfragment of size %d, offset %d.",
567 		lastFragment ? "last ": "", end - start, start);
568 
569 	// Remove header unless this is the first fragment
570 	if (start != 0)
571 		gBufferModule->remove_header(buffer, header.HeaderLength());
572 
573 	status = packet->AddFragment(start, end, buffer, lastFragment);
574 	if (status != B_OK)
575 		return status;
576 
577 	if (packet->IsComplete()) {
578 		hash_remove(sFragmentHash, packet);
579 			// no matter if reassembling succeeds, we won't need this packet
580 			// anymore
581 
582 		status = packet->Reassemble(buffer);
583 		delete packet;
584 
585 		// _buffer does not change
586 		return status;
587 	}
588 
589 	// This indicates that the packet is not yet complete
590 	*_buffer = NULL;
591 	return B_OK;
592 }
593 
594 
595 /*!	Fragments the incoming buffer and send all fragments via the specified
596 	\a route.
597 */
598 static status_t
599 send_fragments(ipv4_protocol* protocol, struct net_route* route,
600 	net_buffer* buffer, uint32 mtu)
601 {
602 	TRACE_SK(protocol, "SendFragments(%lu bytes, mtu %lu)", buffer->size, mtu);
603 
604 	NetBufferHeaderReader<ipv4_header> originalHeader(buffer);
605 	if (originalHeader.Status() != B_OK)
606 		return originalHeader.Status();
607 
608 	uint16 headerLength = originalHeader->HeaderLength();
609 	uint32 bytesLeft = buffer->size - headerLength;
610 	uint32 fragmentOffset = 0;
611 	status_t status = B_OK;
612 
613 	net_buffer* headerBuffer = gBufferModule->split(buffer, headerLength);
614 	if (headerBuffer == NULL)
615 		return B_NO_MEMORY;
616 
617 	// TODO: we need to make sure ipv4_header is contiguous or
618 	// use another construct.
619 	NetBufferHeaderReader<ipv4_header> bufferHeader(headerBuffer);
620 	ipv4_header* header = &bufferHeader.Data();
621 
622 	// Adapt MTU to be a multiple of 8 (fragment offsets can only be specified
623 	// this way)
624 	mtu -= headerLength;
625 	mtu &= ~7;
626 	TRACE("  adjusted MTU to %ld\n", mtu);
627 
628 	TRACE("  bytesLeft = %ld\n", bytesLeft);
629 	while (bytesLeft > 0) {
630 		uint32 fragmentLength = min_c(bytesLeft, mtu);
631 		bytesLeft -= fragmentLength;
632 		bool lastFragment = bytesLeft == 0;
633 
634 		header->total_length = htons(fragmentLength + headerLength);
635 		header->fragment_offset = htons((lastFragment ? 0 : IP_MORE_FRAGMENTS)
636 			| (fragmentOffset >> 3));
637 		header->checksum = 0;
638 		header->checksum = gStackModule->checksum((uint8*)header,
639 			headerLength);
640 			// TODO: compute the checksum only for those parts that changed?
641 
642 		TRACE("  send fragment of %ld bytes (%ld bytes left)\n", fragmentLength, bytesLeft);
643 
644 		net_buffer* fragmentBuffer;
645 		if (!lastFragment) {
646 			fragmentBuffer = gBufferModule->split(buffer, fragmentLength);
647 			fragmentOffset += fragmentLength;
648 		} else
649 			fragmentBuffer = buffer;
650 
651 		if (fragmentBuffer == NULL) {
652 			status = B_NO_MEMORY;
653 			break;
654 		}
655 
656 		// copy header to fragment
657 		status = gBufferModule->prepend(fragmentBuffer, header, headerLength);
658 
659 		// send fragment
660 		if (status == B_OK)
661 			status = sDatalinkModule->send_data(route, fragmentBuffer);
662 
663 		if (lastFragment) {
664 			// we don't own the last buffer, so we don't have to free it
665 			break;
666 		}
667 
668 		if (status != B_OK) {
669 			gBufferModule->free(fragmentBuffer);
670 			break;
671 		}
672 	}
673 
674 	gBufferModule->free(headerBuffer);
675 	return status;
676 }
677 
678 
679 static status_t
680 deliver_multicast(net_protocol_module_info* module, net_buffer* buffer,
681 	bool deliverToRaw)
682 {
683 	if (module->deliver_data == NULL)
684 		return B_OK;
685 
686 	MutexLocker _(sMulticastGroupsLock);
687 
688 	sockaddr_in* multicastAddr = (sockaddr_in*)buffer->destination;
689 
690 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
691 		&multicastAddr->sin_addr, buffer->interface->index));
692 
693 	while (it.HasNext()) {
694 		IPv4GroupInterface* state = it.Next();
695 
696 		if (deliverToRaw && state->Parent()->Socket()->raw == NULL)
697 			continue;
698 
699 		if (state->FilterAccepts(buffer)) {
700 			// as Multicast filters are installed with an IPv4 protocol
701 			// reference, we need to go and find the appropriate instance
702 			// related to the 'receiving protocol' with module 'module'.
703 			net_protocol* proto
704 				= state->Parent()->Socket()->socket->first_protocol;
705 
706 			while (proto && proto->module != module)
707 				proto = proto->next;
708 
709 			if (proto)
710 				module->deliver_data(proto, buffer);
711 		}
712 	}
713 
714 	return B_OK;
715 }
716 
717 
718 static void
719 raw_receive_data(net_buffer* buffer)
720 {
721 	MutexLocker locker(sRawSocketsLock);
722 
723 	if (sRawSockets.IsEmpty())
724 		return;
725 
726 	TRACE("RawReceiveData(%i)", buffer->protocol);
727 
728 	if (buffer->flags & MSG_MCAST) {
729 		// we need to call deliver_multicast here separately as
730 		// buffer still has the IP header, and it won't in the
731 		// next call. This isn't very optimized but works for now.
732 		// A better solution would be to hold separate hash tables
733 		// and lists for RAW and non-RAW sockets.
734 		deliver_multicast(&gIPv4Module, buffer, true);
735 	} else {
736 		RawSocketList::Iterator iterator = sRawSockets.GetIterator();
737 
738 		while (iterator.HasNext()) {
739 			RawSocket* raw = iterator.Next();
740 
741 			if (raw->Socket()->protocol == buffer->protocol)
742 				raw->SocketEnqueue(buffer);
743 		}
744 	}
745 }
746 
747 
748 static sockaddr*
749 fill_sockaddr_in(sockaddr_in* destination, const in_addr &source)
750 {
751 	memset(destination, 0, sizeof(sockaddr_in));
752 	destination->sin_family = AF_INET;
753 	destination->sin_addr = source;
754 	return (sockaddr*)destination;
755 }
756 
757 
758 status_t
759 IPv4Multicast::JoinGroup(IPv4GroupInterface* state)
760 {
761 	MutexLocker _(sMulticastGroupsLock);
762 
763 	sockaddr_in groupAddr;
764 	net_interface* interface = state->Interface();
765 
766 	status_t status = interface->first_info->join_multicast(
767 		interface->first_protocol,
768 		fill_sockaddr_in(&groupAddr, state->Address()));
769 	if (status != B_OK)
770 		return status;
771 
772 	sMulticastState->Insert(state);
773 	return B_OK;
774 }
775 
776 
777 status_t
778 IPv4Multicast::LeaveGroup(IPv4GroupInterface* state)
779 {
780 	MutexLocker _(sMulticastGroupsLock);
781 
782 	sMulticastState->Remove(state);
783 
784 	sockaddr_in groupAddr;
785 	net_interface* interface = state->Interface();
786 
787 	return interface->first_protocol->module->join_multicast(
788 		interface->first_protocol,
789 		fill_sockaddr_in(&groupAddr, state->Address()));
790 }
791 
792 
793 static net_protocol_module_info*
794 receiving_protocol(uint8 protocol)
795 {
796 	net_protocol_module_info* module = sReceivingProtocol[protocol];
797 	if (module != NULL)
798 		return module;
799 
800 	MutexLocker locker(sReceivingProtocolLock);
801 
802 	module = sReceivingProtocol[protocol];
803 	if (module != NULL)
804 		return module;
805 
806 	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol,
807 			&module) == B_OK)
808 		sReceivingProtocol[protocol] = module;
809 
810 	return module;
811 }
812 
813 
814 static inline sockaddr*
815 fill_sockaddr_in(sockaddr_in* target, in_addr_t address)
816 {
817 	memset(target, 0, sizeof(sockaddr_in));
818 	target->sin_family = AF_INET;
819 	target->sin_len = sizeof(sockaddr_in);
820 	target->sin_addr.s_addr = address;
821 	return (sockaddr*)target;
822 }
823 
824 
825 static status_t
826 ipv4_delta_group(IPv4GroupInterface* group, int option,
827 	net_interface* interface, const in_addr* sourceAddr)
828 {
829 	switch (option) {
830 		case IP_ADD_MEMBERSHIP:
831 			return group->Add();
832 		case IP_DROP_MEMBERSHIP:
833 			return group->Drop();
834 		case IP_BLOCK_SOURCE:
835 			return group->BlockSource(*sourceAddr);
836 		case IP_UNBLOCK_SOURCE:
837 			return group->UnblockSource(*sourceAddr);
838 		case IP_ADD_SOURCE_MEMBERSHIP:
839 			return group->AddSSM(*sourceAddr);
840 		case IP_DROP_SOURCE_MEMBERSHIP:
841 			return group->DropSSM(*sourceAddr);
842 	}
843 
844 	return B_ERROR;
845 }
846 
847 
848 static status_t
849 ipv4_delta_membership(ipv4_protocol* protocol, int option,
850 	net_interface* interface, const in_addr* groupAddr,
851 	const in_addr* sourceAddr)
852 {
853 	IPv4MulticastFilter &filter = protocol->multicast_filter;
854 	IPv4GroupInterface* state = NULL;
855 	status_t status = B_OK;
856 
857 	switch (option) {
858 		case IP_ADD_MEMBERSHIP:
859 		case IP_ADD_SOURCE_MEMBERSHIP:
860 			status = filter.GetState(*groupAddr, interface, state, true);
861 			break;
862 
863 		case IP_DROP_MEMBERSHIP:
864 		case IP_BLOCK_SOURCE:
865 		case IP_UNBLOCK_SOURCE:
866 		case IP_DROP_SOURCE_MEMBERSHIP:
867 			filter.GetState(*groupAddr, interface, state, false);
868 			if (state == NULL) {
869 				if (option == IP_DROP_MEMBERSHIP
870 					|| option == IP_DROP_SOURCE_MEMBERSHIP)
871 					return EADDRNOTAVAIL;
872 
873 				return B_BAD_VALUE;
874 			}
875 			break;
876 	}
877 
878 	if (status != B_OK)
879 		return status;
880 
881 	status = ipv4_delta_group(state, option, interface, sourceAddr);
882 	filter.ReturnState(state);
883 	return status;
884 }
885 
886 
/*!	Translates one of the protocol independent MCAST_* socket options into
	its IPv4 specific IP_* counterpart.
	\return the IPv4 option, or -1 if \a option is not one of the MCAST_*
		options.
*/
static int
generic_to_ipv4(int option)
{
	static const struct {
		int	generic;
		int	ipv4;
	} kOptionMap[] = {
		{MCAST_JOIN_GROUP, IP_ADD_MEMBERSHIP},
		{MCAST_JOIN_SOURCE_GROUP, IP_ADD_SOURCE_MEMBERSHIP},
		{MCAST_LEAVE_GROUP, IP_DROP_MEMBERSHIP},
		{MCAST_BLOCK_SOURCE, IP_BLOCK_SOURCE},
		{MCAST_UNBLOCK_SOURCE, IP_UNBLOCK_SOURCE},
		{MCAST_LEAVE_SOURCE_GROUP, IP_DROP_SOURCE_MEMBERSHIP}
	};
	const size_t kOptionCount = sizeof(kOptionMap) / sizeof(kOptionMap[0]);

	for (size_t i = 0; i < kOptionCount; i++) {
		if (kOptionMap[i].generic == option)
			return kOptionMap[i].ipv4;
	}

	return -1;
}
907 
908 
909 static net_interface*
910 get_multicast_interface(ipv4_protocol* protocol, const in_addr* address)
911 {
912 	sockaddr_in groupAddr;
913 	net_route* route = sDatalinkModule->get_route(sDomain,
914 		fill_sockaddr_in(&groupAddr, address ? address->s_addr : INADDR_ANY));
915 	if (route == NULL)
916 		return NULL;
917 
918 	return route->interface;
919 }
920 
921 
922 static status_t
923 ipv4_delta_membership(ipv4_protocol* protocol, int option,
924 	in_addr* interfaceAddr, in_addr* groupAddr, in_addr* sourceAddr)
925 {
926 	net_interface* interface = NULL;
927 
928 	if (interfaceAddr->s_addr == INADDR_ANY) {
929 		interface = get_multicast_interface(protocol, groupAddr);
930 	} else {
931 		sockaddr_in address;
932 		interface = sDatalinkModule->get_interface_with_address(sDomain,
933 			fill_sockaddr_in(&address, interfaceAddr->s_addr));
934 	}
935 
936 	if (interface == NULL)
937 		return ENODEV;
938 
939 	return ipv4_delta_membership(protocol, option, interface,
940 		groupAddr, sourceAddr);
941 }
942 
943 
944 static status_t
945 ipv4_generic_delta_membership(ipv4_protocol* protocol, int option,
946 	uint32 index, const sockaddr_storage* _groupAddr,
947 	const sockaddr_storage* _sourceAddr)
948 {
949 	if (_groupAddr->ss_family != AF_INET)
950 		return B_BAD_VALUE;
951 
952 	if (_sourceAddr && _sourceAddr->ss_family != AF_INET)
953 		return B_BAD_VALUE;
954 
955 	const in_addr* groupAddr = &((const sockaddr_in*)_groupAddr)->sin_addr;
956 
957 	net_interface* interface;
958 	if (index == 0)
959 		interface = get_multicast_interface(protocol, groupAddr);
960 	else
961 		interface = sDatalinkModule->get_interface(sDomain, index);
962 
963 	if (interface == NULL)
964 		return ENODEV;
965 
966 	const in_addr* sourceAddr = NULL;
967 	if (_sourceAddr)
968 		sourceAddr = &((const sockaddr_in*)_sourceAddr)->sin_addr;
969 
970 	return ipv4_delta_membership(protocol, generic_to_ipv4(option), interface,
971 		groupAddr, sourceAddr);
972 }
973 
974 
975 static status_t
976 get_int_option(void* target, size_t length, int value)
977 {
978 	if (length != sizeof(int))
979 		return B_BAD_VALUE;
980 
981 	return user_memcpy(target, &value, sizeof(int));
982 }
983 
984 
985 template<typename Type> static status_t
986 set_int_option(Type &target, const void* _value, size_t length)
987 {
988 	int value;
989 
990 	if (length != sizeof(int))
991 		return B_BAD_VALUE;
992 
993 	if (user_memcpy(&value, _value, sizeof(int)) != B_OK)
994 		return B_BAD_ADDRESS;
995 
996 	target = value;
997 	return B_OK;
998 }
999 
1000 
1001 //	#pragma mark -
1002 
1003 
1004 net_protocol*
1005 ipv4_init_protocol(net_socket* socket)
1006 {
1007 	ipv4_protocol* protocol = new (std::nothrow) ipv4_protocol();
1008 	if (protocol == NULL)
1009 		return NULL;
1010 
1011 	protocol->raw = NULL;
1012 	protocol->service_type = 0;
1013 	protocol->time_to_live = kDefaultTTL;
1014 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
1015 	protocol->flags = 0;
1016 	return protocol;
1017 }
1018 
1019 
1020 status_t
1021 ipv4_uninit_protocol(net_protocol* _protocol)
1022 {
1023 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1024 
1025 	delete protocol->raw;
1026 	delete protocol;
1027 	return B_OK;
1028 }
1029 
1030 
1031 /*!	Since open() is only called on the top level protocol, when we get here
1032 	it means we are on a SOCK_RAW socket.
1033 */
1034 status_t
1035 ipv4_open(net_protocol* _protocol)
1036 {
1037 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1038 
1039 	RawSocket* raw = new (std::nothrow) RawSocket(protocol->socket);
1040 	if (raw == NULL)
1041 		return B_NO_MEMORY;
1042 
1043 	status_t status = raw->InitCheck();
1044 	if (status != B_OK) {
1045 		delete raw;
1046 		return status;
1047 	}
1048 
1049 	TRACE_SK(protocol, "Open()");
1050 
1051 	protocol->raw = raw;
1052 
1053 	MutexLocker locker(sRawSocketsLock);
1054 	sRawSockets.Add(raw);
1055 	return B_OK;
1056 }
1057 
1058 
1059 status_t
1060 ipv4_close(net_protocol* _protocol)
1061 {
1062 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1063 	RawSocket* raw = protocol->raw;
1064 	if (raw == NULL)
1065 		return B_ERROR;
1066 
1067 	TRACE_SK(protocol, "Close()");
1068 
1069 	MutexLocker locker(sRawSocketsLock);
1070 	sRawSockets.Remove(raw);
1071 	delete raw;
1072 	protocol->raw = NULL;
1073 
1074 	return B_OK;
1075 }
1076 
1077 
/*!	There is nothing to do here for IPv4.
	\return always B_OK.
*/
status_t
ipv4_free(net_protocol* protocol)
{
	return B_OK;
}
1083 
1084 
/*!	Connecting is not implemented by this protocol.
	\return always B_ERROR.
*/
status_t
ipv4_connect(net_protocol* protocol, const struct sockaddr* address)
{
	return B_ERROR;
}
1090 
1091 
/*!	Accepting connections is not supported by this protocol.
	\return always EOPNOTSUPP.
*/
status_t
ipv4_accept(net_protocol* protocol, struct net_socket** _acceptedSocket)
{
	return EOPNOTSUPP;
}
1097 
1098 
1099 status_t
1100 ipv4_control(net_protocol* _protocol, int level, int option, void* value,
1101 	size_t* _length)
1102 {
1103 	if ((level & LEVEL_MASK) != IPPROTO_IP)
1104 		return sDatalinkModule->control(sDomain, option, value, _length);
1105 
1106 	return B_BAD_VALUE;
1107 }
1108 
1109 
1110 status_t
1111 ipv4_getsockopt(net_protocol* _protocol, int level, int option, void* value,
1112 	int* _length)
1113 {
1114 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1115 
1116 	if (level == IPPROTO_IP) {
1117 		if (option == IP_HDRINCL) {
1118 			return get_int_option(value, *_length,
1119 				(protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0);
1120 		}
1121 		if (option == IP_TTL)
1122 			return get_int_option(value, *_length, protocol->time_to_live);
1123 		if (option == IP_TOS)
1124 			return get_int_option(value, *_length, protocol->service_type);
1125 		if (option == IP_MULTICAST_TTL) {
1126 			return get_int_option(value, *_length,
1127 				protocol->multicast_time_to_live);
1128 		}
1129 		if (option == IP_ADD_MEMBERSHIP
1130 			|| option == IP_DROP_MEMBERSHIP
1131 			|| option == IP_BLOCK_SOURCE
1132 			|| option == IP_UNBLOCK_SOURCE
1133 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1134 			|| option == IP_DROP_SOURCE_MEMBERSHIP
1135 			|| option == MCAST_JOIN_GROUP
1136 			|| option == MCAST_LEAVE_GROUP
1137 			|| option == MCAST_BLOCK_SOURCE
1138 			|| option == MCAST_UNBLOCK_SOURCE
1139 			|| option == MCAST_JOIN_SOURCE_GROUP
1140 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1141 			// RFC 3678, Section 4.1:
1142 			// ``An error of EOPNOTSUPP is returned if these options are
1143 			// used with getsockopt().''
1144 			return EOPNOTSUPP;
1145 		}
1146 
1147 		dprintf("IPv4::getsockopt(): get unknown option: %d\n", option);
1148 		return ENOPROTOOPT;
1149 	}
1150 
1151 	return sSocketModule->get_option(protocol->socket, level, option, value,
1152 		_length);
1153 }
1154 
1155 
1156 status_t
1157 ipv4_setsockopt(net_protocol* _protocol, int level, int option,
1158 	const void* value, int length)
1159 {
1160 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1161 
1162 	if (level == IPPROTO_IP) {
1163 		if (option == IP_HDRINCL) {
1164 			int headerIncluded;
1165 			if (length != sizeof(int))
1166 				return B_BAD_VALUE;
1167 			if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded))
1168 					!= B_OK)
1169 				return B_BAD_ADDRESS;
1170 
1171 			if (headerIncluded)
1172 				protocol->flags |= IP_FLAG_HEADER_INCLUDED;
1173 			else
1174 				protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
1175 
1176 			return B_OK;
1177 		}
1178 		if (option == IP_TTL)
1179 			return set_int_option(protocol->time_to_live, value, length);
1180 		if (option == IP_TOS)
1181 			return set_int_option(protocol->service_type, value, length);
1182 		if (option == IP_MULTICAST_TTL) {
1183 			return set_int_option(protocol->multicast_time_to_live, value,
1184 				length);
1185 		}
1186 		if (option == IP_ADD_MEMBERSHIP || option == IP_DROP_MEMBERSHIP) {
1187 			ip_mreq mreq;
1188 			if (length != sizeof(ip_mreq))
1189 				return B_BAD_VALUE;
1190 			if (user_memcpy(&mreq, value, sizeof(ip_mreq)) != B_OK)
1191 				return B_BAD_ADDRESS;
1192 
1193 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1194 				&mreq.imr_multiaddr, NULL);
1195 		}
1196 		if (option == IP_BLOCK_SOURCE
1197 			|| option == IP_UNBLOCK_SOURCE
1198 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1199 			|| option == IP_DROP_SOURCE_MEMBERSHIP) {
1200 			ip_mreq_source mreq;
1201 			if (length != sizeof(ip_mreq_source))
1202 				return B_BAD_VALUE;
1203 			if (user_memcpy(&mreq, value, sizeof(ip_mreq_source)) != B_OK)
1204 				return B_BAD_ADDRESS;
1205 
1206 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1207 				&mreq.imr_multiaddr, &mreq.imr_sourceaddr);
1208 		}
1209 		if (option == MCAST_LEAVE_GROUP || option == MCAST_JOIN_GROUP) {
1210 			group_req greq;
1211 			if (length != sizeof(group_req))
1212 				return B_BAD_VALUE;
1213 			if (user_memcpy(&greq, value, sizeof(group_req)) != B_OK)
1214 				return B_BAD_ADDRESS;
1215 
1216 			return ipv4_generic_delta_membership(protocol, option,
1217 				greq.gr_interface, &greq.gr_group, NULL);
1218 		}
1219 		if (option == MCAST_BLOCK_SOURCE
1220 			|| option == MCAST_UNBLOCK_SOURCE
1221 			|| option == MCAST_JOIN_SOURCE_GROUP
1222 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1223 			group_source_req greq;
1224 			if (length != sizeof(group_source_req))
1225 				return B_BAD_VALUE;
1226 			if (user_memcpy(&greq, value, sizeof(group_source_req)) != B_OK)
1227 				return B_BAD_ADDRESS;
1228 
1229 			return ipv4_generic_delta_membership(protocol, option,
1230 				greq.gsr_interface, &greq.gsr_group, &greq.gsr_source);
1231 		}
1232 
1233 		dprintf("IPv4::setsockopt(): set unknown option: %d\n", option);
1234 		return ENOPROTOOPT;
1235 	}
1236 
1237 	return sSocketModule->set_option(protocol->socket, level, option,
1238 		value, length);
1239 }
1240 
1241 
1242 status_t
1243 ipv4_bind(net_protocol* protocol, const struct sockaddr* address)
1244 {
1245 	if (address->sa_family != AF_INET)
1246 		return EAFNOSUPPORT;
1247 
1248 	// only INADDR_ANY and addresses of local interfaces are accepted:
1249 	if (((sockaddr_in*)address)->sin_addr.s_addr == INADDR_ANY
1250 		|| IN_MULTICAST(ntohl(((sockaddr_in*)address)->sin_addr.s_addr))
1251 		|| sDatalinkModule->is_local_address(sDomain, address, NULL, NULL)) {
1252 		memcpy(&protocol->socket->address, address, sizeof(struct sockaddr_in));
1253 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
1254 			// explicitly set length, as our callers can't be trusted to
1255 			// always provide the correct length!
1256 		return B_OK;
1257 	}
1258 
1259 	return B_ERROR;
1260 		// address is unknown on this host
1261 }
1262 
1263 
1264 status_t
1265 ipv4_unbind(net_protocol* protocol, struct sockaddr* address)
1266 {
1267 	// nothing to do here
1268 	return B_OK;
1269 }
1270 
1271 
1272 status_t
1273 ipv4_listen(net_protocol* protocol, int count)
1274 {
1275 	return EOPNOTSUPP;
1276 }
1277 
1278 
1279 status_t
1280 ipv4_shutdown(net_protocol* protocol, int direction)
1281 {
1282 	return EOPNOTSUPP;
1283 }
1284 
1285 
/*!	Sends \a buffer over the given \a route.

	Flags the buffer as broadcast or multicast depending on its destination,
	prepends an IPv4 header unless the protocol has
	\c IP_FLAG_HEADER_INCLUDED set, fills in the header checksum where
	needed, and then either fragments the packet (if it exceeds the MTU) or
	passes it on to the datalink module.

	\param _protocol The sending IPv4 protocol instance; may be \c NULL, in
		which case default TTL values and the buffer's protocol are used.
	\param route The route to send over.
	\param buffer The packet; its source and destination are read as
		\c sockaddr_in.
	\return \c B_BAD_VALUE if \a route is \c NULL or a broadcast is attempted
		on a socket without \c SO_BROADCAST, \c EDESTADDRREQ if the
		destination is \c INADDR_ANY, \c EMSGSIZE if the packet is larger
		than 0xffff bytes, an error from the buffer helpers, or the result of
		send_fragments() respectively the datalink module's send_data().
*/
status_t
ipv4_send_routed_data(net_protocol* _protocol, struct net_route* route,
	net_buffer* buffer)
{
	if (route == NULL)
		return B_BAD_VALUE;

	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
	net_interface* interface = route->interface;

	TRACE_SK(protocol, "SendRoutedData(%p, %p [%ld bytes])", route, buffer,
		buffer->size);

	sockaddr_in& source = *(sockaddr_in*)buffer->source;
	sockaddr_in& destination = *(sockaddr_in*)buffer->destination;
	sockaddr_in& broadcastAddress = *(sockaddr_in*)interface->destination;

	// Without a protocol instance the header is always built here.
	bool headerIncluded = false, checksumNeeded = true;
	if (protocol != NULL)
		headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;

	// Start from a clean slate; the flags are recomputed below.
	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);

	if (destination.sin_addr.s_addr == INADDR_ANY)
		return EDESTADDRREQ;

	// Broadcast is only recognized on interfaces whose device has
	// IFF_BROADCAST set, and requires SO_BROADCAST on the sending socket.
	if ((interface->device->flags & IFF_BROADCAST) != 0
		&& (destination.sin_addr.s_addr == INADDR_BROADCAST
			|| destination.sin_addr.s_addr
				== broadcastAddress.sin_addr.s_addr)) {
		if (protocol && !(protocol->socket->options & SO_BROADCAST))
			return B_BAD_VALUE;
		buffer->flags |= MSG_BCAST;
	} else if (IN_MULTICAST(ntohl(destination.sin_addr.s_addr)))
		buffer->flags |= MSG_MCAST;

	// Add IP header (if needed)

	if (!headerIncluded) {
		NetBufferPrepend<ipv4_header> header(buffer);
		if (header.Status() != B_OK)
			return header.Status();

		header->version = IP_VERSION;
		header->header_length = sizeof(ipv4_header) / 4;
			// in 32-bit words, no IP options
		header->service_type = protocol ? protocol->service_type : 0;
		header->total_length = htons(buffer->size);
		header->id = htons(atomic_add(&sPacketID, 1));
		header->fragment_offset = 0;
		// Multicast packets use a separate TTL setting.
		if (protocol) {
			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
				? protocol->multicast_time_to_live : protocol->time_to_live;
		} else {
			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
				? kDefaultMulticastTTL : kDefaultTTL;
		}
		header->protocol = protocol
			? protocol->socket->protocol : buffer->protocol;
		header->checksum = 0;
			// the actual checksum is filled in below

		header->source = source.sin_addr.s_addr;
		header->destination = destination.sin_addr.s_addr;
	} else {
		// if IP_HDRINCL, check if the source address is set
		NetBufferHeaderReader<ipv4_header> header(buffer);
		if (header.Status() != B_OK)
			return header.Status();

		if (header->source == 0) {
			// The caller left the source empty: fill it in, which
			// invalidates any checksum the caller may have supplied.
			header->source = source.sin_addr.s_addr;
			header->checksum = 0;
			header.Sync();
		} else
			checksumNeeded = false;
				// the caller supplied header is sent unmodified
	}

	// This is the largest size accepted here (see the 16 bit total_length
	// header field).
	if (buffer->size > 0xffff)
		return EMSGSIZE;

	if (checksumNeeded) {
		*IPChecksumField(buffer) = gBufferModule->checksum(buffer, 0,
			sizeof(ipv4_header), true);
	}

	TRACE_SK(protocol, "  SendRoutedData(): header chksum: %ld, buffer "
		"checksum: %ld",
		gBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
		gBufferModule->checksum(buffer, 0, buffer->size, true));

	TRACE_SK(protocol, "  SendRoutedData(): destination: %08lx",
		ntohl(destination.sin_addr.s_addr));

	// A route specific MTU takes precedence over the interface's MTU.
	uint32 mtu = route->mtu ? route->mtu : interface->mtu;
	if (buffer->size > mtu) {
		// we need to fragment the packet
		return send_fragments(protocol, route, buffer, mtu);
	}

	return sDatalinkModule->send_data(route, buffer);
}
1386 
1387 
1388 status_t
1389 ipv4_send_data(net_protocol* _protocol, net_buffer* buffer)
1390 {
1391 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1392 
1393 	TRACE_SK(protocol, "SendData(%p [%ld bytes])", buffer, buffer->size);
1394 
1395 	if (protocol && (protocol->flags & IP_FLAG_HEADER_INCLUDED)) {
1396 		if (buffer->size < sizeof(ipv4_header))
1397 			return B_BAD_VALUE;
1398 
1399 		sockaddr_in* source = (sockaddr_in*)buffer->source;
1400 		sockaddr_in* destination = (sockaddr_in*)buffer->destination;
1401 
1402 		fill_sockaddr_in(source, *NetBufferField<in_addr_t,
1403 			offsetof(ipv4_header, source)>(buffer));
1404 		fill_sockaddr_in(destination, *NetBufferField<in_addr_t,
1405 			offsetof(ipv4_header, destination)>(buffer));
1406 	}
1407 
1408 	return sDatalinkModule->send_datagram(protocol, sDomain, buffer);
1409 }
1410 
1411 
1412 ssize_t
1413 ipv4_send_avail(net_protocol* protocol)
1414 {
1415 	return B_ERROR;
1416 }
1417 
1418 
1419 status_t
1420 ipv4_read_data(net_protocol* _protocol, size_t numBytes, uint32 flags,
1421 	net_buffer** _buffer)
1422 {
1423 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1424 	RawSocket* raw = protocol->raw;
1425 	if (raw == NULL)
1426 		return B_ERROR;
1427 
1428 	TRACE_SK(protocol, "ReadData(%lu, 0x%lx)", numBytes, flags);
1429 
1430 	return raw->SocketDequeue(flags, _buffer);
1431 }
1432 
1433 
1434 ssize_t
1435 ipv4_read_avail(net_protocol* _protocol)
1436 {
1437 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1438 	RawSocket* raw = protocol->raw;
1439 	if (raw == NULL)
1440 		return B_ERROR;
1441 
1442 	return raw->AvailableData();
1443 }
1444 
1445 
1446 struct net_domain*
1447 ipv4_get_domain(net_protocol* protocol)
1448 {
1449 	return sDomain;
1450 }
1451 
1452 
1453 size_t
1454 ipv4_get_mtu(net_protocol* protocol, const struct sockaddr* address)
1455 {
1456 	net_route* route = sDatalinkModule->get_route(sDomain, address);
1457 	if (route == NULL)
1458 		return 0;
1459 
1460 	size_t mtu;
1461 	if (route->mtu != 0)
1462 		mtu = route->mtu;
1463 	else
1464 		mtu = route->interface->mtu;
1465 
1466 	sDatalinkModule->put_route(sDomain, route);
1467 	return mtu - sizeof(ipv4_header);
1468 }
1469 
1470 
1471 status_t
1472 ipv4_receive_data(net_buffer* buffer)
1473 {
1474 	TRACE("ReceiveData(%p [%ld bytes])", buffer, buffer->size);
1475 
1476 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1477 	if (bufferHeader.Status() != B_OK)
1478 		return bufferHeader.Status();
1479 
1480 	ipv4_header &header = bufferHeader.Data();
1481 	//dump_ipv4_header(header);
1482 
1483 	if (header.version != IP_VERSION)
1484 		return B_BAD_TYPE;
1485 
1486 	uint16 packetLength = header.TotalLength();
1487 	uint16 headerLength = header.HeaderLength();
1488 	if (packetLength > buffer->size
1489 		|| headerLength < sizeof(ipv4_header))
1490 		return B_BAD_DATA;
1491 
1492 	// TODO: would be nice to have a direct checksum function somewhere
1493 	if (gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1494 		return B_BAD_DATA;
1495 
1496 	struct sockaddr_in& source = *(struct sockaddr_in*)buffer->source;
1497 	struct sockaddr_in& destination = *(struct sockaddr_in*)buffer->destination;
1498 
1499 	fill_sockaddr_in(&source, header.source);
1500 	fill_sockaddr_in(&destination, header.destination);
1501 
1502 	// lower layers notion of Broadcast or Multicast have no relevance to us
1503 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1504 
1505 	if (header.destination == INADDR_BROADCAST) {
1506 		buffer->flags |= MSG_BCAST;
1507 	} else if (IN_MULTICAST(ntohl(header.destination))) {
1508 		buffer->flags |= MSG_MCAST;
1509 	} else {
1510 		uint32 matchedAddressType = 0;
1511 		// test if the packet is really for us
1512 		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1513 			&buffer->interface, &matchedAddressType)) {
1514 			TRACE("  ReceiveData(): packet was not for us %lx -> %lx",
1515 				ntohl(header.source), ntohl(header.destination));
1516 			return B_ERROR;
1517 		}
1518 
1519 		// copy over special address types (MSG_BCAST or MSG_MCAST):
1520 		buffer->flags |= matchedAddressType;
1521 	}
1522 
1523 	uint8 protocol = buffer->protocol = header.protocol;
1524 
1525 	// remove any trailing/padding data
1526 	status_t status = gBufferModule->trim(buffer, packetLength);
1527 	if (status != B_OK)
1528 		return status;
1529 
1530 	// check for fragmentation
1531 	uint16 fragmentOffset = ntohs(header.fragment_offset);
1532 	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
1533 		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
1534 		// this is a fragment
1535 		TRACE("  ReceiveData(): Found a Fragment!");
1536 		status = reassemble_fragments(header, &buffer);
1537 		TRACE("  ReceiveData():  -> %s", strerror(status));
1538 		if (status != B_OK)
1539 			return status;
1540 
1541 		if (buffer == NULL) {
1542 			// buffer was put into fragment packet
1543 			TRACE("  ReceiveData(): Not yet assembled.");
1544 			return B_OK;
1545 		}
1546 	}
1547 
1548 	// Since the buffer might have been changed (reassembled fragment)
1549 	// we must no longer access bufferHeader or header anymore after
1550 	// this point
1551 
1552 	raw_receive_data(buffer);
1553 
1554 	gBufferModule->remove_header(buffer, headerLength);
1555 		// the header is of variable size and may include IP options
1556 		// (that we ignore for now)
1557 
1558 	net_protocol_module_info* module = receiving_protocol(protocol);
1559 	if (module == NULL) {
1560 		// no handler for this packet
1561 		return EAFNOSUPPORT;
1562 	}
1563 
1564 	if ((buffer->flags & MSG_MCAST) != 0) {
1565 		// Unfortunely historical reasons dictate that the IP multicast
1566 		// model be a little different from the unicast one. We deliver
1567 		// this frame directly to all sockets registered with interest
1568 		// for this multicast group.
1569 		return deliver_multicast(module, buffer, false);
1570 	}
1571 
1572 	return module->receive_data(buffer);
1573 }
1574 
1575 
1576 status_t
1577 ipv4_deliver_data(net_protocol* _protocol, net_buffer* buffer)
1578 {
1579 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1580 
1581 	if (protocol->raw == NULL)
1582 		return B_ERROR;
1583 
1584 	return protocol->raw->SocketEnqueue(buffer);
1585 }
1586 
1587 
1588 status_t
1589 ipv4_error(uint32 code, net_buffer* data)
1590 {
1591 	return B_ERROR;
1592 }
1593 
1594 
1595 status_t
1596 ipv4_error_reply(net_protocol* protocol, net_buffer* causedError, uint32 code,
1597 	void* errorData)
1598 {
1599 	return B_ERROR;
1600 }
1601 
1602 
1603 //	#pragma mark -
1604 
1605 
1606 status_t
1607 init_ipv4()
1608 {
1609 	sPacketID = (int32)system_time();
1610 
1611 	mutex_init(&sRawSocketsLock, "raw sockets");
1612 	mutex_init(&sFragmentLock, "IPv4 Fragments");
1613 	mutex_init(&sMulticastGroupsLock, "IPv4 multicast groups");
1614 	mutex_init(&sReceivingProtocolLock, "IPv4 receiving protocols");
1615 
1616 	status_t status;
1617 
1618 	sMulticastState = new MulticastState();
1619 	if (sMulticastState == NULL) {
1620 		status = B_NO_MEMORY;
1621 		goto err4;
1622 	}
1623 
1624 	status = sMulticastState->Init();
1625 	if (status != B_OK)
1626 		goto err5;
1627 
1628 	sFragmentHash = hash_init(MAX_HASH_FRAGMENTS, FragmentPacket::NextOffset(),
1629 		&FragmentPacket::Compare, &FragmentPacket::Hash);
1630 	if (sFragmentHash == NULL)
1631 		goto err5;
1632 
1633 	new (&sRawSockets) RawSocketList;
1634 		// static initializers do not work in the kernel,
1635 		// so we have to do it here, manually
1636 		// TODO: for modules, this shouldn't be required
1637 
1638 	status = gStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
1639 		"network/protocols/ipv4/v1", NULL);
1640 	if (status != B_OK)
1641 		goto err6;
1642 
1643 	status = gStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
1644 		&gIPv4AddressModule, &sDomain);
1645 	if (status != B_OK)
1646 		goto err6;
1647 
1648 	add_debugger_command("ipv4_multicast", dump_ipv4_multicast,
1649 		"list all current IPv4 multicast states");
1650 
1651 	return B_OK;
1652 
1653 err6:
1654 	hash_uninit(sFragmentHash);
1655 err5:
1656 	delete sMulticastState;
1657 err4:
1658 	mutex_destroy(&sReceivingProtocolLock);
1659 	mutex_destroy(&sMulticastGroupsLock);
1660 	mutex_destroy(&sFragmentLock);
1661 	mutex_destroy(&sRawSocketsLock);
1662 	return status;
1663 }
1664 
1665 
1666 status_t
1667 uninit_ipv4()
1668 {
1669 	mutex_lock(&sReceivingProtocolLock);
1670 
1671 	remove_debugger_command("ipv4_multicast", dump_ipv4_multicast);
1672 
1673 	// put all the domain receiving protocols we gathered so far
1674 	for (uint32 i = 0; i < 256; i++) {
1675 		if (sReceivingProtocol[i] != NULL)
1676 			gStackModule->put_domain_receiving_protocol(sDomain, i);
1677 	}
1678 
1679 	gStackModule->unregister_domain(sDomain);
1680 	mutex_unlock(&sReceivingProtocolLock);
1681 
1682 	delete sMulticastState;
1683 	hash_uninit(sFragmentHash);
1684 
1685 	mutex_destroy(&sMulticastGroupsLock);
1686 	mutex_destroy(&sFragmentLock);
1687 	mutex_destroy(&sRawSocketsLock);
1688 	mutex_destroy(&sReceivingProtocolLock);
1689 
1690 	return B_OK;
1691 }
1692 
1693 
1694 static status_t
1695 ipv4_std_ops(int32 op, ...)
1696 {
1697 	switch (op) {
1698 		case B_MODULE_INIT:
1699 			return init_ipv4();
1700 		case B_MODULE_UNINIT:
1701 			return uninit_ipv4();
1702 
1703 		default:
1704 			return B_ERROR;
1705 	}
1706 }
1707 
1708 
/*!	The IPv4 protocol module: its module info followed by the table of
	protocol hooks implemented in this file. It is passed to
	register_domain() in init_ipv4(), and exported via \c modules[].
	The initializer is positional, so the order of the entries must not be
	changed.
*/
net_protocol_module_info gIPv4Module = {
	{
		"network/protocols/ipv4/v1",
		0,
		ipv4_std_ops
	},
	NET_PROTOCOL_ATOMIC_MESSAGES,

	// protocol hooks
	ipv4_init_protocol,
	ipv4_uninit_protocol,
	ipv4_open,
	ipv4_close,
	ipv4_free,
	ipv4_connect,
	ipv4_accept,
	ipv4_control,
	ipv4_getsockopt,
	ipv4_setsockopt,
	ipv4_bind,
	ipv4_unbind,
	ipv4_listen,
	ipv4_shutdown,
	ipv4_send_data,
	ipv4_send_routed_data,
	ipv4_send_avail,
	ipv4_read_data,
	ipv4_read_avail,
	ipv4_get_domain,
	ipv4_get_mtu,
	ipv4_receive_data,
	ipv4_deliver_data,
	ipv4_error,
	ipv4_error_reply,
	// the remaining hooks are not implemented
	NULL,		// add_ancillary_data()
	NULL,		// process_ancillary_data()
	NULL,		// send_data_no_buffer()
	NULL		// read_data_no_buffer()
};
1747 
/*!	The modules this module depends on, each paired with the global pointer
	that refers to it. The list is terminated by an empty entry.
	NOTE(review): presumably the module loader fills in these pointers
	before B_MODULE_INIT is delivered -- confirm against the module API.
*/
module_dependency module_dependencies[] = {
	{NET_STACK_MODULE_NAME, (module_info**)&gStackModule},
	{NET_BUFFER_MODULE_NAME, (module_info**)&gBufferModule},
	{NET_DATALINK_MODULE_NAME, (module_info**)&sDatalinkModule},
	{NET_SOCKET_MODULE_NAME, (module_info**)&sSocketModule},
	{}
};
1755 
/*!	The \c NULL terminated list of modules exported by this add-on; it only
	contains the IPv4 protocol module.
*/
module_info* modules[] = {
	(module_info*)&gIPv4Module,
	NULL
};
1760