xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision 23f179da55b1bd1ba84fbf3d3c56947e2c8d0aca)
1 /*
2  * Copyright 2006-2009, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4_address.h"
11 #include "multicast.h"
12 
13 #include <net_datalink.h>
14 #include <net_datalink_protocol.h>
15 #include <net_device.h>
16 #include <net_protocol.h>
17 #include <net_stack.h>
18 #include <NetBufferUtilities.h>
19 #include <ProtocolUtilities.h>
20 
21 #include <ByteOrder.h>
22 #include <KernelExport.h>
23 #include <util/AutoLock.h>
24 #include <util/list.h>
25 #include <util/khash.h>
26 #include <util/DoublyLinkedList.h>
27 #include <util/MultiHashTable.h>
28 
29 #include <netinet/in.h>
30 #include <netinet/ip.h>
31 #include <new>
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <utility>
36 
37 
// Uncomment the following line to route TRACE()/TRACE_SK() to dprintf().
//#define TRACE_IPV4
#ifndef TRACE_IPV4
	// Tracing disabled: both macros expand to an empty statement.
#	define TRACE(args...)		do { } while (0)
#	define TRACE_SK(args...)	do { } while (0)
#else
	// Both variants prefix the message with the current system_time();
	// TRACE_SK() additionally prints the passed protocol pointer.
#	define TRACE(format, args...) \
		dprintf("IPv4 [%llu] " format "\n", system_time() , ##args)
#	define TRACE_SK(protocol, format, args...) \
		dprintf("IPv4 [%llu] %p " format "\n", system_time(), \
			protocol , ##args)
#endif
49 
50 struct ipv4_header {
51 #if B_HOST_IS_LENDIAN == 1
52 	uint8		header_length : 4;	// header length in 32-bit words
53 	uint8		version : 4;
54 #else
55 	uint8		version : 4;
56 	uint8		header_length : 4;
57 #endif
58 	uint8		service_type;
59 	uint16		total_length;
60 	uint16		id;
61 	uint16		fragment_offset;
62 	uint8		time_to_live;
63 	uint8		protocol;
64 	uint16		checksum;
65 	in_addr_t	source;
66 	in_addr_t	destination;
67 
68 	uint16 HeaderLength() const { return header_length << 2; }
69 	uint16 TotalLength() const { return ntohs(total_length); }
70 	uint16 FragmentOffset() const { return ntohs(fragment_offset); }
71 } _PACKED;
72 
73 #define IP_VERSION				4
74 
75 // fragment flags
76 #define IP_RESERVED_FLAG		0x8000
77 #define IP_DONT_FRAGMENT		0x4000
78 #define IP_MORE_FRAGMENTS		0x2000
79 #define IP_FRAGMENT_OFFSET_MASK	0x1fff
80 
81 #define MAX_HASH_FRAGMENTS 		64
82 	// slots in the fragment packet's hash
83 #define FRAGMENT_TIMEOUT		60000000LL
84 	// discard fragment after 60 seconds
85 
86 typedef DoublyLinkedList<struct net_buffer,
87 	DoublyLinkedListCLink<struct net_buffer> > FragmentList;
88 
89 typedef NetBufferField<uint16, offsetof(ipv4_header, checksum)> IPChecksumField;
90 
91 struct ipv4_packet_key {
92 	in_addr_t	source;
93 	in_addr_t	destination;
94 	uint16		id;
95 	uint8		protocol;
96 };
97 
98 class FragmentPacket {
99 public:
100 							FragmentPacket(const ipv4_packet_key &key);
101 							~FragmentPacket();
102 
103 			status_t		AddFragment(uint16 start, uint16 end,
104 								net_buffer* buffer, bool lastFragment);
105 			status_t		Reassemble(net_buffer* to);
106 
107 			bool			IsComplete() const
108 								{ return fReceivedLastFragment
109 									&& fBytesLeft == 0; }
110 
111 	static	uint32			Hash(void* _packet, const void* _key, uint32 range);
112 	static	int				Compare(void* _packet, const void* _key);
113 	static	int32			NextOffset()
114 								{ return offsetof(FragmentPacket, fNext); }
115 	static	void			StaleTimer(struct net_timer* timer, void* data);
116 
117 private:
118 			FragmentPacket	*fNext;
119 			struct ipv4_packet_key fKey;
120 			bool			fReceivedLastFragment;
121 			int32			fBytesLeft;
122 			FragmentList	fFragments;
123 			net_timer		fTimer;
124 };
125 
126 
127 class RawSocket
128 	: public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
129 public:
130 							RawSocket(net_socket* socket);
131 };
132 
133 typedef DoublyLinkedList<RawSocket> RawSocketList;
134 
135 typedef MulticastGroupInterface<IPv4Multicast> IPv4GroupInterface;
136 typedef MulticastFilter<IPv4Multicast> IPv4MulticastFilter;
137 
138 struct MulticastStateHash {
139 	typedef std::pair<const in_addr* , uint32> KeyType;
140 	typedef IPv4GroupInterface ValueType;
141 
142 	size_t HashKey(const KeyType &key) const
143 		{ return key.first->s_addr ^ key.second; }
144 	size_t Hash(ValueType* value) const
145 		{ return HashKey(std::make_pair(&value->Address(),
146 			value->Interface()->index)); }
147 	bool Compare(const KeyType &key, ValueType* value) const
148 		{ return value->Interface()->index == key.second
149 			&& value->Address().s_addr == key.first->s_addr; }
150 	bool CompareValues(ValueType* value1, ValueType* value2) const
151 		{ return value1->Interface()->index == value2->Interface()->index
152 			&& value1->Address().s_addr == value2->Address().s_addr; }
153 	ValueType*& GetLink(ValueType* value) const { return value->HashLink(); }
154 };
155 
156 
157 struct ipv4_protocol : net_protocol {
158 	ipv4_protocol()
159 		:
160 		multicast_filter(this)
161 	{
162 	}
163 
164 	RawSocket	*raw;
165 	uint8		service_type;
166 	uint8		time_to_live;
167 	uint8		multicast_time_to_live;
168 	uint32		flags;
169 	struct sockaddr* interface_address; // for IP_MULTICAST_IF
170 
171 	IPv4MulticastFilter multicast_filter;
172 };
173 
174 // protocol flags
175 #define IP_FLAG_HEADER_INCLUDED		0x01
176 #define IP_FLAG_RECEIVE_DEST_ADDR	0x02
177 
178 
179 static const int kDefaultTTL = 254;
180 static const int kDefaultMulticastTTL = 1;
181 
182 
183 extern net_protocol_module_info gIPv4Module;
184 	// we need this in ipv4_std_ops() for registering the AF_INET domain
185 
186 net_stack_module_info* gStackModule;
187 net_buffer_module_info* gBufferModule;
188 
189 static struct net_domain* sDomain;
190 static net_datalink_module_info* sDatalinkModule;
191 static net_socket_module_info* sSocketModule;
192 static int32 sPacketID;
193 static RawSocketList sRawSockets;
194 static mutex sRawSocketsLock;
195 static mutex sFragmentLock;
196 static hash_table* sFragmentHash;
197 static mutex sMulticastGroupsLock;
198 
199 typedef MultiHashTable<MulticastStateHash> MulticastState;
200 static MulticastState* sMulticastState;
201 
202 static net_protocol_module_info* sReceivingProtocol[256];
203 static mutex sReceivingProtocolLock;
204 
205 
/*!	Formats \a address in dotted decimal notation into \a buf (at most
	\a bufLen bytes, as limited by snprintf()).
	\return \a buf, so the call can be used directly as a printf() argument.
*/
static const char*
print_address(const in_addr* address, char* buf, size_t bufLen)
{
	// s_addr is converted to host order, so the first octet of the address
	// ends up in the most significant byte
	const unsigned int hostOrder = ntohl(address->s_addr);
	const unsigned int octets[4] = {
		(hostOrder >> 24) & 0xff,
		(hostOrder >> 16) & 0xff,
		(hostOrder >> 8) & 0xff,
		hostOrder & 0xff
	};

	snprintf(buf, bufLen, "%u.%u.%u.%u", octets[0], octets[1], octets[2],
		octets[3]);

	return buf;
}
216 
217 
218 RawSocket::RawSocket(net_socket* socket)
219 	:
220 	DatagramSocket<>("ipv4 raw socket", socket)
221 {
222 }
223 
224 
225 //	#pragma mark -
226 
227 
228 FragmentPacket::FragmentPacket(const ipv4_packet_key &key)
229 	:
230 	fKey(key),
231 	fReceivedLastFragment(false),
232 	fBytesLeft(IP_MAXPACKET)
233 {
234 	gStackModule->init_timer(&fTimer, StaleTimer, this);
235 }
236 
237 
238 FragmentPacket::~FragmentPacket()
239 {
240 	// cancel the kill timer
241 	gStackModule->set_timer(&fTimer, -1);
242 
243 	// delete all fragments
244 	net_buffer* buffer;
245 	while ((buffer = fFragments.RemoveHead()) != NULL) {
246 		gBufferModule->free(buffer);
247 	}
248 }
249 
250 
251 status_t
252 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer* buffer,
253 	bool lastFragment)
254 {
255 	// restart the timer
256 	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
257 
258 	if (start >= end) {
259 		// invalid fragment
260 		return B_BAD_DATA;
261 	}
262 
263 	// Search for a position in the list to insert the fragment
264 
265 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
266 	net_buffer* previous = NULL;
267 	net_buffer* next = NULL;
268 	while ((previous = iterator.Next()) != NULL) {
269 		if (previous->fragment.start <= start) {
270 			// The new fragment can be inserted after this one
271 			break;
272 		}
273 
274 		next = previous;
275 	}
276 
277 	// See if we already have the fragment's data
278 
279 	if (previous != NULL && previous->fragment.start <= start
280 		&& previous->fragment.end >= end) {
281 		// we do, so we can just drop this fragment
282 		gBufferModule->free(buffer);
283 		return B_OK;
284 	}
285 
286 	TRACE("    previous: %p, next: %p", previous, next);
287 
288 	// If we have parts of the data already, truncate as needed
289 
290 	if (previous != NULL && previous->fragment.end > start) {
291 		TRACE("    remove header %d bytes", previous->fragment.end - start);
292 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
293 		start = previous->fragment.end;
294 	}
295 	if (next != NULL && next->fragment.start < end) {
296 		TRACE("    remove trailer %d bytes", next->fragment.start - end);
297 		gBufferModule->remove_trailer(buffer, next->fragment.start - end);
298 		end = next->fragment.start;
299 	}
300 
301 	// Now try if we can already merge the fragments together
302 
303 	// We will always keep the last buffer received, so that we can still
304 	// report an error (in which case we're not responsible for freeing it)
305 
306 	if (previous != NULL && previous->fragment.end == start) {
307 		fFragments.Remove(previous);
308 
309 		buffer->fragment.start = previous->fragment.start;
310 		buffer->fragment.end = end;
311 
312 		status_t status = gBufferModule->merge(buffer, previous, false);
313 		TRACE("    merge previous: %s", strerror(status));
314 		if (status != B_OK) {
315 			fFragments.Insert(next, previous);
316 			return status;
317 		}
318 
319 		fFragments.Insert(next, buffer);
320 
321 		// cut down existing hole
322 		fBytesLeft -= end - start;
323 
324 		if (lastFragment && !fReceivedLastFragment) {
325 			fReceivedLastFragment = true;
326 			fBytesLeft -= IP_MAXPACKET - end;
327 		}
328 
329 		TRACE("    hole length: %d", (int)fBytesLeft);
330 
331 		return B_OK;
332 	} else if (next != NULL && next->fragment.start == end) {
333 		net_buffer* afterNext = (net_buffer*)next->link.next;
334 		fFragments.Remove(next);
335 
336 		buffer->fragment.start = start;
337 		buffer->fragment.end = next->fragment.end;
338 
339 		status_t status = gBufferModule->merge(buffer, next, true);
340 		TRACE("    merge next: %s", strerror(status));
341 		if (status != B_OK) {
342 			// Insert "next" at its previous position
343 			fFragments.Insert(afterNext, next);
344 			return status;
345 		}
346 
347 		fFragments.Insert(afterNext, buffer);
348 
349 		// cut down existing hole
350 		fBytesLeft -= end - start;
351 
352 		if (lastFragment && !fReceivedLastFragment) {
353 			fReceivedLastFragment = true;
354 			fBytesLeft -= IP_MAXPACKET - end;
355 		}
356 
357 		TRACE("    hole length: %d", (int)fBytesLeft);
358 
359 		return B_OK;
360 	}
361 
362 	// We couldn't merge the fragments, so we need to add it as is
363 
364 	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);
365 
366 	buffer->fragment.start = start;
367 	buffer->fragment.end = end;
368 	fFragments.Insert(next, buffer);
369 
370 	// update length of the hole, if any
371 	fBytesLeft -= end - start;
372 
373 	if (lastFragment && !fReceivedLastFragment) {
374 		fReceivedLastFragment = true;
375 		fBytesLeft -= IP_MAXPACKET - end;
376 	}
377 
378 	TRACE("    hole length: %d", (int)fBytesLeft);
379 
380 	return B_OK;
381 }
382 
383 
384 /*!	Reassembles the fragments to the specified buffer \a to.
385 	This buffer must have been added via AddFragment() before.
386 */
387 status_t
388 FragmentPacket::Reassemble(net_buffer* to)
389 {
390 	if (!IsComplete())
391 		return B_ERROR;
392 
393 	net_buffer* buffer = NULL;
394 
395 	net_buffer* fragment;
396 	while ((fragment = fFragments.RemoveHead()) != NULL) {
397 		if (buffer != NULL) {
398 			status_t status;
399 			if (to == fragment) {
400 				status = gBufferModule->merge(fragment, buffer, false);
401 				buffer = fragment;
402 			} else
403 				status = gBufferModule->merge(buffer, fragment, true);
404 			if (status != B_OK)
405 				return status;
406 		} else
407 			buffer = fragment;
408 	}
409 
410 	if (buffer != to)
411 		panic("ipv4 packet reassembly did not work correctly.\n");
412 
413 	return B_OK;
414 }
415 
416 
417 int
418 FragmentPacket::Compare(void* _packet, const void* _key)
419 {
420 	const ipv4_packet_key* key = (ipv4_packet_key*)_key;
421 	ipv4_packet_key* packetKey = &((FragmentPacket*)_packet)->fKey;
422 
423 	if (packetKey->id == key->id
424 		&& packetKey->source == key->source
425 		&& packetKey->destination == key->destination
426 		&& packetKey->protocol == key->protocol)
427 		return 0;
428 
429 	return 1;
430 }
431 
432 
433 uint32
434 FragmentPacket::Hash(void* _packet, const void* _key, uint32 range)
435 {
436 	const struct ipv4_packet_key* key = (struct ipv4_packet_key*)_key;
437 	FragmentPacket* packet = (FragmentPacket*)_packet;
438 	if (packet != NULL)
439 		key = &packet->fKey;
440 
441 	return (key->source ^ key->destination ^ key->protocol ^ key->id) % range;
442 }
443 
444 
445 /*static*/ void
446 FragmentPacket::StaleTimer(struct net_timer* timer, void* data)
447 {
448 	FragmentPacket* packet = (FragmentPacket*)data;
449 	TRACE("Assembling FragmentPacket %p timed out!", packet);
450 
451 	MutexLocker locker(&sFragmentLock);
452 
453 	hash_remove(sFragmentHash, packet);
454 	delete packet;
455 }
456 
457 
458 //	#pragma mark -
459 
460 
#if 0
/*!	Debug helper (currently compiled out): prints all fields of \a header
	via dprintf().
*/
static void
dump_ipv4_header(ipv4_header &header)
{
	// The addresses are printed byte by byte in the order they are stored
	// in memory. NOTE(review): this assumes source/destination are kept in
	// network byte order, in which case the output is correct on hosts of
	// either endianness.
	const uint8* src = (const uint8*)&header.source;
	const uint8* dst = (const uint8*)&header.destination;
	const uint16 fragment = header.FragmentOffset();

	dprintf("  version: %d\n", header.version);
	dprintf("  header_length: 4 * %d\n", header.header_length);
	dprintf("  service_type: %d\n", header.service_type);
	dprintf("  total_length: %d\n", header.TotalLength());
	dprintf("  id: %d\n", ntohs(header.id));
	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
		fragment & IP_FRAGMENT_OFFSET_MASK,
		(fragment & IP_RESERVED_FLAG) ? 'r' : '-',
		(fragment & IP_DONT_FRAGMENT) ? 'd' : '-',
		(fragment & IP_MORE_FRAGMENTS) ? 'm' : '-');
	dprintf("  time_to_live: %d\n", header.time_to_live);
	dprintf("  protocol: %d\n", header.protocol);
	dprintf("  checksum: %d\n", ntohs(header.checksum));
	dprintf("  source: %d.%d.%d.%d\n", src[0], src[1], src[2], src[3]);
	dprintf("  destination: %d.%d.%d.%d\n", dst[0], dst[1], dst[2], dst[3]);
}
#endif
497 
498 
499 static int
500 dump_ipv4_multicast(int argc, char** argv)
501 {
502 	MulticastState::Iterator it = sMulticastState->GetIterator();
503 
504 	while (it.HasNext()) {
505 		IPv4GroupInterface* state = it.Next();
506 
507 		char addressBuffer[64];
508 
509 		kprintf("%p: group <%s, %s, %s {", state, state->Interface()->name,
510 			print_address(&state->Address(), addressBuffer,
511 			sizeof(addressBuffer)),
512 			state->Mode() == IPv4GroupInterface::kExclude
513 				? "Exclude" : "Include");
514 
515 		int count = 0;
516 		IPv4GroupInterface::AddressSet::Iterator it
517 			= state->Sources().GetIterator();
518 		while (it.HasNext()) {
519 			kprintf("%s%s", count > 0 ? ", " : "", print_address(&it.Next(),
520 				addressBuffer, sizeof(addressBuffer)));
521 			count++;
522 		}
523 
524 		kprintf("}> sock %p\n", state->Parent()->Socket());
525 	}
526 
527 	return 0;
528 }
529 
530 
531 /*!	Attempts to re-assemble fragmented packets.
532 	\return B_OK if everything went well; if it could reassemble the packet, \a _buffer
533 		will point to its buffer, otherwise, it will be \c NULL.
534 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
535 */
536 static status_t
537 reassemble_fragments(const ipv4_header &header, net_buffer** _buffer)
538 {
539 	net_buffer* buffer = *_buffer;
540 	status_t status;
541 
542 	struct ipv4_packet_key key;
543 	key.source = (in_addr_t)header.source;
544 	key.destination = (in_addr_t)header.destination;
545 	key.id = header.id;
546 	key.protocol = header.protocol;
547 
548 	// TODO: Make locking finer grained.
549 	MutexLocker locker(&sFragmentLock);
550 
551 	FragmentPacket* packet = (FragmentPacket*)hash_lookup(sFragmentHash, &key);
552 	if (packet == NULL) {
553 		// New fragment packet
554 		packet = new (std::nothrow) FragmentPacket(key);
555 		if (packet == NULL)
556 			return B_NO_MEMORY;
557 
558 		// add packet to hash
559 		status = hash_insert(sFragmentHash, packet);
560 		if (status != B_OK) {
561 			delete packet;
562 			return status;
563 		}
564 	}
565 
566 	uint16 fragmentOffset = header.FragmentOffset();
567 	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
568 	uint16 end = start + header.TotalLength() - header.HeaderLength();
569 	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;
570 
571 	TRACE("   Received IPv4 %sfragment of size %d, offset %d.",
572 		lastFragment ? "last ": "", end - start, start);
573 
574 	// Remove header unless this is the first fragment
575 	if (start != 0)
576 		gBufferModule->remove_header(buffer, header.HeaderLength());
577 
578 	status = packet->AddFragment(start, end, buffer, lastFragment);
579 	if (status != B_OK)
580 		return status;
581 
582 	if (packet->IsComplete()) {
583 		hash_remove(sFragmentHash, packet);
584 			// no matter if reassembling succeeds, we won't need this packet
585 			// anymore
586 
587 		status = packet->Reassemble(buffer);
588 		delete packet;
589 
590 		// _buffer does not change
591 		return status;
592 	}
593 
594 	// This indicates that the packet is not yet complete
595 	*_buffer = NULL;
596 	return B_OK;
597 }
598 
599 
600 /*!	Fragments the incoming buffer and send all fragments via the specified
601 	\a route.
602 */
603 static status_t
604 send_fragments(ipv4_protocol* protocol, struct net_route* route,
605 	net_buffer* buffer, uint32 mtu)
606 {
607 	TRACE_SK(protocol, "SendFragments(%lu bytes, mtu %lu)", buffer->size, mtu);
608 
609 	NetBufferHeaderReader<ipv4_header> originalHeader(buffer);
610 	if (originalHeader.Status() != B_OK)
611 		return originalHeader.Status();
612 
613 	uint16 headerLength = originalHeader->HeaderLength();
614 	uint32 bytesLeft = buffer->size - headerLength;
615 	uint32 fragmentOffset = 0;
616 	status_t status = B_OK;
617 
618 	net_buffer* headerBuffer = gBufferModule->split(buffer, headerLength);
619 	if (headerBuffer == NULL)
620 		return B_NO_MEMORY;
621 
622 	// TODO: we need to make sure ipv4_header is contiguous or
623 	// use another construct.
624 	NetBufferHeaderReader<ipv4_header> bufferHeader(headerBuffer);
625 	ipv4_header* header = &bufferHeader.Data();
626 
627 	// Adapt MTU to be a multiple of 8 (fragment offsets can only be specified
628 	// this way)
629 	mtu -= headerLength;
630 	mtu &= ~7;
631 	TRACE("  adjusted MTU to %ld\n", mtu);
632 
633 	TRACE("  bytesLeft = %ld\n", bytesLeft);
634 	while (bytesLeft > 0) {
635 		uint32 fragmentLength = min_c(bytesLeft, mtu);
636 		bytesLeft -= fragmentLength;
637 		bool lastFragment = bytesLeft == 0;
638 
639 		header->total_length = htons(fragmentLength + headerLength);
640 		header->fragment_offset = htons((lastFragment ? 0 : IP_MORE_FRAGMENTS)
641 			| (fragmentOffset >> 3));
642 		header->checksum = 0;
643 		header->checksum = gStackModule->checksum((uint8*)header,
644 			headerLength);
645 			// TODO: compute the checksum only for those parts that changed?
646 
647 		TRACE("  send fragment of %ld bytes (%ld bytes left)\n", fragmentLength, bytesLeft);
648 
649 		net_buffer* fragmentBuffer;
650 		if (!lastFragment) {
651 			fragmentBuffer = gBufferModule->split(buffer, fragmentLength);
652 			fragmentOffset += fragmentLength;
653 		} else
654 			fragmentBuffer = buffer;
655 
656 		if (fragmentBuffer == NULL) {
657 			status = B_NO_MEMORY;
658 			break;
659 		}
660 
661 		// copy header to fragment
662 		status = gBufferModule->prepend(fragmentBuffer, header, headerLength);
663 
664 		// send fragment
665 		if (status == B_OK)
666 			status = sDatalinkModule->send_data(route, fragmentBuffer);
667 
668 		if (lastFragment) {
669 			// we don't own the last buffer, so we don't have to free it
670 			break;
671 		}
672 
673 		if (status != B_OK) {
674 			gBufferModule->free(fragmentBuffer);
675 			break;
676 		}
677 	}
678 
679 	gBufferModule->free(headerBuffer);
680 	return status;
681 }
682 
683 
684 static status_t
685 deliver_multicast(net_protocol_module_info* module, net_buffer* buffer,
686 	bool deliverToRaw)
687 {
688 	if (module->deliver_data == NULL)
689 		return B_OK;
690 
691 	MutexLocker _(sMulticastGroupsLock);
692 
693 	sockaddr_in* multicastAddr = (sockaddr_in*)buffer->destination;
694 
695 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
696 		&multicastAddr->sin_addr, buffer->interface->index));
697 
698 	while (it.HasNext()) {
699 		IPv4GroupInterface* state = it.Next();
700 
701 		if (deliverToRaw && state->Parent()->Socket()->raw == NULL)
702 			continue;
703 
704 		if (state->FilterAccepts(buffer)) {
705 			// as Multicast filters are installed with an IPv4 protocol
706 			// reference, we need to go and find the appropriate instance
707 			// related to the 'receiving protocol' with module 'module'.
708 			net_protocol* proto
709 				= state->Parent()->Socket()->socket->first_protocol;
710 
711 			while (proto && proto->module != module)
712 				proto = proto->next;
713 
714 			if (proto)
715 				module->deliver_data(proto, buffer);
716 		}
717 	}
718 
719 	return B_OK;
720 }
721 
722 
723 static void
724 raw_receive_data(net_buffer* buffer)
725 {
726 	MutexLocker locker(sRawSocketsLock);
727 
728 	if (sRawSockets.IsEmpty())
729 		return;
730 
731 	TRACE("RawReceiveData(%i)", buffer->protocol);
732 
733 	if (buffer->flags & MSG_MCAST) {
734 		// we need to call deliver_multicast here separately as
735 		// buffer still has the IP header, and it won't in the
736 		// next call. This isn't very optimized but works for now.
737 		// A better solution would be to hold separate hash tables
738 		// and lists for RAW and non-RAW sockets.
739 		deliver_multicast(&gIPv4Module, buffer, true);
740 	} else {
741 		RawSocketList::Iterator iterator = sRawSockets.GetIterator();
742 
743 		while (iterator.HasNext()) {
744 			RawSocket* raw = iterator.Next();
745 
746 			if (raw->Socket()->protocol == buffer->protocol)
747 				raw->SocketEnqueue(buffer);
748 		}
749 	}
750 }
751 
752 
/*!	Clears \a destination, and turns it into an AF_INET address holding
	\a source.
	NOTE(review): unlike the in_addr_t overload of this function, sin_len is
	not set here and stays zero - confirm that this is intended.
	\return \a destination as generic sockaddr pointer.
*/
static sockaddr*
fill_sockaddr_in(sockaddr_in* destination, const in_addr &source)
{
	memset(destination, 0, sizeof(*destination));

	destination->sin_addr = source;
	destination->sin_family = AF_INET;

	return (sockaddr*)destination;
}
761 
762 
763 status_t
764 IPv4Multicast::JoinGroup(IPv4GroupInterface* state)
765 {
766 	MutexLocker _(sMulticastGroupsLock);
767 
768 	sockaddr_in groupAddr;
769 	net_interface* interface = state->Interface();
770 
771 	status_t status = interface->first_info->join_multicast(
772 		interface->first_protocol,
773 		fill_sockaddr_in(&groupAddr, state->Address()));
774 	if (status != B_OK)
775 		return status;
776 
777 	sMulticastState->Insert(state);
778 	return B_OK;
779 }
780 
781 
782 status_t
783 IPv4Multicast::LeaveGroup(IPv4GroupInterface* state)
784 {
785 	MutexLocker _(sMulticastGroupsLock);
786 
787 	sMulticastState->Remove(state);
788 
789 	sockaddr_in groupAddr;
790 	net_interface* interface = state->Interface();
791 
792 	return interface->first_protocol->module->join_multicast(
793 		interface->first_protocol,
794 		fill_sockaddr_in(&groupAddr, state->Address()));
795 }
796 
797 
798 static net_protocol_module_info*
799 receiving_protocol(uint8 protocol)
800 {
801 	net_protocol_module_info* module = sReceivingProtocol[protocol];
802 	if (module != NULL)
803 		return module;
804 
805 	MutexLocker locker(sReceivingProtocolLock);
806 
807 	module = sReceivingProtocol[protocol];
808 	if (module != NULL)
809 		return module;
810 
811 	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol,
812 			&module) == B_OK)
813 		sReceivingProtocol[protocol] = module;
814 
815 	return module;
816 }
817 
818 
819 static inline sockaddr*
820 fill_sockaddr_in(sockaddr_in* target, in_addr_t address)
821 {
822 	memset(target, 0, sizeof(sockaddr_in));
823 	target->sin_family = AF_INET;
824 	target->sin_len = sizeof(sockaddr_in);
825 	target->sin_addr.s_addr = address;
826 	return (sockaddr*)target;
827 }
828 
829 
830 static status_t
831 ipv4_delta_group(IPv4GroupInterface* group, int option,
832 	net_interface* interface, const in_addr* sourceAddr)
833 {
834 	switch (option) {
835 		case IP_ADD_MEMBERSHIP:
836 			return group->Add();
837 		case IP_DROP_MEMBERSHIP:
838 			return group->Drop();
839 		case IP_BLOCK_SOURCE:
840 			return group->BlockSource(*sourceAddr);
841 		case IP_UNBLOCK_SOURCE:
842 			return group->UnblockSource(*sourceAddr);
843 		case IP_ADD_SOURCE_MEMBERSHIP:
844 			return group->AddSSM(*sourceAddr);
845 		case IP_DROP_SOURCE_MEMBERSHIP:
846 			return group->DropSSM(*sourceAddr);
847 	}
848 
849 	return B_ERROR;
850 }
851 
852 
853 static status_t
854 ipv4_delta_membership(ipv4_protocol* protocol, int option,
855 	net_interface* interface, const in_addr* groupAddr,
856 	const in_addr* sourceAddr)
857 {
858 	IPv4MulticastFilter &filter = protocol->multicast_filter;
859 	IPv4GroupInterface* state = NULL;
860 	status_t status = B_OK;
861 
862 	switch (option) {
863 		case IP_ADD_MEMBERSHIP:
864 		case IP_ADD_SOURCE_MEMBERSHIP:
865 			status = filter.GetState(*groupAddr, interface, state, true);
866 			break;
867 
868 		case IP_DROP_MEMBERSHIP:
869 		case IP_BLOCK_SOURCE:
870 		case IP_UNBLOCK_SOURCE:
871 		case IP_DROP_SOURCE_MEMBERSHIP:
872 			filter.GetState(*groupAddr, interface, state, false);
873 			if (state == NULL) {
874 				if (option == IP_DROP_MEMBERSHIP
875 					|| option == IP_DROP_SOURCE_MEMBERSHIP)
876 					return EADDRNOTAVAIL;
877 
878 				return B_BAD_VALUE;
879 			}
880 			break;
881 	}
882 
883 	if (status != B_OK)
884 		return status;
885 
886 	status = ipv4_delta_group(state, option, interface, sourceAddr);
887 	filter.ReturnState(state);
888 	return status;
889 }
890 
891 
/*!	Translates a protocol independent MCAST_* socket option into its IP_*
	counterpart.
	\return The matching IP_* option, or -1 if \a option is not one of the
		MCAST_* options handled here.
*/
static int
generic_to_ipv4(int option)
{
	static const struct {
		int	generic;
		int	ipv4;
	} kOptionMap[] = {
		{ MCAST_JOIN_GROUP,			IP_ADD_MEMBERSHIP },
		{ MCAST_JOIN_SOURCE_GROUP,	IP_ADD_SOURCE_MEMBERSHIP },
		{ MCAST_LEAVE_GROUP,		IP_DROP_MEMBERSHIP },
		{ MCAST_BLOCK_SOURCE,		IP_BLOCK_SOURCE },
		{ MCAST_UNBLOCK_SOURCE,		IP_UNBLOCK_SOURCE },
		{ MCAST_LEAVE_SOURCE_GROUP,	IP_DROP_SOURCE_MEMBERSHIP }
	};

	for (size_t i = 0; i < sizeof(kOptionMap) / sizeof(kOptionMap[0]); i++) {
		if (kOptionMap[i].generic == option)
			return kOptionMap[i].ipv4;
	}

	return -1;
}
912 
913 
914 static net_interface*
915 get_multicast_interface(ipv4_protocol* protocol, const in_addr* address)
916 {
917 	sockaddr_in groupAddr;
918 	net_route* route = sDatalinkModule->get_route(sDomain,
919 		fill_sockaddr_in(&groupAddr, address ? address->s_addr : INADDR_ANY));
920 	if (route == NULL)
921 		return NULL;
922 
923 	return route->interface;
924 }
925 
926 
927 static status_t
928 ipv4_delta_membership(ipv4_protocol* protocol, int option,
929 	in_addr* interfaceAddr, in_addr* groupAddr, in_addr* sourceAddr)
930 {
931 	net_interface* interface = NULL;
932 
933 	if (interfaceAddr->s_addr == INADDR_ANY) {
934 		interface = get_multicast_interface(protocol, groupAddr);
935 	} else {
936 		sockaddr_in address;
937 		interface = sDatalinkModule->get_interface_with_address(sDomain,
938 			fill_sockaddr_in(&address, interfaceAddr->s_addr));
939 	}
940 
941 	if (interface == NULL)
942 		return ENODEV;
943 
944 	return ipv4_delta_membership(protocol, option, interface,
945 		groupAddr, sourceAddr);
946 }
947 
948 
949 static status_t
950 ipv4_generic_delta_membership(ipv4_protocol* protocol, int option,
951 	uint32 index, const sockaddr_storage* _groupAddr,
952 	const sockaddr_storage* _sourceAddr)
953 {
954 	if (_groupAddr->ss_family != AF_INET)
955 		return B_BAD_VALUE;
956 
957 	if (_sourceAddr && _sourceAddr->ss_family != AF_INET)
958 		return B_BAD_VALUE;
959 
960 	const in_addr* groupAddr = &((const sockaddr_in*)_groupAddr)->sin_addr;
961 
962 	net_interface* interface;
963 	if (index == 0)
964 		interface = get_multicast_interface(protocol, groupAddr);
965 	else
966 		interface = sDatalinkModule->get_interface(sDomain, index);
967 
968 	if (interface == NULL)
969 		return ENODEV;
970 
971 	const in_addr* sourceAddr = NULL;
972 	if (_sourceAddr)
973 		sourceAddr = &((const sockaddr_in*)_sourceAddr)->sin_addr;
974 
975 	return ipv4_delta_membership(protocol, generic_to_ipv4(option), interface,
976 		groupAddr, sourceAddr);
977 }
978 
979 
980 static status_t
981 get_int_option(void* target, size_t length, int value)
982 {
983 	if (length != sizeof(int))
984 		return B_BAD_VALUE;
985 
986 	return user_memcpy(target, &value, sizeof(int));
987 }
988 
989 
990 template<typename Type> static status_t
991 set_int_option(Type &target, const void* _value, size_t length)
992 {
993 	int value;
994 
995 	if (length != sizeof(int))
996 		return B_BAD_VALUE;
997 
998 	if (user_memcpy(&value, _value, sizeof(int)) != B_OK)
999 		return B_BAD_ADDRESS;
1000 
1001 	target = value;
1002 	return B_OK;
1003 }
1004 
1005 
1006 //	#pragma mark -
1007 
1008 
1009 net_protocol*
1010 ipv4_init_protocol(net_socket* socket)
1011 {
1012 	ipv4_protocol* protocol = new (std::nothrow) ipv4_protocol();
1013 	if (protocol == NULL)
1014 		return NULL;
1015 
1016 	protocol->raw = NULL;
1017 	protocol->service_type = 0;
1018 	protocol->time_to_live = kDefaultTTL;
1019 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
1020 	protocol->flags = 0;
1021 	protocol->interface_address = NULL;
1022 	return protocol;
1023 }
1024 
1025 
1026 status_t
1027 ipv4_uninit_protocol(net_protocol* _protocol)
1028 {
1029 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1030 
1031 	delete protocol->raw;
1032 	delete protocol->interface_address;
1033 	delete protocol;
1034 	return B_OK;
1035 }
1036 
1037 
1038 /*!	Since open() is only called on the top level protocol, when we get here
1039 	it means we are on a SOCK_RAW socket.
1040 */
1041 status_t
1042 ipv4_open(net_protocol* _protocol)
1043 {
1044 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1045 
1046 	RawSocket* raw = new (std::nothrow) RawSocket(protocol->socket);
1047 	if (raw == NULL)
1048 		return B_NO_MEMORY;
1049 
1050 	status_t status = raw->InitCheck();
1051 	if (status != B_OK) {
1052 		delete raw;
1053 		return status;
1054 	}
1055 
1056 	TRACE_SK(protocol, "Open()");
1057 
1058 	protocol->raw = raw;
1059 
1060 	MutexLocker locker(sRawSocketsLock);
1061 	sRawSockets.Add(raw);
1062 	return B_OK;
1063 }
1064 
1065 
1066 status_t
1067 ipv4_close(net_protocol* _protocol)
1068 {
1069 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1070 	RawSocket* raw = protocol->raw;
1071 	if (raw == NULL)
1072 		return B_ERROR;
1073 
1074 	TRACE_SK(protocol, "Close()");
1075 
1076 	MutexLocker locker(sRawSocketsLock);
1077 	sRawSockets.Remove(raw);
1078 	delete raw;
1079 	protocol->raw = NULL;
1080 
1081 	return B_OK;
1082 }
1083 
1084 
1085 status_t
1086 ipv4_free(net_protocol* protocol)
1087 {
1088 	return B_OK;
1089 }
1090 
1091 
1092 status_t
1093 ipv4_connect(net_protocol* protocol, const struct sockaddr* address)
1094 {
1095 	return B_ERROR;
1096 }
1097 
1098 
1099 status_t
1100 ipv4_accept(net_protocol* protocol, struct net_socket** _acceptedSocket)
1101 {
1102 	return EOPNOTSUPP;
1103 }
1104 
1105 
1106 status_t
1107 ipv4_control(net_protocol* _protocol, int level, int option, void* value,
1108 	size_t* _length)
1109 {
1110 	if ((level & LEVEL_MASK) != IPPROTO_IP)
1111 		return sDatalinkModule->control(sDomain, option, value, _length);
1112 
1113 	return B_BAD_VALUE;
1114 }
1115 
1116 
1117 status_t
1118 ipv4_getsockopt(net_protocol* _protocol, int level, int option, void* value,
1119 	int* _length)
1120 {
1121 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1122 
1123 	if (level == IPPROTO_IP) {
1124 		if (option == IP_HDRINCL) {
1125 			return get_int_option(value, *_length,
1126 				(protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0);
1127 		}
1128 		if (option == IP_RECVDSTADDR) {
1129 			return get_int_option(value, *_length,
1130 				(protocol->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0);
1131 		}
1132 		if (option == IP_TTL)
1133 			return get_int_option(value, *_length, protocol->time_to_live);
1134 		if (option == IP_TOS)
1135 			return get_int_option(value, *_length, protocol->service_type);
1136 		if (option == IP_MULTICAST_TTL) {
1137 			return get_int_option(value, *_length,
1138 				protocol->multicast_time_to_live);
1139 		}
1140 		if (option == IP_ADD_MEMBERSHIP
1141 			|| option == IP_DROP_MEMBERSHIP
1142 			|| option == IP_BLOCK_SOURCE
1143 			|| option == IP_UNBLOCK_SOURCE
1144 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1145 			|| option == IP_DROP_SOURCE_MEMBERSHIP
1146 			|| option == MCAST_JOIN_GROUP
1147 			|| option == MCAST_LEAVE_GROUP
1148 			|| option == MCAST_BLOCK_SOURCE
1149 			|| option == MCAST_UNBLOCK_SOURCE
1150 			|| option == MCAST_JOIN_SOURCE_GROUP
1151 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1152 			// RFC 3678, Section 4.1:
1153 			// ``An error of EOPNOTSUPP is returned if these options are
1154 			// used with getsockopt().''
1155 			return EOPNOTSUPP;
1156 		}
1157 
1158 		dprintf("IPv4::getsockopt(): get unknown option: %d\n", option);
1159 		return ENOPROTOOPT;
1160 	}
1161 
1162 	return sSocketModule->get_option(protocol->socket, level, option, value,
1163 		_length);
1164 }
1165 
1166 
1167 status_t
1168 ipv4_setsockopt(net_protocol* _protocol, int level, int option,
1169 	const void* value, int length)
1170 {
1171 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1172 
1173 	if (level == IPPROTO_IP) {
1174 		if (option == IP_HDRINCL) {
1175 			int headerIncluded;
1176 			if (length != sizeof(int))
1177 				return B_BAD_VALUE;
1178 			if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded))
1179 					!= B_OK)
1180 				return B_BAD_ADDRESS;
1181 
1182 			if (headerIncluded)
1183 				protocol->flags |= IP_FLAG_HEADER_INCLUDED;
1184 			else
1185 				protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
1186 
1187 			return B_OK;
1188 		}
1189 		if (option == IP_RECVDSTADDR) {
1190 			int getAddress;
1191 			if (length != sizeof(int))
1192 				return B_BAD_VALUE;
1193 			if (user_memcpy(&getAddress, value, sizeof(int)) != B_OK)
1194 				return B_BAD_ADDRESS;
1195 
1196 			if (getAddress && (protocol->socket->type == SOCK_DGRAM
1197 					|| protocol->socket->type == SOCK_RAW))
1198 				protocol->flags |= IP_FLAG_RECEIVE_DEST_ADDR;
1199 			else
1200 				protocol->flags &= ~IP_FLAG_RECEIVE_DEST_ADDR;
1201 
1202 			return B_OK;
1203 		}
1204 		if (option == IP_TTL)
1205 			return set_int_option(protocol->time_to_live, value, length);
1206 		if (option == IP_TOS)
1207 			return set_int_option(protocol->service_type, value, length);
1208 		if (option == IP_MULTICAST_IF) {
1209 			if (length != sizeof(struct in_addr))
1210 				return B_BAD_VALUE;
1211 
1212 			struct sockaddr_in* address = new (std::nothrow) sockaddr_in;
1213 			if (address == NULL)
1214 				return B_NO_MEMORY;
1215 
1216 			if (user_memcpy(&address->sin_addr, value, sizeof(struct in_addr))
1217 					!= B_OK) {
1218 				delete address;
1219 				return B_BAD_ADDRESS;
1220 			}
1221 
1222 			// Using INADDR_ANY to remove the previous setting.
1223 			if (address->sin_addr.s_addr == htonl(INADDR_ANY)) {
1224 				delete address;
1225 				delete protocol->interface_address;
1226 				protocol->interface_address = NULL;
1227 				return B_OK;
1228 			}
1229 
1230 			struct net_interface* interface
1231 				= sDatalinkModule->get_interface_with_address(sDomain,
1232 					(struct sockaddr*)address);
1233 			if (interface == NULL) {
1234 				delete address;
1235 				return EADDRNOTAVAIL;
1236 			}
1237 
1238 			delete protocol->interface_address;
1239 			protocol->interface_address = (struct sockaddr*)address;
1240 			return B_OK;
1241 		}
1242 		if (option == IP_MULTICAST_TTL) {
1243 			return set_int_option(protocol->multicast_time_to_live, value,
1244 				length);
1245 		}
1246 		if (option == IP_ADD_MEMBERSHIP || option == IP_DROP_MEMBERSHIP) {
1247 			ip_mreq mreq;
1248 			if (length != sizeof(ip_mreq))
1249 				return B_BAD_VALUE;
1250 			if (user_memcpy(&mreq, value, sizeof(ip_mreq)) != B_OK)
1251 				return B_BAD_ADDRESS;
1252 
1253 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1254 				&mreq.imr_multiaddr, NULL);
1255 		}
1256 		if (option == IP_BLOCK_SOURCE
1257 			|| option == IP_UNBLOCK_SOURCE
1258 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1259 			|| option == IP_DROP_SOURCE_MEMBERSHIP) {
1260 			ip_mreq_source mreq;
1261 			if (length != sizeof(ip_mreq_source))
1262 				return B_BAD_VALUE;
1263 			if (user_memcpy(&mreq, value, sizeof(ip_mreq_source)) != B_OK)
1264 				return B_BAD_ADDRESS;
1265 
1266 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1267 				&mreq.imr_multiaddr, &mreq.imr_sourceaddr);
1268 		}
1269 		if (option == MCAST_LEAVE_GROUP || option == MCAST_JOIN_GROUP) {
1270 			group_req greq;
1271 			if (length != sizeof(group_req))
1272 				return B_BAD_VALUE;
1273 			if (user_memcpy(&greq, value, sizeof(group_req)) != B_OK)
1274 				return B_BAD_ADDRESS;
1275 
1276 			return ipv4_generic_delta_membership(protocol, option,
1277 				greq.gr_interface, &greq.gr_group, NULL);
1278 		}
1279 		if (option == MCAST_BLOCK_SOURCE
1280 			|| option == MCAST_UNBLOCK_SOURCE
1281 			|| option == MCAST_JOIN_SOURCE_GROUP
1282 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1283 			group_source_req greq;
1284 			if (length != sizeof(group_source_req))
1285 				return B_BAD_VALUE;
1286 			if (user_memcpy(&greq, value, sizeof(group_source_req)) != B_OK)
1287 				return B_BAD_ADDRESS;
1288 
1289 			return ipv4_generic_delta_membership(protocol, option,
1290 				greq.gsr_interface, &greq.gsr_group, &greq.gsr_source);
1291 		}
1292 
1293 		dprintf("IPv4::setsockopt(): set unknown option: %d\n", option);
1294 		return ENOPROTOOPT;
1295 	}
1296 
1297 	return sSocketModule->set_option(protocol->socket, level, option,
1298 		value, length);
1299 }
1300 
1301 
1302 status_t
1303 ipv4_bind(net_protocol* protocol, const struct sockaddr* address)
1304 {
1305 	if (address->sa_family != AF_INET)
1306 		return EAFNOSUPPORT;
1307 
1308 	// only INADDR_ANY and addresses of local interfaces are accepted:
1309 	if (((sockaddr_in*)address)->sin_addr.s_addr == INADDR_ANY
1310 		|| IN_MULTICAST(ntohl(((sockaddr_in*)address)->sin_addr.s_addr))
1311 		|| sDatalinkModule->is_local_address(sDomain, address, NULL, NULL)) {
1312 		memcpy(&protocol->socket->address, address, sizeof(struct sockaddr_in));
1313 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
1314 			// explicitly set length, as our callers can't be trusted to
1315 			// always provide the correct length!
1316 		return B_OK;
1317 	}
1318 
1319 	return B_ERROR;
1320 		// address is unknown on this host
1321 }
1322 
1323 
1324 status_t
1325 ipv4_unbind(net_protocol* protocol, struct sockaddr* address)
1326 {
1327 	// nothing to do here
1328 	return B_OK;
1329 }
1330 
1331 
1332 status_t
1333 ipv4_listen(net_protocol* protocol, int count)
1334 {
1335 	return EOPNOTSUPP;
1336 }
1337 
1338 
1339 status_t
1340 ipv4_shutdown(net_protocol* protocol, int direction)
1341 {
1342 	return EOPNOTSUPP;
1343 }
1344 
1345 
1346 status_t
1347 ipv4_send_routed_data(net_protocol* _protocol, struct net_route* route,
1348 	net_buffer* buffer)
1349 {
1350 	if (route == NULL)
1351 		return B_BAD_VALUE;
1352 
1353 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1354 	net_interface* interface = route->interface;
1355 
1356 	TRACE_SK(protocol, "SendRoutedData(%p, %p [%ld bytes])", route, buffer,
1357 		buffer->size);
1358 
1359 	sockaddr_in& source = *(sockaddr_in*)buffer->source;
1360 	sockaddr_in& destination = *(sockaddr_in*)buffer->destination;
1361 	sockaddr_in& broadcastAddress = *(sockaddr_in*)interface->destination;
1362 
1363 	bool headerIncluded = false, checksumNeeded = true;
1364 	if (protocol != NULL)
1365 		headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
1366 
1367 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1368 
1369 	if (destination.sin_addr.s_addr == INADDR_ANY)
1370 		return EDESTADDRREQ;
1371 
1372 	if ((interface->device->flags & IFF_BROADCAST) != 0
1373 		&& (destination.sin_addr.s_addr == INADDR_BROADCAST
1374 			|| destination.sin_addr.s_addr
1375 				== broadcastAddress.sin_addr.s_addr)) {
1376 		if (protocol && !(protocol->socket->options & SO_BROADCAST))
1377 			return B_BAD_VALUE;
1378 		buffer->flags |= MSG_BCAST;
1379 	} else if (IN_MULTICAST(ntohl(destination.sin_addr.s_addr)))
1380 		buffer->flags |= MSG_MCAST;
1381 
1382 	// Add IP header (if needed)
1383 
1384 	if (!headerIncluded) {
1385 		NetBufferPrepend<ipv4_header> header(buffer);
1386 		if (header.Status() != B_OK)
1387 			return header.Status();
1388 
1389 		header->version = IP_VERSION;
1390 		header->header_length = sizeof(ipv4_header) / 4;
1391 		header->service_type = protocol ? protocol->service_type : 0;
1392 		header->total_length = htons(buffer->size);
1393 		header->id = htons(atomic_add(&sPacketID, 1));
1394 		header->fragment_offset = 0;
1395 		if (protocol) {
1396 			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
1397 				? protocol->multicast_time_to_live : protocol->time_to_live;
1398 		} else {
1399 			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
1400 				? kDefaultMulticastTTL : kDefaultTTL;
1401 		}
1402 		header->protocol = protocol
1403 			? protocol->socket->protocol : buffer->protocol;
1404 		header->checksum = 0;
1405 
1406 		header->source = source.sin_addr.s_addr;
1407 		header->destination = destination.sin_addr.s_addr;
1408 	} else {
1409 		// if IP_HDRINCL, check if the source address is set
1410 		NetBufferHeaderReader<ipv4_header> header(buffer);
1411 		if (header.Status() != B_OK)
1412 			return header.Status();
1413 
1414 		if (header->source == 0) {
1415 			header->source = source.sin_addr.s_addr;
1416 			header->checksum = 0;
1417 			header.Sync();
1418 		} else
1419 			checksumNeeded = false;
1420 	}
1421 
1422 	if (buffer->size > 0xffff)
1423 		return EMSGSIZE;
1424 
1425 	if (checksumNeeded) {
1426 		*IPChecksumField(buffer) = gBufferModule->checksum(buffer, 0,
1427 			sizeof(ipv4_header), true);
1428 	}
1429 
1430 	TRACE_SK(protocol, "  SendRoutedData(): header chksum: %ld, buffer "
1431 		"checksum: %ld",
1432 		gBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
1433 		gBufferModule->checksum(buffer, 0, buffer->size, true));
1434 
1435 	TRACE_SK(protocol, "  SendRoutedData(): destination: %08lx",
1436 		ntohl(destination.sin_addr.s_addr));
1437 
1438 	uint32 mtu = route->mtu ? route->mtu : interface->mtu;
1439 	if (buffer->size > mtu) {
1440 		// we need to fragment the packet
1441 		return send_fragments(protocol, route, buffer, mtu);
1442 	}
1443 
1444 	return sDatalinkModule->send_data(route, buffer);
1445 }
1446 
1447 
1448 status_t
1449 ipv4_send_data(net_protocol* _protocol, net_buffer* buffer)
1450 {
1451 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1452 
1453 	TRACE_SK(protocol, "SendData(%p [%ld bytes])", buffer, buffer->size);
1454 
1455 	if (protocol && (protocol->flags & IP_FLAG_HEADER_INCLUDED)) {
1456 		if (buffer->size < sizeof(ipv4_header))
1457 			return B_BAD_VALUE;
1458 
1459 		sockaddr_in* source = (sockaddr_in*)buffer->source;
1460 		sockaddr_in* destination = (sockaddr_in*)buffer->destination;
1461 
1462 		fill_sockaddr_in(source, *NetBufferField<in_addr_t,
1463 			offsetof(ipv4_header, source)>(buffer));
1464 		fill_sockaddr_in(destination, *NetBufferField<in_addr_t,
1465 			offsetof(ipv4_header, destination)>(buffer));
1466 	}
1467 
1468 	// handle IP_MULTICAST_IF
1469 	if (IN_MULTICAST(ntohl(((sockaddr_in*)buffer->destination)->
1470 			sin_addr.s_addr)) && protocol->interface_address != NULL) {
1471 		net_interface* interface
1472 			= sDatalinkModule->get_interface_with_address(sDomain,
1473 				protocol->interface_address);
1474 		if (interface == NULL || (interface->flags & IFF_UP) == 0)
1475 			return EADDRNOTAVAIL;
1476 
1477 		buffer->interface = interface;
1478 
1479 		net_route* route = sDatalinkModule->get_route(sDomain,
1480 			interface->address);
1481 		if (route == NULL)
1482 			return ENETUNREACH;
1483 
1484 		return sDatalinkModule->send_data(route, buffer);
1485 	}
1486 
1487 	return sDatalinkModule->send_datagram(protocol, sDomain, buffer);
1488 }
1489 
1490 
1491 ssize_t
1492 ipv4_send_avail(net_protocol* protocol)
1493 {
1494 	return B_ERROR;
1495 }
1496 
1497 
1498 status_t
1499 ipv4_read_data(net_protocol* _protocol, size_t numBytes, uint32 flags,
1500 	net_buffer** _buffer)
1501 {
1502 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1503 	RawSocket* raw = protocol->raw;
1504 	if (raw == NULL)
1505 		return B_ERROR;
1506 
1507 	TRACE_SK(protocol, "ReadData(%lu, 0x%lx)", numBytes, flags);
1508 
1509 	return raw->SocketDequeue(flags, _buffer);
1510 }
1511 
1512 
1513 ssize_t
1514 ipv4_read_avail(net_protocol* _protocol)
1515 {
1516 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1517 	RawSocket* raw = protocol->raw;
1518 	if (raw == NULL)
1519 		return B_ERROR;
1520 
1521 	return raw->AvailableData();
1522 }
1523 
1524 
1525 struct net_domain*
1526 ipv4_get_domain(net_protocol* protocol)
1527 {
1528 	return sDomain;
1529 }
1530 
1531 
1532 size_t
1533 ipv4_get_mtu(net_protocol* protocol, const struct sockaddr* address)
1534 {
1535 	net_route* route = sDatalinkModule->get_route(sDomain, address);
1536 	if (route == NULL)
1537 		return 0;
1538 
1539 	size_t mtu;
1540 	if (route->mtu != 0)
1541 		mtu = route->mtu;
1542 	else
1543 		mtu = route->interface->mtu;
1544 
1545 	sDatalinkModule->put_route(sDomain, route);
1546 	return mtu - sizeof(ipv4_header);
1547 }
1548 
1549 
1550 status_t
1551 ipv4_receive_data(net_buffer* buffer)
1552 {
1553 	TRACE("ReceiveData(%p [%ld bytes])", buffer, buffer->size);
1554 
1555 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1556 	if (bufferHeader.Status() != B_OK)
1557 		return bufferHeader.Status();
1558 
1559 	ipv4_header &header = bufferHeader.Data();
1560 	//dump_ipv4_header(header);
1561 
1562 	if (header.version != IP_VERSION)
1563 		return B_BAD_TYPE;
1564 
1565 	uint16 packetLength = header.TotalLength();
1566 	uint16 headerLength = header.HeaderLength();
1567 	if (packetLength > buffer->size
1568 		|| headerLength < sizeof(ipv4_header))
1569 		return B_BAD_DATA;
1570 
1571 	// TODO: would be nice to have a direct checksum function somewhere
1572 	if (gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1573 		return B_BAD_DATA;
1574 
1575 	struct sockaddr_in& source = *(struct sockaddr_in*)buffer->source;
1576 	struct sockaddr_in& destination = *(struct sockaddr_in*)buffer->destination;
1577 
1578 	fill_sockaddr_in(&source, header.source);
1579 	fill_sockaddr_in(&destination, header.destination);
1580 
1581 	// lower layers notion of Broadcast or Multicast have no relevance to us
1582 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1583 
1584 	if (header.destination == INADDR_BROADCAST) {
1585 		buffer->flags |= MSG_BCAST;
1586 	} else if (IN_MULTICAST(ntohl(header.destination))) {
1587 		buffer->flags |= MSG_MCAST;
1588 	} else {
1589 		uint32 matchedAddressType = 0;
1590 		// test if the packet is really for us
1591 		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1592 			&buffer->interface, &matchedAddressType)) {
1593 			TRACE("  ReceiveData(): packet was not for us %lx -> %lx",
1594 				ntohl(header.source), ntohl(header.destination));
1595 			return B_ERROR;
1596 		}
1597 
1598 		// copy over special address types (MSG_BCAST or MSG_MCAST):
1599 		buffer->flags |= matchedAddressType;
1600 	}
1601 
1602 	uint8 protocol = buffer->protocol = header.protocol;
1603 
1604 	// remove any trailing/padding data
1605 	status_t status = gBufferModule->trim(buffer, packetLength);
1606 	if (status != B_OK)
1607 		return status;
1608 
1609 	// check for fragmentation
1610 	uint16 fragmentOffset = ntohs(header.fragment_offset);
1611 	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
1612 		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
1613 		// this is a fragment
1614 		TRACE("  ReceiveData(): Found a Fragment!");
1615 		status = reassemble_fragments(header, &buffer);
1616 		TRACE("  ReceiveData():  -> %s", strerror(status));
1617 		if (status != B_OK)
1618 			return status;
1619 
1620 		if (buffer == NULL) {
1621 			// buffer was put into fragment packet
1622 			TRACE("  ReceiveData(): Not yet assembled.");
1623 			return B_OK;
1624 		}
1625 	}
1626 
1627 	// Since the buffer might have been changed (reassembled fragment)
1628 	// we must no longer access bufferHeader or header anymore after
1629 	// this point
1630 
1631 	raw_receive_data(buffer);
1632 
1633 	gBufferModule->remove_header(buffer, headerLength);
1634 		// the header is of variable size and may include IP options
1635 		// (that we ignore for now)
1636 
1637 	net_protocol_module_info* module = receiving_protocol(protocol);
1638 	if (module == NULL) {
1639 		// no handler for this packet
1640 		return EAFNOSUPPORT;
1641 	}
1642 
1643 	if ((buffer->flags & MSG_MCAST) != 0) {
1644 		// Unfortunely historical reasons dictate that the IP multicast
1645 		// model be a little different from the unicast one. We deliver
1646 		// this frame directly to all sockets registered with interest
1647 		// for this multicast group.
1648 		return deliver_multicast(module, buffer, false);
1649 	}
1650 
1651 	return module->receive_data(buffer);
1652 }
1653 
1654 
1655 status_t
1656 ipv4_deliver_data(net_protocol* _protocol, net_buffer* buffer)
1657 {
1658 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1659 
1660 	if (protocol->raw == NULL)
1661 		return B_ERROR;
1662 
1663 	return protocol->raw->SocketEnqueue(buffer);
1664 }
1665 
1666 
1667 status_t
1668 ipv4_error(uint32 code, net_buffer* data)
1669 {
1670 	return B_ERROR;
1671 }
1672 
1673 
1674 status_t
1675 ipv4_error_reply(net_protocol* protocol, net_buffer* causedError, uint32 code,
1676 	void* errorData)
1677 {
1678 	return B_ERROR;
1679 }
1680 
1681 
1682 ssize_t
1683 ipv4_process_ancillary_data_no_container(net_protocol* protocol,
1684 	net_buffer* buffer, void* msgControl, size_t msgControlLen)
1685 {
1686 	ssize_t bytesWritten = 0;
1687 
1688 	if ((((ipv4_protocol*)protocol)->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0) {
1689 		if (msgControlLen < CMSG_SPACE(sizeof(struct in_addr)))
1690 			return B_NO_MEMORY;
1691 
1692 		cmsghdr* messageHeader = (cmsghdr*)msgControl;
1693 		messageHeader->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
1694 		messageHeader->cmsg_level = IPPROTO_IP;
1695 		messageHeader->cmsg_type = IP_RECVDSTADDR;
1696 
1697 		memcpy(CMSG_DATA(messageHeader),
1698 		 	&((struct sockaddr_in*)buffer->destination)->sin_addr,
1699 		 	sizeof(struct in_addr));
1700 
1701 		bytesWritten += CMSG_SPACE(sizeof(struct in_addr));
1702 	}
1703 
1704 	return bytesWritten;
1705 }
1706 
1707 
1708 //	#pragma mark -
1709 
1710 
1711 status_t
1712 init_ipv4()
1713 {
1714 	sPacketID = (int32)system_time();
1715 
1716 	mutex_init(&sRawSocketsLock, "raw sockets");
1717 	mutex_init(&sFragmentLock, "IPv4 Fragments");
1718 	mutex_init(&sMulticastGroupsLock, "IPv4 multicast groups");
1719 	mutex_init(&sReceivingProtocolLock, "IPv4 receiving protocols");
1720 
1721 	status_t status;
1722 
1723 	sMulticastState = new MulticastState();
1724 	if (sMulticastState == NULL) {
1725 		status = B_NO_MEMORY;
1726 		goto err4;
1727 	}
1728 
1729 	status = sMulticastState->Init();
1730 	if (status != B_OK)
1731 		goto err5;
1732 
1733 	sFragmentHash = hash_init(MAX_HASH_FRAGMENTS, FragmentPacket::NextOffset(),
1734 		&FragmentPacket::Compare, &FragmentPacket::Hash);
1735 	if (sFragmentHash == NULL)
1736 		goto err5;
1737 
1738 	new (&sRawSockets) RawSocketList;
1739 		// static initializers do not work in the kernel,
1740 		// so we have to do it here, manually
1741 		// TODO: for modules, this shouldn't be required
1742 
1743 	status = gStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
1744 		"network/protocols/ipv4/v1", NULL);
1745 	if (status != B_OK)
1746 		goto err6;
1747 
1748 	status = gStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
1749 		&gIPv4AddressModule, &sDomain);
1750 	if (status != B_OK)
1751 		goto err6;
1752 
1753 	add_debugger_command("ipv4_multicast", dump_ipv4_multicast,
1754 		"list all current IPv4 multicast states");
1755 
1756 	return B_OK;
1757 
1758 err6:
1759 	hash_uninit(sFragmentHash);
1760 err5:
1761 	delete sMulticastState;
1762 err4:
1763 	mutex_destroy(&sReceivingProtocolLock);
1764 	mutex_destroy(&sMulticastGroupsLock);
1765 	mutex_destroy(&sFragmentLock);
1766 	mutex_destroy(&sRawSocketsLock);
1767 	return status;
1768 }
1769 
1770 
1771 status_t
1772 uninit_ipv4()
1773 {
1774 	mutex_lock(&sReceivingProtocolLock);
1775 
1776 	remove_debugger_command("ipv4_multicast", dump_ipv4_multicast);
1777 
1778 	// put all the domain receiving protocols we gathered so far
1779 	for (uint32 i = 0; i < 256; i++) {
1780 		if (sReceivingProtocol[i] != NULL)
1781 			gStackModule->put_domain_receiving_protocol(sDomain, i);
1782 	}
1783 
1784 	gStackModule->unregister_domain(sDomain);
1785 	mutex_unlock(&sReceivingProtocolLock);
1786 
1787 	delete sMulticastState;
1788 	hash_uninit(sFragmentHash);
1789 
1790 	mutex_destroy(&sMulticastGroupsLock);
1791 	mutex_destroy(&sFragmentLock);
1792 	mutex_destroy(&sRawSocketsLock);
1793 	mutex_destroy(&sReceivingProtocolLock);
1794 
1795 	return B_OK;
1796 }
1797 
1798 
1799 static status_t
1800 ipv4_std_ops(int32 op, ...)
1801 {
1802 	switch (op) {
1803 		case B_MODULE_INIT:
1804 			return init_ipv4();
1805 		case B_MODULE_UNINIT:
1806 			return uninit_ipv4();
1807 
1808 		default:
1809 			return B_ERROR;
1810 	}
1811 }
1812 
1813 
1814 net_protocol_module_info gIPv4Module = {
1815 	{
1816 		"network/protocols/ipv4/v1",
1817 		0,
1818 		ipv4_std_ops
1819 	},
1820 	NET_PROTOCOL_ATOMIC_MESSAGES,
1821 
1822 	ipv4_init_protocol,
1823 	ipv4_uninit_protocol,
1824 	ipv4_open,
1825 	ipv4_close,
1826 	ipv4_free,
1827 	ipv4_connect,
1828 	ipv4_accept,
1829 	ipv4_control,
1830 	ipv4_getsockopt,
1831 	ipv4_setsockopt,
1832 	ipv4_bind,
1833 	ipv4_unbind,
1834 	ipv4_listen,
1835 	ipv4_shutdown,
1836 	ipv4_send_data,
1837 	ipv4_send_routed_data,
1838 	ipv4_send_avail,
1839 	ipv4_read_data,
1840 	ipv4_read_avail,
1841 	ipv4_get_domain,
1842 	ipv4_get_mtu,
1843 	ipv4_receive_data,
1844 	ipv4_deliver_data,
1845 	ipv4_error,
1846 	ipv4_error_reply,
1847 	NULL,		// add_ancillary_data()
1848 	NULL,		// process_ancillary_data()
1849 	ipv4_process_ancillary_data_no_container,
1850 	NULL,		// send_data_no_buffer()
1851 	NULL		// read_data_no_buffer()
1852 };
1853 
1854 module_dependency module_dependencies[] = {
1855 	{NET_STACK_MODULE_NAME, (module_info**)&gStackModule},
1856 	{NET_BUFFER_MODULE_NAME, (module_info**)&gBufferModule},
1857 	{NET_DATALINK_MODULE_NAME, (module_info**)&sDatalinkModule},
1858 	{NET_SOCKET_MODULE_NAME, (module_info**)&sSocketModule},
1859 	{}
1860 };
1861 
1862 module_info* modules[] = {
1863 	(module_info*)&gIPv4Module,
1864 	NULL
1865 };
1866