xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision be9a70562e3c6552efb0caa53bd26965e7e1bed7)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4.h"
11 #include "ipv4_address.h"
12 #include "multicast.h"
13 
14 #include <net_datalink.h>
15 #include <net_datalink_protocol.h>
16 #include <net_device.h>
17 #include <net_protocol.h>
18 #include <net_stack.h>
19 #include <NetBufferUtilities.h>
20 #include <ProtocolUtilities.h>
21 
22 #include <KernelExport.h>
23 #include <util/AutoLock.h>
24 #include <util/list.h>
25 #include <util/DoublyLinkedList.h>
26 #include <util/MultiHashTable.h>
27 
28 #include <netinet/in.h>
29 #include <netinet/ip.h>
30 #include <new>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <utility>
35 
36 
37 //#define TRACE_IPV4
38 #ifdef TRACE_IPV4
39 #	define TRACE(format, args...) \
40 		dprintf("IPv4 [%llu] " format "\n", system_time() , ##args)
41 #	define TRACE_SK(protocol, format, args...) \
42 		dprintf("IPv4 [%llu] %p " format "\n", system_time(), \
43 			protocol , ##args)
44 #	define TRACE_ONLY(x) x
45 #else
46 #	define TRACE(args...) ;
47 #	define TRACE_SK(args...) ;
48 #	define TRACE_ONLY(x)
49 #endif
50 
51 
52 #define MAX_HASH_FRAGMENTS 		64
53 	// slots in the fragment packet's hash
54 #define FRAGMENT_TIMEOUT		60000000LL
55 	// discard fragment after 60 seconds
56 
57 
// List type used by FragmentPacket (fFragments) to hold the net_buffers of
// the fragments received so far.
58 typedef DoublyLinkedList<struct net_buffer,
59 	DoublyLinkedListCLink<struct net_buffer> > FragmentList;
60 
// NetBufferField specialization for the 16 bit field located at the offset
// of ipv4_header::checksum.
61 typedef NetBufferField<uint16, offsetof(ipv4_header, checksum)> IPChecksumField;
62 
// Lookup key of the fragment hash: FragmentHashDefinition::Compare() treats
// two keys as equal only if all four fields match. reassemble_fragments()
// fills the key directly from the received ipv4_header ("id" is copied from
// header.id without any byte swapping).
63 struct ipv4_packet_key {
64 	in_addr_t	source;
65 	in_addr_t	destination;
66 	uint16		id;
67 	uint8		protocol;
68 };
69 
70 
// Collects the fragments of a single IPv4 packet (identified by an
// ipv4_packet_key) until it can be reassembled. Instances are stored in
// sFragmentHash and are deleted either by reassemble_fragments() once the
// packet is complete, or by StaleTimer() when the timer fires.
71 class FragmentPacket {
72 public:
73 								FragmentPacket(const ipv4_packet_key& key);
74 								~FragmentPacket();
75 
			// Adds the payload range [start, end) held by "buffer".
76 			status_t			AddFragment(uint16 start, uint16 end,
77 									net_buffer* buffer, bool lastFragment);
			// Merges all collected fragments into "to".
78 			status_t			Reassemble(net_buffer* to);
79 
			// Complete once the last fragment has been seen and no bytes are
			// outstanding anymore.
80 			bool				IsComplete() const
81 									{ return fReceivedLastFragment
82 										&& fBytesLeft == 0; }
83 
84 			const ipv4_packet_key& Key() const { return fKey; }
			// Link used by the hash table (see FragmentHashDefinition::GetLink()).
85 			FragmentPacket*&	HashTableLink() { return fNext; }
86 
			// Timer hook: removes the packet from sFragmentHash and deletes it.
87 	static	void				StaleTimer(struct net_timer* timer, void* data);
88 
89 private:
90 			FragmentPacket*		fNext;
									// hash table chain link
91 			struct ipv4_packet_key fKey;
92 			uint32				fIndex;
									// device index adopted from the most
									// recently added buffer
93 			bool				fReceivedLastFragment;
94 			int32				fBytesLeft;
									// starts at IP_MAXPACKET; reduced by every
									// added range and, once the last fragment
									// is known, by the space behind its end
95 			FragmentList		fFragments;
96 			net_timer			fTimer;
									// (re)armed with FRAGMENT_TIMEOUT by
									// AddFragment()
97 };
98 
99 
100 struct FragmentHashDefinition {
101 	typedef ipv4_packet_key KeyType;
102 	typedef FragmentPacket ValueType;
103 
104 	size_t HashKey(const KeyType& key) const
105 	{
106 		return (key.source ^ key.destination ^ key.protocol ^ key.id);
107 	}
108 
109 	size_t Hash(ValueType* value) const
110 	{
111 		return HashKey(value->Key());
112 	}
113 
114 	bool Compare(const KeyType& key, ValueType* value) const
115 	{
116 		const ipv4_packet_key& packetKey = value->Key();
117 
118 		return packetKey.id == key.id
119 			&& packetKey.source == key.source
120 			&& packetKey.destination == key.destination
121 			&& packetKey.protocol == key.protocol;
122 	}
123 
124 	ValueType*& GetLink(ValueType* value) const
125 	{
126 		return value->HashTableLink();
127 	}
128 };
129 
// Hash table of all packets that are currently being reassembled (see
// sFragmentHash).
// NOTE(review): the meaning of the two boolean template arguments is not
// visible in this file - confirm against the BOpenHashTable declaration.
130 typedef BOpenHashTable<FragmentHashDefinition, false, true> FragmentTable;
131 
132 
// A DatagramSocket that can additionally be linked into a RawSocketList.
// ipv4_open() creates one per opened protocol and adds it to sRawSockets;
// raw_receive_data() enqueues clones of matching buffers into it.
133 class RawSocket
134 	: public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
135 public:
136 								RawSocket(net_socket* socket);
137 };
138 
// List type of sRawSockets.
139 typedef DoublyLinkedList<RawSocket> RawSocketList;
140 
// IPv4 instantiations of the generic multicast templates; IPv4GroupInterface
// is the value type stored in the multicast state table (MulticastStateHash).
141 typedef MulticastGroupInterface<IPv4Multicast> IPv4GroupInterface;
142 typedef MulticastFilter<IPv4Multicast> IPv4MulticastFilter;
143 
144 struct MulticastStateHash {
145 	typedef std::pair<const in_addr* , uint32> KeyType;
146 	typedef IPv4GroupInterface ValueType;
147 
148 	size_t HashKey(const KeyType &key) const
149 		{ return key.first->s_addr ^ key.second; }
150 	size_t Hash(ValueType* value) const
151 		{ return HashKey(std::make_pair(&value->Address(),
152 			value->Interface()->index)); }
153 	bool Compare(const KeyType &key, ValueType* value) const
154 		{ return value->Interface()->index == key.second
155 			&& value->Address().s_addr == key.first->s_addr; }
156 	bool CompareValues(ValueType* value1, ValueType* value2) const
157 		{ return value1->Interface()->index == value2->Interface()->index
158 			&& value1->Address().s_addr == value2->Address().s_addr; }
159 	ValueType*& GetLink(ValueType* value) const { return value->MulticastGroupsHashLink(); }
160 };
161 
162 
// Per-socket state of the IPv4 protocol.
// Note that the constructor only initializes "raw" and "multicast_filter";
// the remaining fields are assigned by ipv4_init_protocol() right after
// construction.
163 struct ipv4_protocol : net_protocol {
164 	ipv4_protocol()
165 		:
166 		raw(NULL),
167 		multicast_filter(this)
168 	{
169 	}
170 
171 	~ipv4_protocol()
172 	{
		// deleting NULL is fine, in case the socket was never opened as raw
173 		delete raw;
174 	}
175 
176 	RawSocket*			raw;
							// set by ipv4_open(), reset by ipv4_close()
177 	uint8				service_type;
178 	uint8				time_to_live;
179 	uint8				multicast_time_to_live;
180 	uint32				flags;
							// combination of the IP_FLAG_* protocol flags
181 	struct sockaddr*	multicast_address; // for IP_MULTICAST_IF
182 
183 	IPv4MulticastFilter	multicast_filter;
184 };
185 
// Bits stored in ipv4_protocol::flags; ipv4_getsockopt() reports them for
// IP_HDRINCL and IP_RECVDSTADDR respectively.
186 // protocol flags
187 #define IP_FLAG_HEADER_INCLUDED		0x01
188 #define IP_FLAG_RECEIVE_DEST_ADDR	0x02
189 
190 
// Initial TTL values that ipv4_init_protocol() assigns to new protocols.
191 static const int kDefaultTTL = 254;
192 static const int kDefaultMulticastTTL = 1;
193 
194 
195 extern net_protocol_module_info gIPv4Module;
196 	// we need this in ipv4_std_ops() for registering the AF_INET domain
197 
// Module interfaces of the network stack and the buffer implementation.
198 net_stack_module_info* gStackModule;
199 net_buffer_module_info* gBufferModule;
200 
201 static struct net_domain* sDomain;
202 static net_datalink_module_info* sDatalinkModule;
203 static net_socket_module_info* sSocketModule;
204 static int32 sPacketID;
// All open raw sockets; guarded by sRawSocketsLock.
205 static RawSocketList sRawSockets;
206 static mutex sRawSocketsLock;
// Packets under reassembly; sFragmentLock is held while accessing the table.
207 static mutex sFragmentLock;
208 static FragmentTable sFragmentHash;
// Held by IPv4Multicast::JoinGroup()/LeaveGroup() while changing
// sMulticastState.
209 static mutex sMulticastGroupsLock;
210 
211 typedef MultiHashTable<MulticastStateHash> MulticastState;
212 static MulticastState* sMulticastState;
213 
// Cache of receiving protocol modules indexed by IP protocol number, filled
// on demand by receiving_protocol(); writes happen with
// sReceivingProtocolLock held.
214 static net_protocol_module_info* sReceivingProtocol[256];
215 static mutex sReceivingProtocolLock;
216 
217 
/*!	Formats the IPv4 \a address (in network byte order) in dotted decimal
	notation into \a buf, writing at most \a bufLen bytes.
	\return \a buf, so that the call can be used directly as a printf argument.
*/
static const char*
print_address(const struct in_addr* address, char* buf, size_t bufLen)
{
	const unsigned int hostOrder = ntohl(address->s_addr);

	// extract the octets, most significant one first
	unsigned int octets[4];
	for (int i = 0; i < 4; i++)
		octets[i] = (hostOrder >> (24 - 8 * i)) & 0xff;

	snprintf(buf, bufLen, "%u.%u.%u.%u", octets[0], octets[1], octets[2],
		octets[3]);

	return buf;
}
228 
229 
// Forwards \a socket to the DatagramSocket base class, using the fixed name
// "ipv4 raw socket". Callers check the result via InitCheck() (see
// ipv4_open()).
230 RawSocket::RawSocket(net_socket* socket)
231 	:
232 	DatagramSocket<>("ipv4 raw socket", socket)
233 {
234 }
235 
236 
237 //	#pragma mark -
238 
239 
// Creates an empty packet for \a key. The whole IP_MAXPACKET range counts as
// missing until fragments are added. The stale timer is only initialized
// here; AddFragment() is what arms it.
240 FragmentPacket::FragmentPacket(const ipv4_packet_key& key)
241 	:
242 	fKey(key),
243 	fIndex(0),
244 	fReceivedLastFragment(false),
245 	fBytesLeft(IP_MAXPACKET)
246 {
247 	gStackModule->init_timer(&fTimer, FragmentPacket::StaleTimer, this);
248 }
249 
250 
251 FragmentPacket::~FragmentPacket()
252 {
253 	// cancel the kill timer
254 	gStackModule->set_timer(&fTimer, -1);
255 
256 	// delete all fragments
257 	net_buffer* buffer;
258 	while ((buffer = fFragments.RemoveHead()) != NULL) {
259 		gBufferModule->free(buffer);
260 	}
261 }
262 
263 
264 status_t
265 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer* buffer,
266 	bool lastFragment)
267 {
268 	// restart the timer
269 	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
270 
271 	if (start >= end) {
272 		// invalid fragment
273 		return B_BAD_DATA;
274 	}
275 
276 	// Search for a position in the list to insert the fragment
277 
278 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
279 	net_buffer* previous = NULL;
280 	net_buffer* next = NULL;
281 	while ((previous = iterator.Next()) != NULL) {
282 		if (previous->fragment.start <= start) {
283 			// The new fragment can be inserted after this one
284 			break;
285 		}
286 
287 		next = previous;
288 	}
289 
290 	// See if we already have the fragment's data
291 
292 	if (previous != NULL && previous->fragment.start <= start
293 		&& previous->fragment.end >= end) {
294 		// we do, so we can just drop this fragment
295 		gBufferModule->free(buffer);
296 		return B_OK;
297 	}
298 
299 	fIndex = buffer->index;
300 		// adopt the buffer's device index
301 
302 	TRACE("    previous: %p, next: %p", previous, next);
303 
304 	// If we have parts of the data already, truncate as needed
305 
306 	if (previous != NULL && previous->fragment.end > start) {
307 		TRACE("    remove header %d bytes", previous->fragment.end - start);
308 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
309 		start = previous->fragment.end;
310 	}
311 	if (next != NULL && next->fragment.start < end) {
312 		TRACE("    remove trailer %d bytes", next->fragment.start - end);
313 		gBufferModule->remove_trailer(buffer, next->fragment.start - end);
314 		end = next->fragment.start;
315 	}
316 
317 	// Now try if we can already merge the fragments together
318 
319 	// We will always keep the last buffer received, so that we can still
320 	// report an error (in which case we're not responsible for freeing it)
321 
322 	if (previous != NULL && previous->fragment.end == start) {
323 		fFragments.Remove(previous);
324 
325 		buffer->fragment.start = previous->fragment.start;
326 		buffer->fragment.end = end;
327 
328 		status_t status = gBufferModule->merge(buffer, previous, false);
329 		TRACE("    merge previous: %s", strerror(status));
330 		if (status != B_OK) {
331 			fFragments.Insert(next, previous);
332 			return status;
333 		}
334 
335 		fFragments.Insert(next, buffer);
336 
337 		// cut down existing hole
338 		fBytesLeft -= end - start;
339 
340 		if (lastFragment && !fReceivedLastFragment) {
341 			fReceivedLastFragment = true;
342 			fBytesLeft -= IP_MAXPACKET - end;
343 		}
344 
345 		TRACE("    hole length: %d", (int)fBytesLeft);
346 
347 		return B_OK;
348 	} else if (next != NULL && next->fragment.start == end) {
349 		net_buffer* afterNext = (net_buffer*)next->link.next;
350 		fFragments.Remove(next);
351 
352 		buffer->fragment.start = start;
353 		buffer->fragment.end = next->fragment.end;
354 
355 		status_t status = gBufferModule->merge(buffer, next, true);
356 		TRACE("    merge next: %s", strerror(status));
357 		if (status != B_OK) {
358 			// Insert "next" at its previous position
359 			fFragments.Insert(afterNext, next);
360 			return status;
361 		}
362 
363 		fFragments.Insert(afterNext, buffer);
364 
365 		// cut down existing hole
366 		fBytesLeft -= end - start;
367 
368 		if (lastFragment && !fReceivedLastFragment) {
369 			fReceivedLastFragment = true;
370 			fBytesLeft -= IP_MAXPACKET - end;
371 		}
372 
373 		TRACE("    hole length: %d", (int)fBytesLeft);
374 
375 		return B_OK;
376 	}
377 
378 	// We couldn't merge the fragments, so we need to add it as is
379 
380 	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);
381 
382 	buffer->fragment.start = start;
383 	buffer->fragment.end = end;
384 	fFragments.Insert(next, buffer);
385 
386 	// update length of the hole, if any
387 	fBytesLeft -= end - start;
388 
389 	if (lastFragment && !fReceivedLastFragment) {
390 		fReceivedLastFragment = true;
391 		fBytesLeft -= IP_MAXPACKET - end;
392 	}
393 
394 	TRACE("    hole length: %d", (int)fBytesLeft);
395 
396 	return B_OK;
397 }
398 
399 
400 /*!	Reassembles the fragments to the specified buffer \a to.
401 	This buffer must have been added via AddFragment() before.
402 */
403 status_t
404 FragmentPacket::Reassemble(net_buffer* to)
405 {
406 	if (!IsComplete())
407 		return B_ERROR;
408 
409 	net_buffer* buffer = NULL;
410 
411 	net_buffer* fragment;
412 	while ((fragment = fFragments.RemoveHead()) != NULL) {
413 		if (buffer != NULL) {
414 			status_t status;
415 			if (to == fragment) {
416 				status = gBufferModule->merge(fragment, buffer, false);
417 				buffer = fragment;
418 			} else
419 				status = gBufferModule->merge(buffer, fragment, true);
420 			if (status != B_OK)
421 				return status;
422 		} else
423 			buffer = fragment;
424 	}
425 
426 	if (buffer != to)
427 		panic("ipv4 packet reassembly did not work correctly.");
428 
429 	to->index = fIndex;
430 		// reset the buffer's device index
431 
432 	return B_OK;
433 }
434 
435 
436 /*static*/ void
437 FragmentPacket::StaleTimer(struct net_timer* timer, void* data)
438 {
439 	FragmentPacket* packet = (FragmentPacket*)data;
440 	TRACE("Assembling FragmentPacket %p timed out!", packet);
441 
442 	MutexLocker locker(&sFragmentLock);
443 	sFragmentHash.Remove(packet);
444 	locker.Unlock();
445 
446 	if (!packet->fFragments.IsEmpty()) {
447 		// Send error: fragment reassembly time exceeded
448 		sDomain->module->error_reply(NULL, packet->fFragments.First(),
449 			B_NET_ERROR_REASSEMBLY_TIME_EXCEEDED, NULL);
450 	}
451 
452 	delete packet;
453 }
454 
455 
456 //	#pragma mark -
457 
458 
#ifdef TRACE_IPV4
/*!	Prints all fields of \a header to the debug output (tracing builds only).
*/
static void
dump_ipv4_header(ipv4_header &header)
{
	// The addresses in the header are in network byte order, so the first
	// byte in memory is the first octet of the dotted notation on any host.
	// Indexing the raw bytes therefore needs no endianness special casing
	// (an overlay struct with reversed members prints the octets backwards
	// on big endian hosts).
	const uint8* src = (const uint8*)&header.source;
	const uint8* dst = (const uint8*)&header.destination;

	dprintf("  version: %d\n", header.version);
	dprintf("  header_length: 4 * %d\n", header.header_length);
	dprintf("  service_type: %d\n", header.service_type);
	dprintf("  total_length: %d\n", header.TotalLength());
	dprintf("  id: %d\n", ntohs(header.id));
	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
		header.FragmentOffset() & IP_FRAGMENT_OFFSET_MASK,
		(header.FragmentOffset() & IP_RESERVED_FLAG) ? 'r' : '-',
		(header.FragmentOffset() & IP_DONT_FRAGMENT) ? 'd' : '-',
		(header.FragmentOffset() & IP_MORE_FRAGMENTS) ? 'm' : '-');
	dprintf("  time_to_live: %d\n", header.time_to_live);
	dprintf("  protocol: %d\n", header.protocol);
	dprintf("  checksum: %d\n", ntohs(header.checksum));
	dprintf("  source: %d.%d.%d.%d\n", src[0], src[1], src[2], src[3]);
	dprintf("  destination: %d.%d.%d.%d\n", dst[0], dst[1], dst[2], dst[3]);
}
#endif	// TRACE_IPV4
495 
496 
497 static int
498 dump_ipv4_multicast(int argc, char** argv)
499 {
500 	MulticastState::Iterator it = sMulticastState->GetIterator();
501 
502 	while (it.HasNext()) {
503 		IPv4GroupInterface* state = it.Next();
504 
505 		char addressBuffer[64];
506 
507 		kprintf("%p: group <%s, %s, %s {", state, state->Interface()->name,
508 			print_address(&state->Address(), addressBuffer,
509 			sizeof(addressBuffer)),
510 			state->Mode() == IPv4GroupInterface::kExclude
511 				? "Exclude" : "Include");
512 
513 		int count = 0;
514 		IPv4GroupInterface::AddressSet::Iterator it
515 			= state->Sources().GetIterator();
516 		while (it.HasNext()) {
517 			kprintf("%s%s", count > 0 ? ", " : "", print_address(&it.Next(),
518 				addressBuffer, sizeof(addressBuffer)));
519 			count++;
520 		}
521 
522 		kprintf("}> sock %p\n", state->Parent()->Socket());
523 	}
524 
525 	return 0;
526 }
527 
528 
529 /*!	Attempts to re-assemble fragmented packets.
530 	\return B_OK if everything went well; if it could reassemble the packet, \a _buffer
531 		will point to its buffer, otherwise, it will be \c NULL.
532 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
533 */
534 static status_t
535 reassemble_fragments(const ipv4_header &header, net_buffer** _buffer)
536 {
537 	net_buffer* buffer = *_buffer;
538 	status_t status;
539 
540 	struct ipv4_packet_key key;
541 	key.source = (in_addr_t)header.source;
542 	key.destination = (in_addr_t)header.destination;
543 	key.id = header.id;
544 	key.protocol = header.protocol;
545 
546 	// TODO: Make locking finer grained.
547 	MutexLocker locker(&sFragmentLock);
548 
549 	FragmentPacket* packet = sFragmentHash.Lookup(key);
550 	if (packet == NULL) {
551 		// New fragment packet
552 		packet = new (std::nothrow) FragmentPacket(key);
553 		if (packet == NULL)
554 			return B_NO_MEMORY;
555 
556 		// add packet to hash
557 		status = sFragmentHash.Insert(packet);
558 		if (status != B_OK) {
559 			delete packet;
560 			return status;
561 		}
562 	}
563 
564 	uint16 fragmentOffset = header.FragmentOffset();
565 	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
566 	uint16 end = start + header.TotalLength() - header.HeaderLength();
567 	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;
568 
569 	TRACE("   Received IPv4 %sfragment of size %d, offset %d.",
570 		lastFragment ? "last ": "", end - start, start);
571 
572 	// Remove header unless this is the first fragment
573 	if (start != 0)
574 		gBufferModule->remove_header(buffer, header.HeaderLength());
575 
576 	status = packet->AddFragment(start, end, buffer, lastFragment);
577 	if (status != B_OK)
578 		return status;
579 
580 	if (packet->IsComplete()) {
581 		sFragmentHash.Remove(packet);
582 			// no matter if reassembling succeeds, we won't need this packet
583 			// anymore
584 
585 		status = packet->Reassemble(buffer);
586 		delete packet;
587 
588 		// _buffer does not change
589 		return status;
590 	}
591 
592 	// This indicates that the packet is not yet complete
593 	*_buffer = NULL;
594 	return B_OK;
595 }
596 
597 
598 /*!	Fragments the incoming buffer and send all fragments via the specified
599 	\a route.
600 */
601 static status_t
602 send_fragments(ipv4_protocol* protocol, struct net_route* route,
603 	net_buffer* buffer, uint32 mtu)
604 {
605 	TRACE_SK(protocol, "SendFragments(%lu bytes, mtu %lu)", buffer->size, mtu);
606 
607 	NetBufferHeaderReader<ipv4_header> originalHeader(buffer);
608 	if (originalHeader.Status() != B_OK)
609 		return originalHeader.Status();
610 
611 	uint16 headerLength = originalHeader->HeaderLength();
612 	uint32 bytesLeft = buffer->size - headerLength;
613 	uint32 fragmentOffset = 0;
614 	status_t status = B_OK;
615 
616 	net_buffer* headerBuffer = gBufferModule->split(buffer, headerLength);
617 	if (headerBuffer == NULL)
618 		return B_NO_MEMORY;
619 
620 	// TODO: we need to make sure ipv4_header is contiguous or
621 	// use another construct.
622 	NetBufferHeaderReader<ipv4_header> bufferHeader(headerBuffer);
623 	ipv4_header* header = &bufferHeader.Data();
624 
625 	// Adapt MTU to be a multiple of 8 (fragment offsets can only be specified
626 	// this way)
627 	mtu -= headerLength;
628 	mtu &= ~7;
629 	TRACE("  adjusted MTU to %ld, bytesLeft %ld", mtu, bytesLeft);
630 
631 	while (bytesLeft > 0) {
632 		uint32 fragmentLength = min_c(bytesLeft, mtu);
633 		bytesLeft -= fragmentLength;
634 		bool lastFragment = bytesLeft == 0;
635 
636 		header->total_length = htons(fragmentLength + headerLength);
637 		header->fragment_offset = htons((lastFragment ? 0 : IP_MORE_FRAGMENTS)
638 			| (fragmentOffset >> 3));
639 		header->checksum = 0;
640 		header->checksum = gStackModule->checksum((uint8*)header,
641 			headerLength);
642 			// TODO: compute the checksum only for those parts that changed?
643 
644 		TRACE("  send fragment of %ld bytes (%ld bytes left)", fragmentLength,
645 			bytesLeft);
646 
647 		net_buffer* fragmentBuffer;
648 		if (!lastFragment) {
649 			fragmentBuffer = gBufferModule->split(buffer, fragmentLength);
650 			fragmentOffset += fragmentLength;
651 		} else
652 			fragmentBuffer = buffer;
653 
654 		if (fragmentBuffer == NULL) {
655 			status = B_NO_MEMORY;
656 			break;
657 		}
658 
659 		// copy header to fragment
660 		status = gBufferModule->prepend(fragmentBuffer, header, headerLength);
661 
662 		// send fragment
663 		if (status == B_OK)
664 			status = sDatalinkModule->send_routed_data(route, fragmentBuffer);
665 
666 		if (lastFragment) {
667 			// we don't own the last buffer, so we don't have to free it
668 			break;
669 		}
670 
671 		if (status != B_OK) {
672 			gBufferModule->free(fragmentBuffer);
673 			break;
674 		}
675 	}
676 
677 	gBufferModule->free(headerBuffer);
678 	return status;
679 }
680 
681 
682 /*!	Delivers the provided \a buffer to all listeners of this multicast group.
683 	Does not take over ownership of the buffer.
684 */
685 static bool
686 deliver_multicast(net_protocol_module_info* module, net_buffer* buffer,
687 	bool deliverToRaw)
688 {
689 	if (module->deliver_data == NULL)
690 		return false;
691 
692 	// TODO: fix multicast!
693 	return false;
694 	MutexLocker _(sMulticastGroupsLock);
695 
696 	sockaddr_in* multicastAddr = (sockaddr_in*)buffer->destination;
697 
698 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
699 		&multicastAddr->sin_addr, buffer->interface_address->interface->index));
700 
701 	size_t count = 0;
702 
703 	while (it.HasNext()) {
704 		IPv4GroupInterface* state = it.Next();
705 
706 		ipv4_protocol* ipProtocol = state->Parent()->Socket();
707 		if (deliverToRaw && (ipProtocol->raw == NULL
708 				|| ipProtocol->socket->protocol != buffer->protocol))
709 			continue;
710 
711 		if (state->FilterAccepts(buffer)) {
712 			net_protocol* protocol = ipProtocol;
713 			if (protocol->module != module) {
714 				// as multicast filters are installed with an IPv4 protocol
715 				// reference, we need to go and find the appropriate instance
716 				// related to the 'receiving protocol' with module 'module'.
717 				net_protocol* protocol = ipProtocol->socket->first_protocol;
718 
719 				while (protocol != NULL && protocol->module != module)
720 					protocol = protocol->next;
721 			}
722 
723 			if (protocol != NULL) {
724 				module->deliver_data(protocol, buffer);
725 				count++;
726 			}
727 		}
728 	}
729 
730 	return count > 0;
731 }
732 
733 
734 /*!	Delivers the buffer to all listening raw sockets without taking ownership of
735 	the provided \a buffer.
736 	Returns \c true if there was any receiver, \c false if not.
737 */
738 static bool
739 raw_receive_data(net_buffer* buffer)
740 {
741 	MutexLocker locker(sRawSocketsLock);
742 
743 	if (sRawSockets.IsEmpty())
744 		return false;
745 
746 	TRACE("RawReceiveData(%i)", buffer->protocol);
747 
748 	if ((buffer->flags & MSG_MCAST) != 0) {
749 		// we need to call deliver_multicast here separately as
750 		// buffer still has the IP header, and it won't in the
751 		// next call. This isn't very optimized but works for now.
752 		// A better solution would be to hold separate hash tables
753 		// and lists for RAW and non-RAW sockets.
754 		return deliver_multicast(&gIPv4Module, buffer, true);
755 	}
756 
757 	RawSocketList::Iterator iterator = sRawSockets.GetIterator();
758 	size_t count = 0;
759 
760 	while (iterator.HasNext()) {
761 		RawSocket* raw = iterator.Next();
762 
763 		if (raw->Socket()->protocol == buffer->protocol) {
764 			raw->EnqueueClone(buffer);
765 			count++;
766 		}
767 	}
768 
769 	return count > 0;
770 }
771 
772 
773 static inline sockaddr*
774 fill_sockaddr_in(sockaddr_in* target, in_addr_t address)
775 {
776 	target->sin_family = AF_INET;
777 	target->sin_len = sizeof(sockaddr_in);
778 	target->sin_port = 0;
779 	target->sin_addr.s_addr = address;
780 	return (sockaddr*)target;
781 }
782 
783 
784 static status_t
785 get_int_option(void* target, size_t length, int value)
786 {
787 	if (length != sizeof(int))
788 		return B_BAD_VALUE;
789 
790 	return user_memcpy(target, &value, sizeof(int));
791 }
792 
793 
794 template<typename Type> static status_t
795 set_int_option(Type &target, const void* _value, size_t length)
796 {
797 	int value;
798 
799 	if (length != sizeof(int))
800 		return B_BAD_VALUE;
801 
802 	if (user_memcpy(&value, _value, sizeof(int)) != B_OK)
803 		return B_BAD_ADDRESS;
804 
805 	target = value;
806 	return B_OK;
807 }
808 
809 
810 static net_protocol_module_info*
811 receiving_protocol(uint8 protocol)
812 {
813 	net_protocol_module_info* module = sReceivingProtocol[protocol];
814 	if (module != NULL)
815 		return module;
816 
817 	MutexLocker locker(sReceivingProtocolLock);
818 
819 	module = sReceivingProtocol[protocol];
820 	if (module != NULL)
821 		return module;
822 
823 	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol,
824 			&module) == B_OK)
825 		sReceivingProtocol[protocol] = module;
826 
827 	return module;
828 }
829 
830 
831 // #pragma mark - multicast
832 
833 
834 status_t
835 IPv4Multicast::JoinGroup(IPv4GroupInterface* state)
836 {
837 	MutexLocker _(sMulticastGroupsLock);
838 
839 	sockaddr_in groupAddr;
840 	status_t status = sDatalinkModule->join_multicast(state->Interface(),
841 		sDomain, fill_sockaddr_in(&groupAddr, state->Address().s_addr));
842 	if (status != B_OK)
843 		return status;
844 
845 	sMulticastState->Insert(state);
846 	return B_OK;
847 }
848 
849 
850 status_t
851 IPv4Multicast::LeaveGroup(IPv4GroupInterface* state)
852 {
853 	MutexLocker _(sMulticastGroupsLock);
854 
855 	sMulticastState->Remove(state);
856 
857 	sockaddr_in groupAddr;
858 	return sDatalinkModule->leave_multicast(state->Interface(), sDomain,
859 		fill_sockaddr_in(&groupAddr, state->Address().s_addr));
860 }
861 
862 
863 static status_t
864 ipv4_delta_group(IPv4GroupInterface* group, int option,
865 	net_interface* interface, const in_addr* sourceAddr)
866 {
867 	switch (option) {
868 		case IP_ADD_MEMBERSHIP:
869 			return group->Add();
870 		case IP_DROP_MEMBERSHIP:
871 			return group->Drop();
872 		case IP_BLOCK_SOURCE:
873 			return group->BlockSource(*sourceAddr);
874 		case IP_UNBLOCK_SOURCE:
875 			return group->UnblockSource(*sourceAddr);
876 		case IP_ADD_SOURCE_MEMBERSHIP:
877 			return group->AddSSM(*sourceAddr);
878 		case IP_DROP_SOURCE_MEMBERSHIP:
879 			return group->DropSSM(*sourceAddr);
880 	}
881 
882 	return B_ERROR;
883 }
884 
885 
886 static status_t
887 ipv4_delta_membership(ipv4_protocol* protocol, int option,
888 	net_interface* interface, const in_addr* groupAddr,
889 	const in_addr* sourceAddr)
890 {
891 	IPv4MulticastFilter& filter = protocol->multicast_filter;
892 	IPv4GroupInterface* state = NULL;
893 	status_t status = B_OK;
894 
895 	switch (option) {
896 		case IP_ADD_MEMBERSHIP:
897 		case IP_ADD_SOURCE_MEMBERSHIP:
898 			status = filter.GetState(*groupAddr, interface, state, true);
899 			break;
900 
901 		case IP_DROP_MEMBERSHIP:
902 		case IP_BLOCK_SOURCE:
903 		case IP_UNBLOCK_SOURCE:
904 		case IP_DROP_SOURCE_MEMBERSHIP:
905 			filter.GetState(*groupAddr, interface, state, false);
906 			if (state == NULL) {
907 				if (option == IP_DROP_MEMBERSHIP
908 					|| option == IP_DROP_SOURCE_MEMBERSHIP)
909 					return EADDRNOTAVAIL;
910 
911 				return B_BAD_VALUE;
912 			}
913 			break;
914 	}
915 
916 	if (status != B_OK)
917 		return status;
918 
919 	status = ipv4_delta_group(state, option, interface, sourceAddr);
920 	filter.ReturnState(state);
921 	return status;
922 }
923 
924 
/*!	Translates a protocol independent MCAST_* socket option into the
	corresponding IPv4 specific IP_* option.
	\return the IP_* option, or -1 if \a option is not one of the known
		MCAST_* options.
*/
static int
generic_to_ipv4(int option)
{
	static const struct {
		int	generic;
		int	ipv4;
	} kOptionMap[] = {
		{ MCAST_JOIN_GROUP,			IP_ADD_MEMBERSHIP },
		{ MCAST_JOIN_SOURCE_GROUP,	IP_ADD_SOURCE_MEMBERSHIP },
		{ MCAST_LEAVE_GROUP,		IP_DROP_MEMBERSHIP },
		{ MCAST_BLOCK_SOURCE,		IP_BLOCK_SOURCE },
		{ MCAST_UNBLOCK_SOURCE,		IP_UNBLOCK_SOURCE },
		{ MCAST_LEAVE_SOURCE_GROUP,	IP_DROP_SOURCE_MEMBERSHIP }
	};

	const size_t count = sizeof(kOptionMap) / sizeof(kOptionMap[0]);
	for (size_t i = 0; i < count; i++) {
		if (kOptionMap[i].generic == option)
			return kOptionMap[i].ipv4;
	}

	return -1;
}
945 
946 
947 static net_interface*
948 get_multicast_interface(ipv4_protocol* protocol, const in_addr* address)
949 {
950 	// TODO: this is broken and leaks references
951 	sockaddr_in groupAddr;
952 	net_route* route = sDatalinkModule->get_route(sDomain,
953 		fill_sockaddr_in(&groupAddr, address ? address->s_addr : INADDR_ANY));
954 	if (route == NULL)
955 		return NULL;
956 
957 	return route->interface_address->interface;
958 }
959 
960 
961 static status_t
962 ipv4_delta_membership(ipv4_protocol* protocol, int option,
963 	in_addr* interfaceAddr, in_addr* groupAddr, in_addr* sourceAddr)
964 {
965 	net_interface* interface = NULL;
966 
967 	if (interfaceAddr->s_addr == INADDR_ANY) {
968 		interface = get_multicast_interface(protocol, groupAddr);
969 	} else {
970 		sockaddr_in address;
971 		interface = sDatalinkModule->get_interface_with_address(
972 			fill_sockaddr_in(&address, interfaceAddr->s_addr));
973 	}
974 
975 	if (interface == NULL)
976 		return B_DEVICE_NOT_FOUND;
977 
978 	return ipv4_delta_membership(protocol, option, interface,
979 		groupAddr, sourceAddr);
980 }
981 
982 
983 static status_t
984 ipv4_generic_delta_membership(ipv4_protocol* protocol, int option,
985 	uint32 index, const sockaddr_storage* _groupAddr,
986 	const sockaddr_storage* _sourceAddr)
987 {
988 	if (_groupAddr->ss_family != AF_INET
989 		|| (_sourceAddr != NULL && _sourceAddr->ss_family != AF_INET))
990 		return B_BAD_VALUE;
991 
992 	const in_addr* groupAddr = &((const sockaddr_in*)_groupAddr)->sin_addr;
993 
994 	// TODO: this is broken and leaks references
995 	net_interface* interface;
996 	if (index == 0)
997 		interface = get_multicast_interface(protocol, groupAddr);
998 	else
999 		interface = sDatalinkModule->get_interface(sDomain, index);
1000 
1001 	if (interface == NULL)
1002 		return B_DEVICE_NOT_FOUND;
1003 
1004 	const in_addr* sourceAddr = NULL;
1005 	if (_sourceAddr != NULL)
1006 		sourceAddr = &((const sockaddr_in*)_sourceAddr)->sin_addr;
1007 
1008 	return ipv4_delta_membership(protocol, generic_to_ipv4(option), interface,
1009 		groupAddr, sourceAddr);
1010 }
1011 
1012 
1013 //	#pragma mark - module interface
1014 
1015 
1016 net_protocol*
1017 ipv4_init_protocol(net_socket* socket)
1018 {
1019 	ipv4_protocol* protocol = new (std::nothrow) ipv4_protocol();
1020 	if (protocol == NULL)
1021 		return NULL;
1022 
1023 	protocol->raw = NULL;
1024 	protocol->service_type = 0;
1025 	protocol->time_to_live = kDefaultTTL;
1026 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
1027 	protocol->flags = 0;
1028 	protocol->multicast_address = NULL;
1029 	return protocol;
1030 }
1031 
1032 
1033 status_t
1034 ipv4_uninit_protocol(net_protocol* _protocol)
1035 {
1036 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1037 
1038 	delete protocol;
1039 
1040 	return B_OK;
1041 }
1042 
1043 
1044 /*!	Since open() is only called on the top level protocol, when we get here
1045 	it means we are on a SOCK_RAW socket.
1046 */
1047 status_t
1048 ipv4_open(net_protocol* _protocol)
1049 {
1050 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1051 
1052 	// Only root may open raw sockets
1053 	if (geteuid() != 0)
1054 		return B_NOT_ALLOWED;
1055 
1056 	RawSocket* raw = new (std::nothrow) RawSocket(protocol->socket);
1057 	if (raw == NULL)
1058 		return B_NO_MEMORY;
1059 
1060 	status_t status = raw->InitCheck();
1061 	if (status != B_OK) {
1062 		delete raw;
1063 		return status;
1064 	}
1065 
1066 	TRACE_SK(protocol, "Open()");
1067 
1068 	protocol->raw = raw;
1069 
1070 	MutexLocker locker(sRawSocketsLock);
1071 	sRawSockets.Add(raw);
1072 	return B_OK;
1073 }
1074 
1075 
1076 status_t
1077 ipv4_close(net_protocol* _protocol)
1078 {
1079 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1080 	RawSocket* raw = protocol->raw;
1081 	if (raw == NULL)
1082 		return B_ERROR;
1083 
1084 	TRACE_SK(protocol, "Close()");
1085 
1086 	MutexLocker locker(sRawSocketsLock);
1087 	sRawSockets.Remove(raw);
1088 	delete raw;
1089 	protocol->raw = NULL;
1090 
1091 	return B_OK;
1092 }
1093 
1094 
// Nothing is released here; \a protocol is ignored.
// Always returns B_OK.
1095 status_t
1096 ipv4_free(net_protocol* protocol)
1097 {
1098 	return B_OK;
1099 }
1100 
1101 
// Connecting is not implemented at this level: both arguments are ignored,
// and the call always fails with B_ERROR.
1102 status_t
1103 ipv4_connect(net_protocol* protocol, const struct sockaddr* address)
1104 {
1105 	return B_ERROR;
1106 }
1107 
1108 
// Accepting connections is not supported: both arguments are ignored
// (\a _acceptedSocket is left untouched), and B_NOT_SUPPORTED is returned.
1109 status_t
1110 ipv4_accept(net_protocol* protocol, struct net_socket** _acceptedSocket)
1111 {
1112 	return B_NOT_SUPPORTED;
1113 }
1114 
1115 
1116 status_t
1117 ipv4_control(net_protocol* _protocol, int level, int option, void* value,
1118 	size_t* _length)
1119 {
1120 	if ((level & LEVEL_MASK) != IPPROTO_IP)
1121 		return sDatalinkModule->control(sDomain, option, value, _length);
1122 
1123 	return B_BAD_VALUE;
1124 }
1125 
1126 
1127 status_t
1128 ipv4_getsockopt(net_protocol* _protocol, int level, int option, void* value,
1129 	int* _length)
1130 {
1131 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1132 
1133 	if (level == IPPROTO_IP) {
1134 		if (option == IP_HDRINCL) {
1135 			return get_int_option(value, *_length,
1136 				(protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0);
1137 		}
1138 		if (option == IP_RECVDSTADDR) {
1139 			return get_int_option(value, *_length,
1140 				(protocol->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0);
1141 		}
1142 		if (option == IP_TTL)
1143 			return get_int_option(value, *_length, protocol->time_to_live);
1144 		if (option == IP_TOS)
1145 			return get_int_option(value, *_length, protocol->service_type);
1146 		if (option == IP_MULTICAST_TTL) {
1147 			return get_int_option(value, *_length,
1148 				protocol->multicast_time_to_live);
1149 		}
1150 		if (option == IP_ADD_MEMBERSHIP
1151 			|| option == IP_DROP_MEMBERSHIP
1152 			|| option == IP_BLOCK_SOURCE
1153 			|| option == IP_UNBLOCK_SOURCE
1154 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1155 			|| option == IP_DROP_SOURCE_MEMBERSHIP
1156 			|| option == MCAST_JOIN_GROUP
1157 			|| option == MCAST_LEAVE_GROUP
1158 			|| option == MCAST_BLOCK_SOURCE
1159 			|| option == MCAST_UNBLOCK_SOURCE
1160 			|| option == MCAST_JOIN_SOURCE_GROUP
1161 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1162 			// RFC 3678, Section 4.1:
1163 			// ``An error of EOPNOTSUPP is returned if these options are
1164 			// used with getsockopt().''
1165 			return B_NOT_SUPPORTED;
1166 		}
1167 
1168 		dprintf("IPv4::getsockopt(): get unknown option: %d\n", option);
1169 		return ENOPROTOOPT;
1170 	}
1171 
1172 	return sSocketModule->get_option(protocol->socket, level, option, value,
1173 		_length);
1174 }
1175 
1176 
1177 status_t
1178 ipv4_setsockopt(net_protocol* _protocol, int level, int option,
1179 	const void* value, int length)
1180 {
1181 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1182 
1183 	if (level == IPPROTO_IP) {
1184 		if (option == IP_HDRINCL) {
1185 			int headerIncluded;
1186 			if (length != sizeof(int))
1187 				return B_BAD_VALUE;
1188 			if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded))
1189 					!= B_OK)
1190 				return B_BAD_ADDRESS;
1191 
1192 			if (headerIncluded)
1193 				protocol->flags |= IP_FLAG_HEADER_INCLUDED;
1194 			else
1195 				protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
1196 
1197 			return B_OK;
1198 		}
1199 		if (option == IP_RECVDSTADDR) {
1200 			int getAddress;
1201 			if (length != sizeof(int))
1202 				return B_BAD_VALUE;
1203 			if (user_memcpy(&getAddress, value, sizeof(int)) != B_OK)
1204 				return B_BAD_ADDRESS;
1205 
1206 			if (getAddress && (protocol->socket->type == SOCK_DGRAM
1207 					|| protocol->socket->type == SOCK_RAW))
1208 				protocol->flags |= IP_FLAG_RECEIVE_DEST_ADDR;
1209 			else
1210 				protocol->flags &= ~IP_FLAG_RECEIVE_DEST_ADDR;
1211 
1212 			return B_OK;
1213 		}
1214 		if (option == IP_TTL)
1215 			return set_int_option(protocol->time_to_live, value, length);
1216 		if (option == IP_TOS)
1217 			return set_int_option(protocol->service_type, value, length);
1218 		if (option == IP_MULTICAST_IF) {
1219 			if (length != sizeof(struct in_addr))
1220 				return B_BAD_VALUE;
1221 
1222 			struct sockaddr_in* address = new (std::nothrow) sockaddr_in;
1223 			if (address == NULL)
1224 				return B_NO_MEMORY;
1225 
1226 			if (user_memcpy(&address->sin_addr, value, sizeof(struct in_addr))
1227 					!= B_OK) {
1228 				delete address;
1229 				return B_BAD_ADDRESS;
1230 			}
1231 
1232 			// Using INADDR_ANY to remove the previous setting.
1233 			if (address->sin_addr.s_addr == htonl(INADDR_ANY)) {
1234 				delete address;
1235 				delete protocol->multicast_address;
1236 				protocol->multicast_address = NULL;
1237 				return B_OK;
1238 			}
1239 
1240 			struct net_interface* interface
1241 				= sDatalinkModule->get_interface_with_address(
1242 					(sockaddr*)address);
1243 			if (interface == NULL) {
1244 				delete address;
1245 				return EADDRNOTAVAIL;
1246 			}
1247 
1248 			delete protocol->multicast_address;
1249 			protocol->multicast_address = (struct sockaddr*)address;
1250 
1251 			sDatalinkModule->put_interface(interface);
1252 			return B_OK;
1253 		}
1254 		if (option == IP_MULTICAST_TTL) {
1255 			return set_int_option(protocol->multicast_time_to_live, value,
1256 				length);
1257 		}
1258 		if (option == IP_ADD_MEMBERSHIP || option == IP_DROP_MEMBERSHIP) {
1259 			ip_mreq mreq;
1260 			if (length != sizeof(ip_mreq))
1261 				return B_BAD_VALUE;
1262 			if (user_memcpy(&mreq, value, sizeof(ip_mreq)) != B_OK)
1263 				return B_BAD_ADDRESS;
1264 
1265 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1266 				&mreq.imr_multiaddr, NULL);
1267 		}
1268 		if (option == IP_BLOCK_SOURCE
1269 			|| option == IP_UNBLOCK_SOURCE
1270 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1271 			|| option == IP_DROP_SOURCE_MEMBERSHIP) {
1272 			ip_mreq_source mreq;
1273 			if (length != sizeof(ip_mreq_source))
1274 				return B_BAD_VALUE;
1275 			if (user_memcpy(&mreq, value, sizeof(ip_mreq_source)) != B_OK)
1276 				return B_BAD_ADDRESS;
1277 
1278 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1279 				&mreq.imr_multiaddr, &mreq.imr_sourceaddr);
1280 		}
1281 		if (option == MCAST_LEAVE_GROUP || option == MCAST_JOIN_GROUP) {
1282 			group_req greq;
1283 			if (length != sizeof(group_req))
1284 				return B_BAD_VALUE;
1285 			if (user_memcpy(&greq, value, sizeof(group_req)) != B_OK)
1286 				return B_BAD_ADDRESS;
1287 
1288 			return ipv4_generic_delta_membership(protocol, option,
1289 				greq.gr_interface, &greq.gr_group, NULL);
1290 		}
1291 		if (option == MCAST_BLOCK_SOURCE
1292 			|| option == MCAST_UNBLOCK_SOURCE
1293 			|| option == MCAST_JOIN_SOURCE_GROUP
1294 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1295 			group_source_req greq;
1296 			if (length != sizeof(group_source_req))
1297 				return B_BAD_VALUE;
1298 			if (user_memcpy(&greq, value, sizeof(group_source_req)) != B_OK)
1299 				return B_BAD_ADDRESS;
1300 
1301 			return ipv4_generic_delta_membership(protocol, option,
1302 				greq.gsr_interface, &greq.gsr_group, &greq.gsr_source);
1303 		}
1304 
1305 		dprintf("IPv4::setsockopt(): set unknown option: %d\n", option);
1306 		return ENOPROTOOPT;
1307 	}
1308 
1309 	return sSocketModule->set_option(protocol->socket, level, option,
1310 		value, length);
1311 }
1312 
1313 
1314 status_t
1315 ipv4_bind(net_protocol* protocol, const struct sockaddr* address)
1316 {
1317 	if (address->sa_family != AF_INET)
1318 		return EAFNOSUPPORT;
1319 
1320 	// only INADDR_ANY and addresses of local interfaces are accepted:
1321 	if (((sockaddr_in*)address)->sin_addr.s_addr == INADDR_ANY
1322 		|| IN_MULTICAST(ntohl(((sockaddr_in*)address)->sin_addr.s_addr))
1323 		|| sDatalinkModule->is_local_address(sDomain, address, NULL, NULL)) {
1324 		memcpy(&protocol->socket->address, address, sizeof(struct sockaddr_in));
1325 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
1326 			// explicitly set length, as our callers can't be trusted to
1327 			// always provide the correct length!
1328 		return B_OK;
1329 	}
1330 
1331 	return B_ERROR;
1332 		// address is unknown on this host
1333 }
1334 
1335 
1336 status_t
1337 ipv4_unbind(net_protocol* protocol, struct sockaddr* address)
1338 {
1339 	// nothing to do here
1340 	return B_OK;
1341 }
1342 
1343 
1344 status_t
1345 ipv4_listen(net_protocol* protocol, int count)
1346 {
1347 	return B_NOT_SUPPORTED;
1348 }
1349 
1350 
1351 status_t
1352 ipv4_shutdown(net_protocol* protocol, int direction)
1353 {
1354 	return B_NOT_SUPPORTED;
1355 }
1356 
1357 
1358 status_t
1359 ipv4_send_routed_data(net_protocol* _protocol, struct net_route* route,
1360 	net_buffer* buffer)
1361 {
1362 	if (route == NULL)
1363 		return B_BAD_VALUE;
1364 
1365 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1366 	net_interface_address* interfaceAddress = route->interface_address;
1367 	net_interface* interface = interfaceAddress->interface;
1368 
1369 	TRACE_SK(protocol, "SendRoutedData(%p, %p [%ld bytes])", route, buffer,
1370 		buffer->size);
1371 
1372 	sockaddr_in& source = *(sockaddr_in*)buffer->source;
1373 	sockaddr_in& destination = *(sockaddr_in*)buffer->destination;
1374 	sockaddr_in* broadcastAddress = (sockaddr_in*)interfaceAddress->destination;
1375 
1376 	bool checksumNeeded = true;
1377 	bool headerIncluded = false;
1378 	if (protocol != NULL)
1379 		headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
1380 
1381 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1382 
1383 	if (destination.sin_addr.s_addr == INADDR_ANY)
1384 		return EDESTADDRREQ;
1385 
1386 	if ((interface->device->flags & IFF_BROADCAST) != 0
1387 		&& (destination.sin_addr.s_addr == INADDR_BROADCAST
1388 			|| (broadcastAddress != NULL && destination.sin_addr.s_addr
1389 					== broadcastAddress->sin_addr.s_addr))) {
1390 		if (protocol && !(protocol->socket->options & SO_BROADCAST))
1391 			return B_BAD_VALUE;
1392 		buffer->flags |= MSG_BCAST;
1393 	} else if (IN_MULTICAST(ntohl(destination.sin_addr.s_addr)))
1394 		buffer->flags |= MSG_MCAST;
1395 
1396 	// Add IP header (if needed)
1397 
1398 	if (!headerIncluded) {
1399 		NetBufferPrepend<ipv4_header> header(buffer);
1400 		if (header.Status() != B_OK)
1401 			return header.Status();
1402 
1403 		header->version = IPV4_VERSION;
1404 		header->header_length = sizeof(ipv4_header) / 4;
1405 		header->service_type = protocol ? protocol->service_type : 0;
1406 		header->total_length = htons(buffer->size);
1407 		header->id = htons(atomic_add(&sPacketID, 1));
1408 		header->fragment_offset = 0;
1409 		if (protocol) {
1410 			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
1411 				? protocol->multicast_time_to_live : protocol->time_to_live;
1412 		} else {
1413 			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
1414 				? kDefaultMulticastTTL : kDefaultTTL;
1415 		}
1416 		header->protocol = protocol
1417 			? protocol->socket->protocol : buffer->protocol;
1418 		header->checksum = 0;
1419 
1420 		header->source = source.sin_addr.s_addr;
1421 		header->destination = destination.sin_addr.s_addr;
1422 
1423 		TRACE_ONLY(dump_ipv4_header(*header));
1424 	} else {
1425 		// if IP_HDRINCL, check if the source address is set
1426 		NetBufferHeaderReader<ipv4_header> header(buffer);
1427 		if (header.Status() != B_OK)
1428 			return header.Status();
1429 
1430 		if (header->source == 0) {
1431 			header->source = source.sin_addr.s_addr;
1432 			header->checksum = 0;
1433 			header.Sync();
1434 		} else
1435 			checksumNeeded = false;
1436 
1437 		TRACE("  Header was already supplied:");
1438 		TRACE_ONLY(dump_ipv4_header(*header));
1439 	}
1440 
1441 	if (buffer->size > 0xffff)
1442 		return EMSGSIZE;
1443 
1444 	if (checksumNeeded) {
1445 		*IPChecksumField(buffer) = gBufferModule->checksum(buffer, 0,
1446 			sizeof(ipv4_header), true);
1447 	}
1448 
1449 	TRACE_SK(protocol, "  SendRoutedData(): header chksum: %ld, buffer "
1450 		"checksum: %ld",
1451 		gBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
1452 		gBufferModule->checksum(buffer, 0, buffer->size, true));
1453 
1454 	TRACE_SK(protocol, "  SendRoutedData(): destination: %08x",
1455 		ntohl(destination.sin_addr.s_addr));
1456 
1457 	uint32 mtu = route->mtu ? route->mtu : interface->mtu;
1458 	if (buffer->size > mtu) {
1459 		// we need to fragment the packet
1460 		return send_fragments(protocol, route, buffer, mtu);
1461 	}
1462 
1463 	return sDatalinkModule->send_routed_data(route, buffer);
1464 }
1465 
1466 
1467 status_t
1468 ipv4_send_data(net_protocol* _protocol, net_buffer* buffer)
1469 {
1470 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1471 
1472 	TRACE_SK(protocol, "SendData(%p [%ld bytes])", buffer, buffer->size);
1473 
1474 	if (protocol != NULL && (protocol->flags & IP_FLAG_HEADER_INCLUDED)) {
1475 		if (buffer->size < sizeof(ipv4_header))
1476 			return B_BAD_VALUE;
1477 
1478 		sockaddr_in* source = (sockaddr_in*)buffer->source;
1479 		sockaddr_in* destination = (sockaddr_in*)buffer->destination;
1480 
1481 		fill_sockaddr_in(source, *NetBufferField<in_addr_t,
1482 			offsetof(ipv4_header, source)>(buffer));
1483 		fill_sockaddr_in(destination, *NetBufferField<in_addr_t,
1484 			offsetof(ipv4_header, destination)>(buffer));
1485 	}
1486 
1487 	// handle IP_MULTICAST_IF
1488 	if (IN_MULTICAST(ntohl(
1489 			((sockaddr_in*)buffer->destination)->sin_addr.s_addr))
1490 		&& protocol != NULL && protocol->multicast_address != NULL) {
1491 		net_interface_address* address = sDatalinkModule->get_interface_address(
1492 			protocol->multicast_address);
1493 		if (address == NULL || (address->interface->flags & IFF_UP) == 0) {
1494 			sDatalinkModule->put_interface_address(address);
1495 			return EADDRNOTAVAIL;
1496 		}
1497 
1498 		sDatalinkModule->put_interface_address(buffer->interface_address);
1499 		buffer->interface_address = address;
1500 			// the buffer takes over ownership of the address
1501 
1502 		net_route* route = sDatalinkModule->get_route(sDomain, address->local);
1503 		if (route == NULL)
1504 			return ENETUNREACH;
1505 
1506 		return sDatalinkModule->send_routed_data(route, buffer);
1507 	}
1508 
1509 	return sDatalinkModule->send_data(protocol, sDomain, buffer);
1510 }
1511 
1512 
1513 ssize_t
1514 ipv4_send_avail(net_protocol* protocol)
1515 {
1516 	return B_ERROR;
1517 }
1518 
1519 
1520 status_t
1521 ipv4_read_data(net_protocol* _protocol, size_t numBytes, uint32 flags,
1522 	net_buffer** _buffer)
1523 {
1524 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1525 	RawSocket* raw = protocol->raw;
1526 	if (raw == NULL)
1527 		return B_ERROR;
1528 
1529 	TRACE_SK(protocol, "ReadData(%lu, 0x%lx)", numBytes, flags);
1530 
1531 	return raw->Dequeue(flags, _buffer);
1532 }
1533 
1534 
1535 ssize_t
1536 ipv4_read_avail(net_protocol* _protocol)
1537 {
1538 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1539 	RawSocket* raw = protocol->raw;
1540 	if (raw == NULL)
1541 		return B_ERROR;
1542 
1543 	return raw->AvailableData();
1544 }
1545 
1546 
1547 struct net_domain*
1548 ipv4_get_domain(net_protocol* protocol)
1549 {
1550 	return sDomain;
1551 }
1552 
1553 
1554 size_t
1555 ipv4_get_mtu(net_protocol* protocol, const struct sockaddr* address)
1556 {
1557 	net_route* route = sDatalinkModule->get_route(sDomain, address);
1558 	if (route == NULL)
1559 		return 0;
1560 
1561 	size_t mtu;
1562 	if (route->mtu != 0)
1563 		mtu = route->mtu;
1564 	else
1565 		mtu = route->interface_address->interface->mtu;
1566 
1567 	sDatalinkModule->put_route(sDomain, route);
1568 	return mtu - sizeof(ipv4_header);
1569 }
1570 
1571 
1572 status_t
1573 ipv4_receive_data(net_buffer* buffer)
1574 {
1575 	TRACE("ipv4_receive_data(%p [%ld bytes])", buffer, buffer->size);
1576 
1577 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1578 	if (bufferHeader.Status() != B_OK)
1579 		return bufferHeader.Status();
1580 
1581 	ipv4_header& header = bufferHeader.Data();
1582 	TRACE_ONLY(dump_ipv4_header(header));
1583 
1584 	if (header.version != IPV4_VERSION)
1585 		return B_BAD_TYPE;
1586 
1587 	uint16 packetLength = header.TotalLength();
1588 	uint16 headerLength = header.HeaderLength();
1589 	if (packetLength > buffer->size
1590 		|| headerLength < sizeof(ipv4_header))
1591 		return B_BAD_DATA;
1592 
1593 	// TODO: would be nice to have a direct checksum function somewhere
1594 	if (gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1595 		return B_BAD_DATA;
1596 
1597 	// lower layers notion of broadcast or multicast have no relevance to us
1598 	// other than deciding whether to send an ICMP error
1599 	bool wasMulticast = (buffer->flags & (MSG_BCAST | MSG_MCAST)) != 0;
1600 	bool notForUs = false;
1601 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1602 
1603 	sockaddr_in destination;
1604 	fill_sockaddr_in(&destination, header.destination);
1605 
1606 	if (header.destination == INADDR_BROADCAST) {
1607 		buffer->flags |= MSG_BCAST;
1608 
1609 		// Find first interface with a matching family
1610 		if (!sDatalinkModule->is_local_link_address(sDomain, true,
1611 				buffer->destination, &buffer->interface_address))
1612 			notForUs = !wasMulticast;
1613 	} else if (IN_MULTICAST(ntohl(header.destination))) {
1614 		buffer->flags |= MSG_MCAST;
1615 	} else {
1616 		uint32 matchedAddressType = 0;
1617 
1618 		// test if the packet is really for us
1619 		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1620 				&buffer->interface_address, &matchedAddressType)
1621 			&& !sDatalinkModule->is_local_link_address(sDomain, true,
1622 				buffer->destination, &buffer->interface_address)) {
1623 			// if the buffer was a link layer multicast, regard it as a
1624 			// broadcast, and let the upper levels decide what to do with it
1625 			if (wasMulticast)
1626 				buffer->flags |= MSG_BCAST;
1627 			else
1628 				notForUs = true;
1629 		} else {
1630 			// copy over special address types (MSG_BCAST or MSG_MCAST):
1631 			buffer->flags |= matchedAddressType;
1632 		}
1633 	}
1634 
1635 	// set net_buffer's source/destination address
1636 	fill_sockaddr_in((struct sockaddr_in*)buffer->source, header.source);
1637 	memcpy(buffer->destination, &destination, sizeof(sockaddr_in));
1638 
1639 	buffer->protocol = header.protocol;
1640 
1641 	if (notForUs) {
1642 		TRACE("  ipv4_receive_data(): packet was not for us %x -> %x",
1643 			ntohl(header.source), ntohl(header.destination));
1644 
1645 		if (!wasMulticast) {
1646 			// Send ICMP error: Host unreachable
1647 			sDomain->module->error_reply(NULL, buffer, B_NET_ERROR_UNREACH_HOST,
1648 				NULL);
1649 		}
1650 
1651 		return B_ERROR;
1652 	}
1653 
1654 	// remove any trailing/padding data
1655 	status_t status = gBufferModule->trim(buffer, packetLength);
1656 	if (status != B_OK)
1657 		return status;
1658 
1659 	// check for fragmentation
1660 	uint16 fragmentOffset = header.FragmentOffset();
1661 	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
1662 		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
1663 		// this is a fragment
1664 		TRACE("  ipv4_receive_data(): Found a Fragment!");
1665 		status = reassemble_fragments(header, &buffer);
1666 		TRACE("  ipv4_receive_data():  -> %s", strerror(status));
1667 		if (status != B_OK)
1668 			return status;
1669 
1670 		if (buffer == NULL) {
1671 			// buffer was put into fragment packet
1672 			TRACE("  ipv4_receive_data(): Not yet assembled.");
1673 			return B_OK;
1674 		}
1675 	}
1676 
1677 	// Since the buffer might have been changed (reassembled fragment)
1678 	// we must no longer access bufferHeader or header anymore after
1679 	// this point
1680 
1681 	bool rawDelivered = raw_receive_data(buffer);
1682 
1683 	// Preserve the ipv4 header for ICMP processing
1684 	gBufferModule->store_header(buffer);
1685 
1686 	bufferHeader.Remove(headerLength);
1687 		// the header is of variable size and may include IP options
1688 		// (TODO: that we ignore for now)
1689 
1690 	net_protocol_module_info* module = receiving_protocol(buffer->protocol);
1691 	if (module == NULL) {
1692 		// no handler for this packet
1693 		if (!rawDelivered) {
1694 			sDomain->module->error_reply(NULL, buffer,
1695 				B_NET_ERROR_UNREACH_PROTOCOL, NULL);
1696 		}
1697 		return EAFNOSUPPORT;
1698 	}
1699 
1700 	if ((buffer->flags & MSG_MCAST) != 0) {
1701 		// Unfortunately historical reasons dictate that the IP multicast
1702 		// model be a little different from the unicast one. We deliver
1703 		// this frame directly to all sockets registered with interest
1704 		// for this multicast group.
1705 		deliver_multicast(module, buffer, false);
1706 		gBufferModule->free(buffer);
1707 		return B_OK;
1708 	}
1709 
1710 	return module->receive_data(buffer);
1711 }
1712 
1713 
1714 status_t
1715 ipv4_deliver_data(net_protocol* _protocol, net_buffer* buffer)
1716 {
1717 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1718 
1719 	if (protocol->raw == NULL)
1720 		return B_ERROR;
1721 
1722 	return protocol->raw->EnqueueClone(buffer);
1723 }
1724 
1725 
1726 status_t
1727 ipv4_error_received(net_error error, net_buffer* buffer)
1728 {
1729 	TRACE("  ipv4_error_received(error %d, buffer %p [%zu bytes])", (int)error,
1730 		buffer, buffer->size);
1731 
1732 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1733 	if (bufferHeader.Status() != B_OK)
1734 		return bufferHeader.Status();
1735 
1736 	ipv4_header& header = bufferHeader.Data();
1737 	TRACE_ONLY(dump_ipv4_header(header));
1738 
1739 	// We do not check the packet length, as we usually only get a part of it
1740 	uint16 headerLength = header.HeaderLength();
1741 	if (header.version != IPV4_VERSION
1742 		|| headerLength < sizeof(ipv4_header)
1743 		|| gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1744 		return B_BAD_DATA;
1745 
1746 	// Restore addresses of the original buffer
1747 
1748 	// lower layers notion of broadcast or multicast have no relevance to us
1749 	// TODO: they actually have when deciding whether to send an ICMP error
1750 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1751 
1752 	fill_sockaddr_in((struct sockaddr_in*)buffer->source, header.source);
1753 	fill_sockaddr_in((struct sockaddr_in*)buffer->destination,
1754 		header.destination);
1755 
1756 	if (header.destination == INADDR_BROADCAST)
1757 		buffer->flags |= MSG_BCAST;
1758 	else if (IN_MULTICAST(ntohl(header.destination)))
1759 		buffer->flags |= MSG_MCAST;
1760 
1761 	// test if the packet is really from us
1762 	if (!sDatalinkModule->is_local_address(sDomain, buffer->source, NULL,
1763 			NULL)) {
1764 		TRACE("  ipv4_error_received(): packet was not for us %x -> %x",
1765 			ntohl(header.source), ntohl(header.destination));
1766 		return B_ERROR;
1767 	}
1768 
1769 	buffer->protocol = header.protocol;
1770 
1771 	bufferHeader.Remove(headerLength);
1772 
1773 	net_protocol_module_info* protocol = receiving_protocol(buffer->protocol);
1774 	if (protocol == NULL)
1775 		return B_ERROR;
1776 
1777 	// propagate error
1778 	return protocol->error_received(error, buffer);
1779 }
1780 
1781 
1782 status_t
1783 ipv4_error_reply(net_protocol* protocol, net_buffer* cause, net_error error,
1784 	net_error_data* errorData)
1785 {
1786 	// Directly obtain the ICMP protocol module
1787 	net_protocol_module_info* icmp = receiving_protocol(IPPROTO_ICMP);
1788 	if (icmp == NULL)
1789 		return B_ERROR;
1790 
1791 	return icmp->error_reply(protocol, cause, error, errorData);
1792 }
1793 
1794 
1795 ssize_t
1796 ipv4_process_ancillary_data_no_container(net_protocol* protocol,
1797 	net_buffer* buffer, void* msgControl, size_t msgControlLen)
1798 {
1799 	ssize_t bytesWritten = 0;
1800 
1801 	if ((((ipv4_protocol*)protocol)->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0) {
1802 		if (msgControlLen < CMSG_SPACE(sizeof(struct in_addr)))
1803 			return B_NO_MEMORY;
1804 
1805 		cmsghdr* messageHeader = (cmsghdr*)msgControl;
1806 		messageHeader->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
1807 		messageHeader->cmsg_level = IPPROTO_IP;
1808 		messageHeader->cmsg_type = IP_RECVDSTADDR;
1809 
1810 		memcpy(CMSG_DATA(messageHeader),
1811 		 	&((struct sockaddr_in*)buffer->destination)->sin_addr,
1812 		 	sizeof(struct in_addr));
1813 
1814 		bytesWritten += CMSG_SPACE(sizeof(struct in_addr));
1815 	}
1816 
1817 	return bytesWritten;
1818 }
1819 
1820 
1821 //	#pragma mark -
1822 
1823 
1824 status_t
1825 init_ipv4()
1826 {
1827 	sPacketID = (int32)system_time();
1828 
1829 	mutex_init(&sRawSocketsLock, "raw sockets");
1830 	mutex_init(&sFragmentLock, "IPv4 Fragments");
1831 	mutex_init(&sMulticastGroupsLock, "IPv4 multicast groups");
1832 	mutex_init(&sReceivingProtocolLock, "IPv4 receiving protocols");
1833 
1834 	status_t status;
1835 
1836 	sMulticastState = new MulticastState();
1837 	if (sMulticastState == NULL) {
1838 		status = B_NO_MEMORY;
1839 		goto err4;
1840 	}
1841 
1842 	status = sMulticastState->Init();
1843 	if (status != B_OK)
1844 		goto err5;
1845 
1846 	new (&sFragmentHash) FragmentTable();
1847 	status = sFragmentHash.Init(256);
1848 	if (status != B_OK)
1849 		goto err5;
1850 
1851 	new (&sRawSockets) RawSocketList;
1852 		// static initializers do not work in the kernel,
1853 		// so we have to do it here, manually
1854 		// TODO: for modules, this shouldn't be required
1855 
1856 	status = gStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
1857 		"network/protocols/ipv4/v1", NULL);
1858 	if (status != B_OK)
1859 		goto err6;
1860 
1861 	status = gStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
1862 		&gIPv4AddressModule, &sDomain);
1863 	if (status != B_OK)
1864 		goto err6;
1865 
1866 	add_debugger_command("ipv4_multicast", dump_ipv4_multicast,
1867 		"list all current IPv4 multicast states");
1868 
1869 	return B_OK;
1870 
1871 err6:
1872 	sFragmentHash.~FragmentTable();
1873 err5:
1874 	delete sMulticastState;
1875 err4:
1876 	mutex_destroy(&sReceivingProtocolLock);
1877 	mutex_destroy(&sMulticastGroupsLock);
1878 	mutex_destroy(&sFragmentLock);
1879 	mutex_destroy(&sRawSocketsLock);
1880 	return status;
1881 }
1882 
1883 
1884 status_t
1885 uninit_ipv4()
1886 {
1887 	mutex_lock(&sReceivingProtocolLock);
1888 
1889 	remove_debugger_command("ipv4_multicast", dump_ipv4_multicast);
1890 
1891 	// put all the domain receiving protocols we gathered so far
1892 	for (uint32 i = 0; i < 256; i++) {
1893 		if (sReceivingProtocol[i] != NULL)
1894 			gStackModule->put_domain_receiving_protocol(sDomain, i);
1895 	}
1896 
1897 	gStackModule->unregister_domain(sDomain);
1898 	mutex_unlock(&sReceivingProtocolLock);
1899 
1900 	delete sMulticastState;
1901 	sFragmentHash.~FragmentTable();
1902 
1903 	mutex_destroy(&sMulticastGroupsLock);
1904 	mutex_destroy(&sFragmentLock);
1905 	mutex_destroy(&sRawSocketsLock);
1906 	mutex_destroy(&sReceivingProtocolLock);
1907 
1908 	return B_OK;
1909 }
1910 
1911 
1912 static status_t
1913 ipv4_std_ops(int32 op, ...)
1914 {
1915 	switch (op) {
1916 		case B_MODULE_INIT:
1917 			return init_ipv4();
1918 		case B_MODULE_UNINIT:
1919 			return uninit_ipv4();
1920 
1921 		default:
1922 			return B_ERROR;
1923 	}
1924 }
1925 
1926 
// The IPv4 protocol module: module info (name, flags, std_ops hook), the
// protocol flags, and the protocol hooks implemented in this file. Hooks that
// are not provided are set to NULL.
net_protocol_module_info gIPv4Module = {
	{
		"network/protocols/ipv4/v1",
		0,
		ipv4_std_ops
	},
	NET_PROTOCOL_ATOMIC_MESSAGES,

	ipv4_init_protocol,
	ipv4_uninit_protocol,
	ipv4_open,
	ipv4_close,
	ipv4_free,
	ipv4_connect,
	ipv4_accept,
	ipv4_control,
	ipv4_getsockopt,
	ipv4_setsockopt,
	ipv4_bind,
	ipv4_unbind,
	ipv4_listen,
	ipv4_shutdown,
	ipv4_send_data,
	ipv4_send_routed_data,
	ipv4_send_avail,
	ipv4_read_data,
	ipv4_read_avail,
	ipv4_get_domain,
	ipv4_get_mtu,
	ipv4_receive_data,
	ipv4_deliver_data,
	ipv4_error_received,
	ipv4_error_reply,
	NULL,		// add_ancillary_data()
	NULL,		// process_ancillary_data()
	ipv4_process_ancillary_data_no_container,
	NULL,		// send_data_no_buffer()
	NULL		// read_data_no_buffer()
};
1966 
// Modules this add-on depends on, each paired with the global pointer through
// which this file accesses it. The list ends with an empty entry.
module_dependency module_dependencies[] = {
	{NET_STACK_MODULE_NAME, (module_info**)&gStackModule},
	{NET_BUFFER_MODULE_NAME, (module_info**)&gBufferModule},
	{NET_DATALINK_MODULE_NAME, (module_info**)&sDatalinkModule},
	{NET_SOCKET_MODULE_NAME, (module_info**)&sSocketModule},
	{}
};
1974 
// NULL-terminated list of the modules defined in this file.
module_info* modules[] = {
	(module_info*)&gIPv4Module,
	NULL
};
1979