xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision 21258e2674226d6aa732321b6f8494841895af5f)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4.h"
11 #include "ipv4_address.h"
12 #include "multicast.h"
13 
14 #include <net_datalink.h>
15 #include <net_datalink_protocol.h>
16 #include <net_device.h>
17 #include <net_protocol.h>
18 #include <net_stack.h>
19 #include <NetBufferUtilities.h>
20 #include <ProtocolUtilities.h>
21 
22 #include <KernelExport.h>
23 #include <util/AutoLock.h>
24 #include <util/list.h>
25 #include <util/DoublyLinkedList.h>
26 #include <util/MultiHashTable.h>
27 
28 #include <netinet/in.h>
29 #include <netinet/ip.h>
30 #include <new>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <utility>
35 
36 
//#define TRACE_IPV4
	// uncomment to enable the debug output of the TRACE*() macros below
#ifdef TRACE_IPV4
// Prints a message via dprintf(), prefixed with "IPv4" and system_time().
#	define TRACE(format, args...) \
		dprintf("IPv4 [%llu] " format "\n", system_time() , ##args)
// Same as TRACE(), but additionally prints the given protocol pointer.
#	define TRACE_SK(protocol, format, args...) \
		dprintf("IPv4 [%llu] %p " format "\n", system_time(), \
			protocol , ##args)
// Keeps its argument only when tracing is enabled.
#	define TRACE_ONLY(x) x
#else
// Tracing disabled: TRACE() and TRACE_SK() expand to an empty statement,
// TRACE_ONLY() drops its argument.
#	define TRACE(args...) ;
#	define TRACE_SK(args...) ;
#	define TRACE_ONLY(x)
#endif
50 
51 
#define MAX_HASH_FRAGMENTS 		64
	// slots in the fragment packet's hash
#define FRAGMENT_TIMEOUT		60000000LL
	// discard fragment after 60 seconds (the value is passed to set_timer()
	// and is given in microseconds)


// List of the fragment buffers that were collected for a single packet
typedef DoublyLinkedList<struct net_buffer,
	DoublyLinkedListCLink<struct net_buffer> > FragmentList;

// Typed accessor for the checksum field of an ipv4_header in a net_buffer
typedef NetBufferField<uint16, offsetof(ipv4_header, checksum)> IPChecksumField;
62 
// Identifies the packet a fragment belongs to and serves as the key of the
// fragment hash table. reassemble_fragments() fills all members directly
// from the fragment's IPv4 header.
struct ipv4_packet_key {
	in_addr_t	source;
		// source address as found in the header
	in_addr_t	destination;
		// destination address as found in the header
	uint16		id;
		// packet ID, copied from the header without byte swapping
	uint8		protocol;
		// protocol number of the payload
};
69 
70 
/*!	Collects the fragments of a single IPv4 packet (identified by its
	ipv4_packet_key) until the packet can be reassembled.
	The objects are stored in the fragment hash table; each one carries a
	timer that invokes StaleTimer() to get rid of packets that were not
	completed in time.
*/
class FragmentPacket {
public:
								FragmentPacket(const ipv4_packet_key& key);
								~FragmentPacket();

			// Adds the bytes from "start" up to (not including) "end" that
			// are carried by "buffer".
			status_t			AddFragment(uint16 start, uint16 end,
									net_buffer* buffer, bool lastFragment);
			// Merges all collected fragments into "to".
			status_t			Reassemble(net_buffer* to);

			// The packet is complete once the last fragment has been seen
			// and no bytes are missing anymore.
			bool				IsComplete() const
									{ return fReceivedLastFragment
										&& fBytesLeft == 0; }

			const ipv4_packet_key& Key() const { return fKey; }
			// Link that the hash table definition uses to chain packets.
			FragmentPacket*&	HashTableLink() { return fNext; }

	// Timer hook: removes the packet from the hash and deletes it.
	static	void				StaleTimer(struct net_timer* timer, void* data);

private:
			FragmentPacket*		fNext;
			struct ipv4_packet_key fKey;
			uint32				fIndex;
				// "index" of the fragment buffer that was added last
			bool				fReceivedLastFragment;
			int32				fBytesLeft;
				// starts out as IP_MAXPACKET and is reduced by every range
				// of bytes that is added
			FragmentList		fFragments;
			net_timer			fTimer;
};
98 
99 
100 struct FragmentHashDefinition {
101 	typedef ipv4_packet_key KeyType;
102 	typedef FragmentPacket ValueType;
103 
104 	size_t HashKey(const KeyType& key) const
105 	{
106 		return (key.source ^ key.destination ^ key.protocol ^ key.id);
107 	}
108 
109 	size_t Hash(ValueType* value) const
110 	{
111 		return HashKey(value->Key());
112 	}
113 
114 	bool Compare(const KeyType& key, ValueType* value) const
115 	{
116 		const ipv4_packet_key& packetKey = value->Key();
117 
118 		return packetKey.id == key.id
119 			&& packetKey.source == key.source
120 			&& packetKey.destination == key.destination
121 			&& packetKey.protocol == key.protocol;
122 	}
123 
124 	ValueType*& GetLink(ValueType* value) const
125 	{
126 		return value->HashTableLink();
127 	}
128 };
129 
// Hash table of the packets that are currently being reassembled
typedef BOpenHashTable<FragmentHashDefinition, false, true> FragmentTable;


// Datagram socket that backs a raw IPv4 socket. It is created in ipv4_open()
// and linked into the list of all raw sockets.
class RawSocket
	: public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
public:
								RawSocket(net_socket* socket);
};

typedef DoublyLinkedList<RawSocket> RawSocketList;

// IPv4 instantiations of the generic multicast classes
typedef MulticastGroupInterface<IPv4Multicast> IPv4GroupInterface;
typedef MulticastFilter<IPv4Multicast> IPv4MulticastFilter;
143 
144 struct MulticastStateHash {
145 	typedef std::pair<const in_addr* , uint32> KeyType;
146 	typedef IPv4GroupInterface ValueType;
147 
148 	size_t HashKey(const KeyType &key) const
149 		{ return key.first->s_addr ^ key.second; }
150 	size_t Hash(ValueType* value) const
151 		{ return HashKey(std::make_pair(&value->Address(),
152 			value->Interface()->index)); }
153 	bool Compare(const KeyType &key, ValueType* value) const
154 		{ return value->Interface()->index == key.second
155 			&& value->Address().s_addr == key.first->s_addr; }
156 	bool CompareValues(ValueType* value1, ValueType* value2) const
157 		{ return value1->Interface()->index == value2->Interface()->index
158 			&& value1->Address().s_addr == value2->Address().s_addr; }
159 	ValueType*& GetLink(ValueType* value) const { return value->MulticastGroupsHashLink(); }
160 };
161 
162 
// Per socket state of the IPv4 protocol. Objects are created by
// ipv4_init_protocol(), which also assigns the defaults of all members the
// constructor does not initialize.
struct ipv4_protocol : net_protocol {
	ipv4_protocol()
		:
		raw(NULL),
		multicast_filter(this)
	{
	}

	~ipv4_protocol()
	{
		// the raw socket (if any) is owned by the protocol
		delete raw;
	}

	RawSocket*			raw;
		// set by ipv4_open(), reset by ipv4_close()
	uint8				service_type;
	uint8				time_to_live;
	uint8				multicast_time_to_live;
	uint32				flags;
		// combination of the IP_FLAG_* values
	struct sockaddr*	multicast_address; // for IP_MULTICAST_IF

	IPv4MulticastFilter	multicast_filter;
};
185 
// protocol flags (stored in ipv4_protocol::flags)
#define IP_FLAG_HEADER_INCLUDED		0x01
	// reported for IP_HDRINCL
#define IP_FLAG_RECEIVE_DEST_ADDR	0x02
	// reported for IP_RECVDSTADDR


// defaults assigned to new protocol objects in ipv4_init_protocol()
static const int kDefaultTTL = 254;
static const int kDefaultMulticastTTL = 1;


extern net_protocol_module_info gIPv4Module;
	// we need this in ipv4_std_ops() for registering the AF_INET domain

net_stack_module_info* gStackModule;
net_buffer_module_info* gBufferModule;

static struct net_domain* sDomain;
static net_datalink_module_info* sDatalinkModule;
static net_socket_module_info* sSocketModule;
static int32 sPacketID;
static RawSocketList sRawSockets;
	// all open raw sockets, guarded by sRawSocketsLock
static mutex sRawSocketsLock;
static mutex sFragmentLock;
	// guards sFragmentHash
static FragmentTable sFragmentHash;
static mutex sMulticastGroupsLock;
	// guards sMulticastState

typedef MultiHashTable<MulticastStateHash> MulticastState;
static MulticastState* sMulticastState;

// Cache of the receiving protocol modules, indexed by protocol number and
// filled on demand by receiving_protocol().
static net_protocol_module_info* sReceivingProtocol[256];
static mutex sReceivingProtocolLock;
216 
217 
/*!	Formats \a address as dotted quad into \a buf, which provides room for
	\a bufLen bytes.
	\return \a buf, so that the call can be used as a printf() argument.
*/
static const char*
print_address(const in_addr* address, char* buf, size_t bufLen)
{
	// bring the address into host order, so that the most significant octet
	// ends up in the upper eight bits
	const unsigned int hostOrder = ntohl(address->s_addr);

	const unsigned int first = (hostOrder >> 24) & 0xff;
	const unsigned int second = (hostOrder >> 16) & 0xff;
	const unsigned int third = (hostOrder >> 8) & 0xff;
	const unsigned int fourth = hostOrder & 0xff;

	snprintf(buf, bufLen, "%u.%u.%u.%u", first, second, third, fourth);
	return buf;
}
228 
229 
// Creates the datagram socket for the raw \a socket; the string is the name
// handed to the DatagramSocket base class.
RawSocket::RawSocket(net_socket* socket)
	:
	DatagramSocket<>("ipv4 raw socket", socket)
{
}
235 
236 
237 //	#pragma mark -
238 
239 
240 FragmentPacket::FragmentPacket(const ipv4_packet_key& key)
241 	:
242 	fKey(key),
243 	fIndex(0),
244 	fReceivedLastFragment(false),
245 	fBytesLeft(IP_MAXPACKET)
246 {
247 	gStackModule->init_timer(&fTimer, FragmentPacket::StaleTimer, this);
248 }
249 
250 
251 FragmentPacket::~FragmentPacket()
252 {
253 	// cancel the kill timer
254 	gStackModule->set_timer(&fTimer, -1);
255 
256 	// delete all fragments
257 	net_buffer* buffer;
258 	while ((buffer = fFragments.RemoveHead()) != NULL) {
259 		gBufferModule->free(buffer);
260 	}
261 }
262 
263 
264 status_t
265 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer* buffer,
266 	bool lastFragment)
267 {
268 	// restart the timer
269 	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
270 
271 	if (start >= end) {
272 		// invalid fragment
273 		return B_BAD_DATA;
274 	}
275 
276 	// Search for a position in the list to insert the fragment
277 
278 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
279 	net_buffer* previous = NULL;
280 	net_buffer* next = NULL;
281 	while ((previous = iterator.Next()) != NULL) {
282 		if (previous->fragment.start <= start) {
283 			// The new fragment can be inserted after this one
284 			break;
285 		}
286 
287 		next = previous;
288 	}
289 
290 	// See if we already have the fragment's data
291 
292 	if (previous != NULL && previous->fragment.start <= start
293 		&& previous->fragment.end >= end) {
294 		// we do, so we can just drop this fragment
295 		gBufferModule->free(buffer);
296 		return B_OK;
297 	}
298 
299 	fIndex = buffer->index;
300 		// adopt the buffer's device index
301 
302 	TRACE("    previous: %p, next: %p", previous, next);
303 
304 	// If we have parts of the data already, truncate as needed
305 
306 	if (previous != NULL && previous->fragment.end > start) {
307 		TRACE("    remove header %d bytes", previous->fragment.end - start);
308 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
309 		start = previous->fragment.end;
310 	}
311 	if (next != NULL && next->fragment.start < end) {
312 		TRACE("    remove trailer %d bytes", next->fragment.start - end);
313 		gBufferModule->remove_trailer(buffer, next->fragment.start - end);
314 		end = next->fragment.start;
315 	}
316 
317 	// Now try if we can already merge the fragments together
318 
319 	// We will always keep the last buffer received, so that we can still
320 	// report an error (in which case we're not responsible for freeing it)
321 
322 	if (previous != NULL && previous->fragment.end == start) {
323 		fFragments.Remove(previous);
324 
325 		buffer->fragment.start = previous->fragment.start;
326 		buffer->fragment.end = end;
327 
328 		status_t status = gBufferModule->merge(buffer, previous, false);
329 		TRACE("    merge previous: %s", strerror(status));
330 		if (status != B_OK) {
331 			fFragments.Insert(next, previous);
332 			return status;
333 		}
334 
335 		fFragments.Insert(next, buffer);
336 
337 		// cut down existing hole
338 		fBytesLeft -= end - start;
339 
340 		if (lastFragment && !fReceivedLastFragment) {
341 			fReceivedLastFragment = true;
342 			fBytesLeft -= IP_MAXPACKET - end;
343 		}
344 
345 		TRACE("    hole length: %d", (int)fBytesLeft);
346 
347 		return B_OK;
348 	} else if (next != NULL && next->fragment.start == end) {
349 		net_buffer* afterNext = (net_buffer*)next->link.next;
350 		fFragments.Remove(next);
351 
352 		buffer->fragment.start = start;
353 		buffer->fragment.end = next->fragment.end;
354 
355 		status_t status = gBufferModule->merge(buffer, next, true);
356 		TRACE("    merge next: %s", strerror(status));
357 		if (status != B_OK) {
358 			// Insert "next" at its previous position
359 			fFragments.Insert(afterNext, next);
360 			return status;
361 		}
362 
363 		fFragments.Insert(afterNext, buffer);
364 
365 		// cut down existing hole
366 		fBytesLeft -= end - start;
367 
368 		if (lastFragment && !fReceivedLastFragment) {
369 			fReceivedLastFragment = true;
370 			fBytesLeft -= IP_MAXPACKET - end;
371 		}
372 
373 		TRACE("    hole length: %d", (int)fBytesLeft);
374 
375 		return B_OK;
376 	}
377 
378 	// We couldn't merge the fragments, so we need to add it as is
379 
380 	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);
381 
382 	buffer->fragment.start = start;
383 	buffer->fragment.end = end;
384 	fFragments.Insert(next, buffer);
385 
386 	// update length of the hole, if any
387 	fBytesLeft -= end - start;
388 
389 	if (lastFragment && !fReceivedLastFragment) {
390 		fReceivedLastFragment = true;
391 		fBytesLeft -= IP_MAXPACKET - end;
392 	}
393 
394 	TRACE("    hole length: %d", (int)fBytesLeft);
395 
396 	return B_OK;
397 }
398 
399 
400 /*!	Reassembles the fragments to the specified buffer \a to.
401 	This buffer must have been added via AddFragment() before.
402 */
403 status_t
404 FragmentPacket::Reassemble(net_buffer* to)
405 {
406 	if (!IsComplete())
407 		return B_ERROR;
408 
409 	net_buffer* buffer = NULL;
410 
411 	net_buffer* fragment;
412 	while ((fragment = fFragments.RemoveHead()) != NULL) {
413 		if (buffer != NULL) {
414 			status_t status;
415 			if (to == fragment) {
416 				status = gBufferModule->merge(fragment, buffer, false);
417 				buffer = fragment;
418 			} else
419 				status = gBufferModule->merge(buffer, fragment, true);
420 			if (status != B_OK)
421 				return status;
422 		} else
423 			buffer = fragment;
424 	}
425 
426 	if (buffer != to)
427 		panic("ipv4 packet reassembly did not work correctly.");
428 
429 	to->index = fIndex;
430 		// reset the buffer's device index
431 
432 	return B_OK;
433 }
434 
435 
436 /*static*/ void
437 FragmentPacket::StaleTimer(struct net_timer* timer, void* data)
438 {
439 	FragmentPacket* packet = (FragmentPacket*)data;
440 	TRACE("Assembling FragmentPacket %p timed out!", packet);
441 
442 	MutexLocker locker(&sFragmentLock);
443 	sFragmentHash.Remove(packet);
444 	locker.Unlock();
445 
446 	if (!packet->fFragments.IsEmpty()) {
447 		// Send error: fragment reassembly time exceeded
448 		sDomain->module->error_reply(NULL, packet->fFragments.First(),
449 			B_NET_ERROR_REASSEMBLY_TIME_EXCEEDED, NULL);
450 	}
451 
452 	delete packet;
453 }
454 
455 
456 //	#pragma mark -
457 
458 
#ifdef TRACE_IPV4
/*!	Prints all fields of \a header to the debug output.
	Only available when tracing is enabled.
*/
static void
dump_ipv4_header(ipv4_header &header)
{
	// The header holds the addresses the way they appear on the wire.
	// Converting them to host order first lets the octets be extracted
	// independent of the host's endianness (and without reinterpreting the
	// fields through a pointer of a different type).
	const uint32 source = ntohl((in_addr_t)header.source);
	const uint32 destination = ntohl((in_addr_t)header.destination);

	dprintf("  version: %d\n", header.version);
	dprintf("  header_length: 4 * %d\n", header.header_length);
	dprintf("  service_type: %d\n", header.service_type);
	dprintf("  total_length: %d\n", header.TotalLength());
	dprintf("  id: %d\n", ntohs(header.id));
	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
		header.FragmentOffset() & IP_FRAGMENT_OFFSET_MASK,
		(header.FragmentOffset() & IP_RESERVED_FLAG) ? 'r' : '-',
		(header.FragmentOffset() & IP_DONT_FRAGMENT) ? 'd' : '-',
		(header.FragmentOffset() & IP_MORE_FRAGMENTS) ? 'm' : '-');
	dprintf("  time_to_live: %d\n", header.time_to_live);
	dprintf("  protocol: %d\n", header.protocol);
	dprintf("  checksum: %d\n", ntohs(header.checksum));
	dprintf("  source: %d.%d.%d.%d\n", (int)((source >> 24) & 0xff),
		(int)((source >> 16) & 0xff), (int)((source >> 8) & 0xff),
		(int)(source & 0xff));
	dprintf("  destination: %d.%d.%d.%d\n",
		(int)((destination >> 24) & 0xff), (int)((destination >> 16) & 0xff),
		(int)((destination >> 8) & 0xff), (int)(destination & 0xff));
}
#endif	// TRACE_IPV4
495 
496 
497 static int
498 dump_ipv4_multicast(int argc, char** argv)
499 {
500 	MulticastState::Iterator groupIterator = sMulticastState->GetIterator();
501 
502 	while (groupIterator.HasNext()) {
503 		IPv4GroupInterface* state = groupIterator.Next();
504 
505 		char addressBuffer[64];
506 
507 		kprintf("%p: group <%s, %s, %s {", state, state->Interface()->name,
508 			print_address(&state->Address(), addressBuffer,
509 			sizeof(addressBuffer)),
510 			state->Mode() == IPv4GroupInterface::kExclude
511 				? "Exclude" : "Include");
512 
513 		int count = 0;
514 		IPv4GroupInterface::AddressSet::Iterator addressIterator
515 			= state->Sources().GetIterator();
516 		while (addressIterator.HasNext()) {
517 			kprintf("%s%s", count > 0 ? ", " : "",
518 				print_address(&addressIterator.Next(),
519 				addressBuffer, sizeof(addressBuffer)));
520 			count++;
521 		}
522 
523 		kprintf("}> sock %p\n", state->Parent()->Socket());
524 	}
525 
526 	return 0;
527 }
528 
529 
530 /*!	Attempts to re-assemble fragmented packets.
531 	\return B_OK if everything went well; if it could reassemble the packet, \a _buffer
532 		will point to its buffer, otherwise, it will be \c NULL.
533 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
534 */
535 static status_t
536 reassemble_fragments(const ipv4_header &header, net_buffer** _buffer)
537 {
538 	net_buffer* buffer = *_buffer;
539 	status_t status;
540 
541 	struct ipv4_packet_key key;
542 	key.source = (in_addr_t)header.source;
543 	key.destination = (in_addr_t)header.destination;
544 	key.id = header.id;
545 	key.protocol = header.protocol;
546 
547 	// TODO: Make locking finer grained.
548 	MutexLocker locker(&sFragmentLock);
549 
550 	FragmentPacket* packet = sFragmentHash.Lookup(key);
551 	if (packet == NULL) {
552 		// New fragment packet
553 		packet = new (std::nothrow) FragmentPacket(key);
554 		if (packet == NULL)
555 			return B_NO_MEMORY;
556 
557 		// add packet to hash
558 		status = sFragmentHash.Insert(packet);
559 		if (status != B_OK) {
560 			delete packet;
561 			return status;
562 		}
563 	}
564 
565 	uint16 fragmentOffset = header.FragmentOffset();
566 	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
567 	uint16 end = start + header.TotalLength() - header.HeaderLength();
568 	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;
569 
570 	TRACE("   Received IPv4 %sfragment of size %d, offset %d.",
571 		lastFragment ? "last ": "", end - start, start);
572 
573 	// Remove header unless this is the first fragment
574 	if (start != 0)
575 		gBufferModule->remove_header(buffer, header.HeaderLength());
576 
577 	status = packet->AddFragment(start, end, buffer, lastFragment);
578 	if (status != B_OK)
579 		return status;
580 
581 	if (packet->IsComplete()) {
582 		sFragmentHash.Remove(packet);
583 			// no matter if reassembling succeeds, we won't need this packet
584 			// anymore
585 
586 		status = packet->Reassemble(buffer);
587 		delete packet;
588 
589 		// _buffer does not change
590 		return status;
591 	}
592 
593 	// This indicates that the packet is not yet complete
594 	*_buffer = NULL;
595 	return B_OK;
596 }
597 
598 
599 /*!	Fragments the incoming buffer and send all fragments via the specified
600 	\a route.
601 */
602 static status_t
603 send_fragments(ipv4_protocol* protocol, struct net_route* route,
604 	net_buffer* buffer, uint32 mtu)
605 {
606 	TRACE_SK(protocol, "SendFragments(%lu bytes, mtu %lu)", buffer->size, mtu);
607 
608 	NetBufferHeaderReader<ipv4_header> originalHeader(buffer);
609 	if (originalHeader.Status() != B_OK)
610 		return originalHeader.Status();
611 
612 	uint16 headerLength = originalHeader->HeaderLength();
613 	uint32 bytesLeft = buffer->size - headerLength;
614 	uint32 fragmentOffset = 0;
615 	status_t status = B_OK;
616 
617 	net_buffer* headerBuffer = gBufferModule->split(buffer, headerLength);
618 	if (headerBuffer == NULL)
619 		return B_NO_MEMORY;
620 
621 	// TODO: we need to make sure ipv4_header is contiguous or
622 	// use another construct.
623 	NetBufferHeaderReader<ipv4_header> bufferHeader(headerBuffer);
624 	ipv4_header* header = &bufferHeader.Data();
625 
626 	// Adapt MTU to be a multiple of 8 (fragment offsets can only be specified
627 	// this way)
628 	mtu -= headerLength;
629 	mtu &= ~7;
630 	TRACE("  adjusted MTU to %ld, bytesLeft %ld", mtu, bytesLeft);
631 
632 	while (bytesLeft > 0) {
633 		uint32 fragmentLength = min_c(bytesLeft, mtu);
634 		bytesLeft -= fragmentLength;
635 		bool lastFragment = bytesLeft == 0;
636 
637 		header->total_length = htons(fragmentLength + headerLength);
638 		header->fragment_offset = htons((lastFragment ? 0 : IP_MORE_FRAGMENTS)
639 			| (fragmentOffset >> 3));
640 		header->checksum = 0;
641 		header->checksum = gStackModule->checksum((uint8*)header,
642 			headerLength);
643 			// TODO: compute the checksum only for those parts that changed?
644 
645 		TRACE("  send fragment of %ld bytes (%ld bytes left)", fragmentLength,
646 			bytesLeft);
647 
648 		net_buffer* fragmentBuffer;
649 		if (!lastFragment) {
650 			fragmentBuffer = gBufferModule->split(buffer, fragmentLength);
651 			fragmentOffset += fragmentLength;
652 		} else
653 			fragmentBuffer = buffer;
654 
655 		if (fragmentBuffer == NULL) {
656 			status = B_NO_MEMORY;
657 			break;
658 		}
659 
660 		// copy header to fragment
661 		status = gBufferModule->prepend(fragmentBuffer, header, headerLength);
662 
663 		// send fragment
664 		if (status == B_OK)
665 			status = sDatalinkModule->send_routed_data(route, fragmentBuffer);
666 
667 		if (lastFragment) {
668 			// we don't own the last buffer, so we don't have to free it
669 			break;
670 		}
671 
672 		if (status != B_OK) {
673 			gBufferModule->free(fragmentBuffer);
674 			break;
675 		}
676 	}
677 
678 	gBufferModule->free(headerBuffer);
679 	return status;
680 }
681 
682 
683 /*!	Delivers the provided \a buffer to all listeners of this multicast group.
684 	Does not take over ownership of the buffer.
685 */
686 static bool
687 deliver_multicast(net_protocol_module_info* module, net_buffer* buffer,
688 	bool deliverToRaw)
689 {
690 	if (module->deliver_data == NULL)
691 		return false;
692 
693 	// TODO: fix multicast!
694 	return false;
695 	MutexLocker _(sMulticastGroupsLock);
696 
697 	sockaddr_in* multicastAddr = (sockaddr_in*)buffer->destination;
698 
699 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
700 		&multicastAddr->sin_addr, buffer->interface_address->interface->index));
701 
702 	size_t count = 0;
703 
704 	while (it.HasNext()) {
705 		IPv4GroupInterface* state = it.Next();
706 
707 		ipv4_protocol* ipProtocol = state->Parent()->Socket();
708 		if (deliverToRaw && (ipProtocol->raw == NULL
709 				|| ipProtocol->socket->protocol != buffer->protocol))
710 			continue;
711 
712 		if (state->FilterAccepts(buffer)) {
713 			net_protocol* protocol = ipProtocol;
714 			if (protocol->module != module) {
715 				// as multicast filters are installed with an IPv4 protocol
716 				// reference, we need to go and find the appropriate instance
717 				// related to the 'receiving protocol' with module 'module'.
718 				net_protocol* protocol = ipProtocol->socket->first_protocol;
719 
720 				while (protocol != NULL && protocol->module != module)
721 					protocol = protocol->next;
722 			}
723 
724 			if (protocol != NULL) {
725 				module->deliver_data(protocol, buffer);
726 				count++;
727 			}
728 		}
729 	}
730 
731 	return count > 0;
732 }
733 
734 
735 /*!	Delivers the buffer to all listening raw sockets without taking ownership of
736 	the provided \a buffer.
737 	Returns \c true if there was any receiver, \c false if not.
738 */
739 static bool
740 raw_receive_data(net_buffer* buffer)
741 {
742 	MutexLocker locker(sRawSocketsLock);
743 
744 	if (sRawSockets.IsEmpty())
745 		return false;
746 
747 	TRACE("RawReceiveData(%i)", buffer->protocol);
748 
749 	if ((buffer->flags & MSG_MCAST) != 0) {
750 		// we need to call deliver_multicast here separately as
751 		// buffer still has the IP header, and it won't in the
752 		// next call. This isn't very optimized but works for now.
753 		// A better solution would be to hold separate hash tables
754 		// and lists for RAW and non-RAW sockets.
755 		return deliver_multicast(&gIPv4Module, buffer, true);
756 	}
757 
758 	RawSocketList::Iterator iterator = sRawSockets.GetIterator();
759 	size_t count = 0;
760 
761 	while (iterator.HasNext()) {
762 		RawSocket* raw = iterator.Next();
763 
764 		if (raw->Socket()->protocol == buffer->protocol) {
765 			raw->EnqueueClone(buffer);
766 			count++;
767 		}
768 	}
769 
770 	return count > 0;
771 }
772 
773 
774 static inline sockaddr*
775 fill_sockaddr_in(sockaddr_in* target, in_addr_t address)
776 {
777 	target->sin_family = AF_INET;
778 	target->sin_len = sizeof(sockaddr_in);
779 	target->sin_port = 0;
780 	target->sin_addr.s_addr = address;
781 	return (sockaddr*)target;
782 }
783 
784 
785 static status_t
786 get_int_option(void* target, size_t length, int value)
787 {
788 	if (length != sizeof(int))
789 		return B_BAD_VALUE;
790 
791 	return user_memcpy(target, &value, sizeof(int));
792 }
793 
794 
795 template<typename Type> static status_t
796 set_int_option(Type &target, const void* _value, size_t length)
797 {
798 	int value;
799 
800 	if (length != sizeof(int))
801 		return B_BAD_VALUE;
802 
803 	if (user_memcpy(&value, _value, sizeof(int)) != B_OK)
804 		return B_BAD_ADDRESS;
805 
806 	target = value;
807 	return B_OK;
808 }
809 
810 
811 static net_protocol_module_info*
812 receiving_protocol(uint8 protocol)
813 {
814 	net_protocol_module_info* module = sReceivingProtocol[protocol];
815 	if (module != NULL)
816 		return module;
817 
818 	MutexLocker locker(sReceivingProtocolLock);
819 
820 	module = sReceivingProtocol[protocol];
821 	if (module != NULL)
822 		return module;
823 
824 	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol,
825 			&module) == B_OK)
826 		sReceivingProtocol[protocol] = module;
827 
828 	return module;
829 }
830 
831 
832 // #pragma mark - multicast
833 
834 
835 status_t
836 IPv4Multicast::JoinGroup(IPv4GroupInterface* state)
837 {
838 	MutexLocker _(sMulticastGroupsLock);
839 
840 	sockaddr_in groupAddr;
841 	status_t status = sDatalinkModule->join_multicast(state->Interface(),
842 		sDomain, fill_sockaddr_in(&groupAddr, state->Address().s_addr));
843 	if (status != B_OK)
844 		return status;
845 
846 	sMulticastState->Insert(state);
847 	return B_OK;
848 }
849 
850 
851 status_t
852 IPv4Multicast::LeaveGroup(IPv4GroupInterface* state)
853 {
854 	MutexLocker _(sMulticastGroupsLock);
855 
856 	sMulticastState->Remove(state);
857 
858 	sockaddr_in groupAddr;
859 	return sDatalinkModule->leave_multicast(state->Interface(), sDomain,
860 		fill_sockaddr_in(&groupAddr, state->Address().s_addr));
861 }
862 
863 
864 static status_t
865 ipv4_delta_group(IPv4GroupInterface* group, int option,
866 	net_interface* interface, const in_addr* sourceAddr)
867 {
868 	switch (option) {
869 		case IP_ADD_MEMBERSHIP:
870 			return group->Add();
871 		case IP_DROP_MEMBERSHIP:
872 			return group->Drop();
873 		case IP_BLOCK_SOURCE:
874 			return group->BlockSource(*sourceAddr);
875 		case IP_UNBLOCK_SOURCE:
876 			return group->UnblockSource(*sourceAddr);
877 		case IP_ADD_SOURCE_MEMBERSHIP:
878 			return group->AddSSM(*sourceAddr);
879 		case IP_DROP_SOURCE_MEMBERSHIP:
880 			return group->DropSSM(*sourceAddr);
881 	}
882 
883 	return B_ERROR;
884 }
885 
886 
887 static status_t
888 ipv4_delta_membership(ipv4_protocol* protocol, int option,
889 	net_interface* interface, const in_addr* groupAddr,
890 	const in_addr* sourceAddr)
891 {
892 	IPv4MulticastFilter& filter = protocol->multicast_filter;
893 	IPv4GroupInterface* state = NULL;
894 	status_t status = B_OK;
895 
896 	switch (option) {
897 		case IP_ADD_MEMBERSHIP:
898 		case IP_ADD_SOURCE_MEMBERSHIP:
899 			status = filter.GetState(*groupAddr, interface, state, true);
900 			break;
901 
902 		case IP_DROP_MEMBERSHIP:
903 		case IP_BLOCK_SOURCE:
904 		case IP_UNBLOCK_SOURCE:
905 		case IP_DROP_SOURCE_MEMBERSHIP:
906 			filter.GetState(*groupAddr, interface, state, false);
907 			if (state == NULL) {
908 				if (option == IP_DROP_MEMBERSHIP
909 					|| option == IP_DROP_SOURCE_MEMBERSHIP)
910 					return EADDRNOTAVAIL;
911 
912 				return B_BAD_VALUE;
913 			}
914 			break;
915 	}
916 
917 	if (status != B_OK)
918 		return status;
919 
920 	status = ipv4_delta_group(state, option, interface, sourceAddr);
921 	filter.ReturnState(state);
922 	return status;
923 }
924 
925 
/*!	Translates the protocol independent MCAST_* membership \a option to its
	IPv4 specific counterpart.
	\return the matching IP_* option, or -1 if \a option is not one of the
		MCAST_* options.
*/
static int
generic_to_ipv4(int option)
{
	static const struct {
		int	generic;
		int	ipv4;
	} kOptionMap[] = {
		{ MCAST_JOIN_GROUP, IP_ADD_MEMBERSHIP },
		{ MCAST_JOIN_SOURCE_GROUP, IP_ADD_SOURCE_MEMBERSHIP },
		{ MCAST_LEAVE_GROUP, IP_DROP_MEMBERSHIP },
		{ MCAST_BLOCK_SOURCE, IP_BLOCK_SOURCE },
		{ MCAST_UNBLOCK_SOURCE, IP_UNBLOCK_SOURCE },
		{ MCAST_LEAVE_SOURCE_GROUP, IP_DROP_SOURCE_MEMBERSHIP }
	};
	const size_t count = sizeof(kOptionMap) / sizeof(kOptionMap[0]);

	for (size_t i = 0; i < count; i++) {
		if (kOptionMap[i].generic == option)
			return kOptionMap[i].ipv4;
	}

	return -1;
}
946 
947 
948 static net_interface*
949 get_multicast_interface(ipv4_protocol* protocol, const in_addr* address)
950 {
951 	// TODO: this is broken and leaks references
952 	sockaddr_in groupAddr;
953 	net_route* route = sDatalinkModule->get_route(sDomain,
954 		fill_sockaddr_in(&groupAddr, address ? address->s_addr : INADDR_ANY));
955 	if (route == NULL)
956 		return NULL;
957 
958 	return route->interface_address->interface;
959 }
960 
961 
962 static status_t
963 ipv4_delta_membership(ipv4_protocol* protocol, int option,
964 	in_addr* interfaceAddr, in_addr* groupAddr, in_addr* sourceAddr)
965 {
966 	net_interface* interface = NULL;
967 
968 	if (interfaceAddr->s_addr == INADDR_ANY) {
969 		interface = get_multicast_interface(protocol, groupAddr);
970 	} else {
971 		sockaddr_in address;
972 		interface = sDatalinkModule->get_interface_with_address(
973 			fill_sockaddr_in(&address, interfaceAddr->s_addr));
974 	}
975 
976 	if (interface == NULL)
977 		return B_DEVICE_NOT_FOUND;
978 
979 	return ipv4_delta_membership(protocol, option, interface,
980 		groupAddr, sourceAddr);
981 }
982 
983 
984 static status_t
985 ipv4_generic_delta_membership(ipv4_protocol* protocol, int option,
986 	uint32 index, const sockaddr_storage* _groupAddr,
987 	const sockaddr_storage* _sourceAddr)
988 {
989 	if (_groupAddr->ss_family != AF_INET
990 		|| (_sourceAddr != NULL && _sourceAddr->ss_family != AF_INET))
991 		return B_BAD_VALUE;
992 
993 	const in_addr* groupAddr = &((const sockaddr_in*)_groupAddr)->sin_addr;
994 
995 	// TODO: this is broken and leaks references
996 	net_interface* interface;
997 	if (index == 0)
998 		interface = get_multicast_interface(protocol, groupAddr);
999 	else
1000 		interface = sDatalinkModule->get_interface(sDomain, index);
1001 
1002 	if (interface == NULL)
1003 		return B_DEVICE_NOT_FOUND;
1004 
1005 	const in_addr* sourceAddr = NULL;
1006 	if (_sourceAddr != NULL)
1007 		sourceAddr = &((const sockaddr_in*)_sourceAddr)->sin_addr;
1008 
1009 	return ipv4_delta_membership(protocol, generic_to_ipv4(option), interface,
1010 		groupAddr, sourceAddr);
1011 }
1012 
1013 
1014 //	#pragma mark - module interface
1015 
1016 
1017 net_protocol*
1018 ipv4_init_protocol(net_socket* socket)
1019 {
1020 	ipv4_protocol* protocol = new (std::nothrow) ipv4_protocol();
1021 	if (protocol == NULL)
1022 		return NULL;
1023 
1024 	protocol->raw = NULL;
1025 	protocol->service_type = 0;
1026 	protocol->time_to_live = kDefaultTTL;
1027 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
1028 	protocol->flags = 0;
1029 	protocol->multicast_address = NULL;
1030 	return protocol;
1031 }
1032 
1033 
1034 status_t
1035 ipv4_uninit_protocol(net_protocol* _protocol)
1036 {
1037 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1038 
1039 	delete protocol;
1040 
1041 	return B_OK;
1042 }
1043 
1044 
1045 /*!	Since open() is only called on the top level protocol, when we get here
1046 	it means we are on a SOCK_RAW socket.
1047 */
1048 status_t
1049 ipv4_open(net_protocol* _protocol)
1050 {
1051 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1052 
1053 	// Only root may open raw sockets
1054 	if (geteuid() != 0)
1055 		return B_NOT_ALLOWED;
1056 
1057 	RawSocket* raw = new (std::nothrow) RawSocket(protocol->socket);
1058 	if (raw == NULL)
1059 		return B_NO_MEMORY;
1060 
1061 	status_t status = raw->InitCheck();
1062 	if (status != B_OK) {
1063 		delete raw;
1064 		return status;
1065 	}
1066 
1067 	TRACE_SK(protocol, "Open()");
1068 
1069 	protocol->raw = raw;
1070 
1071 	MutexLocker locker(sRawSocketsLock);
1072 	sRawSockets.Add(raw);
1073 	return B_OK;
1074 }
1075 
1076 
1077 status_t
1078 ipv4_close(net_protocol* _protocol)
1079 {
1080 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1081 	RawSocket* raw = protocol->raw;
1082 	if (raw == NULL)
1083 		return B_ERROR;
1084 
1085 	TRACE_SK(protocol, "Close()");
1086 
1087 	MutexLocker locker(sRawSocketsLock);
1088 	sRawSockets.Remove(raw);
1089 	delete raw;
1090 	protocol->raw = NULL;
1091 
1092 	return B_OK;
1093 }
1094 
1095 
/*!	There is nothing to release here; always returns \c B_OK. */
status_t
ipv4_free(net_protocol* protocol)
{
	return B_OK;
}
1101 
1102 
/*!	Connecting is not implemented by this protocol; always returns
	\c B_ERROR.
*/
status_t
ipv4_connect(net_protocol* protocol, const struct sockaddr* address)
{
	return B_ERROR;
}
1108 
1109 
/*!	Accepting connections is not supported; always returns
	\c B_NOT_SUPPORTED.
*/
status_t
ipv4_accept(net_protocol* protocol, struct net_socket** _acceptedSocket)
{
	return B_NOT_SUPPORTED;
}
1115 
1116 
1117 status_t
1118 ipv4_control(net_protocol* _protocol, int level, int option, void* value,
1119 	size_t* _length)
1120 {
1121 	if ((level & LEVEL_MASK) != IPPROTO_IP)
1122 		return sDatalinkModule->control(sDomain, option, value, _length);
1123 
1124 	return B_BAD_VALUE;
1125 }
1126 
1127 
1128 status_t
1129 ipv4_getsockopt(net_protocol* _protocol, int level, int option, void* value,
1130 	int* _length)
1131 {
1132 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1133 
1134 	if (level == IPPROTO_IP) {
1135 		if (option == IP_HDRINCL) {
1136 			return get_int_option(value, *_length,
1137 				(protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0);
1138 		}
1139 		if (option == IP_RECVDSTADDR) {
1140 			return get_int_option(value, *_length,
1141 				(protocol->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0);
1142 		}
1143 		if (option == IP_TTL)
1144 			return get_int_option(value, *_length, protocol->time_to_live);
1145 		if (option == IP_TOS)
1146 			return get_int_option(value, *_length, protocol->service_type);
1147 		if (option == IP_MULTICAST_TTL) {
1148 			return get_int_option(value, *_length,
1149 				protocol->multicast_time_to_live);
1150 		}
1151 		if (option == IP_ADD_MEMBERSHIP
1152 			|| option == IP_DROP_MEMBERSHIP
1153 			|| option == IP_BLOCK_SOURCE
1154 			|| option == IP_UNBLOCK_SOURCE
1155 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1156 			|| option == IP_DROP_SOURCE_MEMBERSHIP
1157 			|| option == MCAST_JOIN_GROUP
1158 			|| option == MCAST_LEAVE_GROUP
1159 			|| option == MCAST_BLOCK_SOURCE
1160 			|| option == MCAST_UNBLOCK_SOURCE
1161 			|| option == MCAST_JOIN_SOURCE_GROUP
1162 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1163 			// RFC 3678, Section 4.1:
1164 			// ``An error of EOPNOTSUPP is returned if these options are
1165 			// used with getsockopt().''
1166 			return B_NOT_SUPPORTED;
1167 		}
1168 
1169 		dprintf("IPv4::getsockopt(): get unknown option: %d\n", option);
1170 		return ENOPROTOOPT;
1171 	}
1172 
1173 	return sSocketModule->get_option(protocol->socket, level, option, value,
1174 		_length);
1175 }
1176 
1177 
1178 status_t
1179 ipv4_setsockopt(net_protocol* _protocol, int level, int option,
1180 	const void* value, int length)
1181 {
1182 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1183 
1184 	if (level == IPPROTO_IP) {
1185 		if (option == IP_HDRINCL) {
1186 			int headerIncluded;
1187 			if (length != sizeof(int))
1188 				return B_BAD_VALUE;
1189 			if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded))
1190 					!= B_OK)
1191 				return B_BAD_ADDRESS;
1192 
1193 			if (headerIncluded)
1194 				protocol->flags |= IP_FLAG_HEADER_INCLUDED;
1195 			else
1196 				protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
1197 
1198 			return B_OK;
1199 		}
1200 		if (option == IP_RECVDSTADDR) {
1201 			int getAddress;
1202 			if (length != sizeof(int))
1203 				return B_BAD_VALUE;
1204 			if (user_memcpy(&getAddress, value, sizeof(int)) != B_OK)
1205 				return B_BAD_ADDRESS;
1206 
1207 			if (getAddress && (protocol->socket->type == SOCK_DGRAM
1208 					|| protocol->socket->type == SOCK_RAW))
1209 				protocol->flags |= IP_FLAG_RECEIVE_DEST_ADDR;
1210 			else
1211 				protocol->flags &= ~IP_FLAG_RECEIVE_DEST_ADDR;
1212 
1213 			return B_OK;
1214 		}
1215 		if (option == IP_TTL)
1216 			return set_int_option(protocol->time_to_live, value, length);
1217 		if (option == IP_TOS)
1218 			return set_int_option(protocol->service_type, value, length);
1219 		if (option == IP_MULTICAST_IF) {
1220 			if (length != sizeof(struct in_addr))
1221 				return B_BAD_VALUE;
1222 
1223 			struct sockaddr_in* address = new (std::nothrow) sockaddr_in;
1224 			if (address == NULL)
1225 				return B_NO_MEMORY;
1226 
1227 			if (user_memcpy(&address->sin_addr, value, sizeof(struct in_addr))
1228 					!= B_OK) {
1229 				delete address;
1230 				return B_BAD_ADDRESS;
1231 			}
1232 
1233 			// Using INADDR_ANY to remove the previous setting.
1234 			if (address->sin_addr.s_addr == htonl(INADDR_ANY)) {
1235 				delete address;
1236 				delete protocol->multicast_address;
1237 				protocol->multicast_address = NULL;
1238 				return B_OK;
1239 			}
1240 
1241 			struct net_interface* interface
1242 				= sDatalinkModule->get_interface_with_address(
1243 					(sockaddr*)address);
1244 			if (interface == NULL) {
1245 				delete address;
1246 				return EADDRNOTAVAIL;
1247 			}
1248 
1249 			delete protocol->multicast_address;
1250 			protocol->multicast_address = (struct sockaddr*)address;
1251 
1252 			sDatalinkModule->put_interface(interface);
1253 			return B_OK;
1254 		}
1255 		if (option == IP_MULTICAST_TTL) {
1256 			return set_int_option(protocol->multicast_time_to_live, value,
1257 				length);
1258 		}
1259 		if (option == IP_ADD_MEMBERSHIP || option == IP_DROP_MEMBERSHIP) {
1260 			ip_mreq mreq;
1261 			if (length != sizeof(ip_mreq))
1262 				return B_BAD_VALUE;
1263 			if (user_memcpy(&mreq, value, sizeof(ip_mreq)) != B_OK)
1264 				return B_BAD_ADDRESS;
1265 
1266 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1267 				&mreq.imr_multiaddr, NULL);
1268 		}
1269 		if (option == IP_BLOCK_SOURCE
1270 			|| option == IP_UNBLOCK_SOURCE
1271 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1272 			|| option == IP_DROP_SOURCE_MEMBERSHIP) {
1273 			ip_mreq_source mreq;
1274 			if (length != sizeof(ip_mreq_source))
1275 				return B_BAD_VALUE;
1276 			if (user_memcpy(&mreq, value, sizeof(ip_mreq_source)) != B_OK)
1277 				return B_BAD_ADDRESS;
1278 
1279 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1280 				&mreq.imr_multiaddr, &mreq.imr_sourceaddr);
1281 		}
1282 		if (option == MCAST_LEAVE_GROUP || option == MCAST_JOIN_GROUP) {
1283 			group_req greq;
1284 			if (length != sizeof(group_req))
1285 				return B_BAD_VALUE;
1286 			if (user_memcpy(&greq, value, sizeof(group_req)) != B_OK)
1287 				return B_BAD_ADDRESS;
1288 
1289 			return ipv4_generic_delta_membership(protocol, option,
1290 				greq.gr_interface, &greq.gr_group, NULL);
1291 		}
1292 		if (option == MCAST_BLOCK_SOURCE
1293 			|| option == MCAST_UNBLOCK_SOURCE
1294 			|| option == MCAST_JOIN_SOURCE_GROUP
1295 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1296 			group_source_req greq;
1297 			if (length != sizeof(group_source_req))
1298 				return B_BAD_VALUE;
1299 			if (user_memcpy(&greq, value, sizeof(group_source_req)) != B_OK)
1300 				return B_BAD_ADDRESS;
1301 
1302 			return ipv4_generic_delta_membership(protocol, option,
1303 				greq.gsr_interface, &greq.gsr_group, &greq.gsr_source);
1304 		}
1305 
1306 		dprintf("IPv4::setsockopt(): set unknown option: %d\n", option);
1307 		return ENOPROTOOPT;
1308 	}
1309 
1310 	return sSocketModule->set_option(protocol->socket, level, option,
1311 		value, length);
1312 }
1313 
1314 
1315 status_t
1316 ipv4_bind(net_protocol* protocol, const struct sockaddr* address)
1317 {
1318 	if (address->sa_family != AF_INET)
1319 		return EAFNOSUPPORT;
1320 
1321 	// only INADDR_ANY and addresses of local interfaces are accepted:
1322 	if (((sockaddr_in*)address)->sin_addr.s_addr == INADDR_ANY
1323 		|| IN_MULTICAST(ntohl(((sockaddr_in*)address)->sin_addr.s_addr))
1324 		|| sDatalinkModule->is_local_address(sDomain, address, NULL, NULL)) {
1325 		memcpy(&protocol->socket->address, address, sizeof(struct sockaddr_in));
1326 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
1327 			// explicitly set length, as our callers can't be trusted to
1328 			// always provide the correct length!
1329 		return B_OK;
1330 	}
1331 
1332 	return B_ERROR;
1333 		// address is unknown on this host
1334 }
1335 
1336 
1337 status_t
1338 ipv4_unbind(net_protocol* protocol, struct sockaddr* address)
1339 {
1340 	// nothing to do here
1341 	return B_OK;
1342 }
1343 
1344 
1345 status_t
1346 ipv4_listen(net_protocol* protocol, int count)
1347 {
1348 	return B_NOT_SUPPORTED;
1349 }
1350 
1351 
1352 status_t
1353 ipv4_shutdown(net_protocol* protocol, int direction)
1354 {
1355 	return B_NOT_SUPPORTED;
1356 }
1357 
1358 
1359 status_t
1360 ipv4_send_routed_data(net_protocol* _protocol, struct net_route* route,
1361 	net_buffer* buffer)
1362 {
1363 	if (route == NULL)
1364 		return B_BAD_VALUE;
1365 
1366 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1367 	net_interface_address* interfaceAddress = route->interface_address;
1368 	net_interface* interface = interfaceAddress->interface;
1369 
1370 	TRACE_SK(protocol, "SendRoutedData(%p, %p [%ld bytes])", route, buffer,
1371 		buffer->size);
1372 
1373 	sockaddr_in& source = *(sockaddr_in*)buffer->source;
1374 	sockaddr_in& destination = *(sockaddr_in*)buffer->destination;
1375 	sockaddr_in* broadcastAddress = (sockaddr_in*)interfaceAddress->destination;
1376 
1377 	bool checksumNeeded = true;
1378 	bool headerIncluded = false;
1379 	if (protocol != NULL)
1380 		headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
1381 
1382 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1383 
1384 	if (destination.sin_addr.s_addr == INADDR_ANY)
1385 		return EDESTADDRREQ;
1386 
1387 	if ((interface->device->flags & IFF_BROADCAST) != 0
1388 		&& (destination.sin_addr.s_addr == INADDR_BROADCAST
1389 			|| (broadcastAddress != NULL && destination.sin_addr.s_addr
1390 					== broadcastAddress->sin_addr.s_addr))) {
1391 		if (protocol && !(protocol->socket->options & SO_BROADCAST))
1392 			return B_BAD_VALUE;
1393 		buffer->flags |= MSG_BCAST;
1394 	} else if (IN_MULTICAST(ntohl(destination.sin_addr.s_addr)))
1395 		buffer->flags |= MSG_MCAST;
1396 
1397 	// Add IP header (if needed)
1398 
1399 	if (!headerIncluded) {
1400 		NetBufferPrepend<ipv4_header> header(buffer);
1401 		if (header.Status() != B_OK)
1402 			return header.Status();
1403 
1404 		header->version = IPV4_VERSION;
1405 		header->header_length = sizeof(ipv4_header) / 4;
1406 		header->service_type = protocol ? protocol->service_type : 0;
1407 		header->total_length = htons(buffer->size);
1408 		header->id = htons(atomic_add(&sPacketID, 1));
1409 		header->fragment_offset = 0;
1410 		if (protocol) {
1411 			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
1412 				? protocol->multicast_time_to_live : protocol->time_to_live;
1413 		} else {
1414 			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
1415 				? kDefaultMulticastTTL : kDefaultTTL;
1416 		}
1417 		header->protocol = protocol
1418 			? protocol->socket->protocol : buffer->protocol;
1419 		header->checksum = 0;
1420 
1421 		header->source = source.sin_addr.s_addr;
1422 		header->destination = destination.sin_addr.s_addr;
1423 
1424 		TRACE_ONLY(dump_ipv4_header(*header));
1425 	} else {
1426 		// if IP_HDRINCL, check if the source address is set
1427 		NetBufferHeaderReader<ipv4_header> header(buffer);
1428 		if (header.Status() != B_OK)
1429 			return header.Status();
1430 
1431 		if (header->source == 0) {
1432 			header->source = source.sin_addr.s_addr;
1433 			header->checksum = 0;
1434 			header.Sync();
1435 		} else
1436 			checksumNeeded = false;
1437 
1438 		TRACE("  Header was already supplied:");
1439 		TRACE_ONLY(dump_ipv4_header(*header));
1440 	}
1441 
1442 	if (buffer->size > 0xffff)
1443 		return EMSGSIZE;
1444 
1445 	if (checksumNeeded) {
1446 		*IPChecksumField(buffer) = gBufferModule->checksum(buffer, 0,
1447 			sizeof(ipv4_header), true);
1448 	}
1449 
1450 	TRACE_SK(protocol, "  SendRoutedData(): header chksum: %ld, buffer "
1451 		"checksum: %ld",
1452 		gBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
1453 		gBufferModule->checksum(buffer, 0, buffer->size, true));
1454 
1455 	TRACE_SK(protocol, "  SendRoutedData(): destination: %08x",
1456 		ntohl(destination.sin_addr.s_addr));
1457 
1458 	uint32 mtu = route->mtu ? route->mtu : interface->mtu;
1459 	if (buffer->size > mtu) {
1460 		// we need to fragment the packet
1461 		return send_fragments(protocol, route, buffer, mtu);
1462 	}
1463 
1464 	return sDatalinkModule->send_routed_data(route, buffer);
1465 }
1466 
1467 
1468 status_t
1469 ipv4_send_data(net_protocol* _protocol, net_buffer* buffer)
1470 {
1471 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1472 
1473 	TRACE_SK(protocol, "SendData(%p [%ld bytes])", buffer, buffer->size);
1474 
1475 	if (protocol != NULL && (protocol->flags & IP_FLAG_HEADER_INCLUDED)) {
1476 		if (buffer->size < sizeof(ipv4_header))
1477 			return B_BAD_VALUE;
1478 
1479 		sockaddr_in* source = (sockaddr_in*)buffer->source;
1480 		sockaddr_in* destination = (sockaddr_in*)buffer->destination;
1481 
1482 		fill_sockaddr_in(source, *NetBufferField<in_addr_t,
1483 			offsetof(ipv4_header, source)>(buffer));
1484 		fill_sockaddr_in(destination, *NetBufferField<in_addr_t,
1485 			offsetof(ipv4_header, destination)>(buffer));
1486 	}
1487 
1488 	// handle IP_MULTICAST_IF
1489 	if (IN_MULTICAST(ntohl(
1490 			((sockaddr_in*)buffer->destination)->sin_addr.s_addr))
1491 		&& protocol != NULL && protocol->multicast_address != NULL) {
1492 		net_interface_address* address = sDatalinkModule->get_interface_address(
1493 			protocol->multicast_address);
1494 		if (address == NULL || (address->interface->flags & IFF_UP) == 0) {
1495 			sDatalinkModule->put_interface_address(address);
1496 			return EADDRNOTAVAIL;
1497 		}
1498 
1499 		sDatalinkModule->put_interface_address(buffer->interface_address);
1500 		buffer->interface_address = address;
1501 			// the buffer takes over ownership of the address
1502 
1503 		net_route* route = sDatalinkModule->get_route(sDomain, address->local);
1504 		if (route == NULL)
1505 			return ENETUNREACH;
1506 
1507 		return sDatalinkModule->send_routed_data(route, buffer);
1508 	}
1509 
1510 	return sDatalinkModule->send_data(protocol, sDomain, buffer);
1511 }
1512 
1513 
1514 ssize_t
1515 ipv4_send_avail(net_protocol* protocol)
1516 {
1517 	return B_ERROR;
1518 }
1519 
1520 
1521 status_t
1522 ipv4_read_data(net_protocol* _protocol, size_t numBytes, uint32 flags,
1523 	net_buffer** _buffer)
1524 {
1525 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1526 	RawSocket* raw = protocol->raw;
1527 	if (raw == NULL)
1528 		return B_ERROR;
1529 
1530 	TRACE_SK(protocol, "ReadData(%lu, 0x%lx)", numBytes, flags);
1531 
1532 	return raw->Dequeue(flags, _buffer);
1533 }
1534 
1535 
1536 ssize_t
1537 ipv4_read_avail(net_protocol* _protocol)
1538 {
1539 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1540 	RawSocket* raw = protocol->raw;
1541 	if (raw == NULL)
1542 		return B_ERROR;
1543 
1544 	return raw->AvailableData();
1545 }
1546 
1547 
1548 struct net_domain*
1549 ipv4_get_domain(net_protocol* protocol)
1550 {
1551 	return sDomain;
1552 }
1553 
1554 
1555 size_t
1556 ipv4_get_mtu(net_protocol* protocol, const struct sockaddr* address)
1557 {
1558 	net_route* route = sDatalinkModule->get_route(sDomain, address);
1559 	if (route == NULL)
1560 		return 0;
1561 
1562 	size_t mtu;
1563 	if (route->mtu != 0)
1564 		mtu = route->mtu;
1565 	else
1566 		mtu = route->interface_address->interface->mtu;
1567 
1568 	sDatalinkModule->put_route(sDomain, route);
1569 	return mtu - sizeof(ipv4_header);
1570 }
1571 
1572 
/*!	Handles an incoming IPv4 packet.

	The header is validated (version, lengths, header checksum), the
	destination is classified as broadcast, multicast, or local, and the
	buffer's source/destination addresses and protocol are filled in from
	the header. Fragments are passed to reassemble_fragments(); a complete
	packet is handed to raw_receive_data() and then, with the IP header
	removed, either to deliver_multicast() (multicast) or to the
	receive_data() hook of the module returned by receiving_protocol().

	\return \c B_BAD_TYPE for a wrong IP version, \c B_BAD_DATA for invalid
		lengths or a bad checksum, \c B_ERROR if the packet is not for this
		host, \c EAFNOSUPPORT if no module handles the packet's protocol,
		\c B_OK if the fragment was queued for reassembly or the multicast
		packet was delivered, or the status of the header reader, trim(),
		reassembly, or the receiving module.
*/
status_t
ipv4_receive_data(net_buffer* buffer)
{
	TRACE("ipv4_receive_data(%p [%ld bytes])", buffer, buffer->size);

	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
	if (bufferHeader.Status() != B_OK)
		return bufferHeader.Status();

	ipv4_header& header = bufferHeader.Data();
	TRACE_ONLY(dump_ipv4_header(header));

	if (header.version != IPV4_VERSION)
		return B_BAD_TYPE;

	// the packet must fit into what we received, and the header cannot be
	// shorter than the fixed part of an IPv4 header
	uint16 packetLength = header.TotalLength();
	uint16 headerLength = header.HeaderLength();
	if (packetLength > buffer->size
		|| headerLength < sizeof(ipv4_header))
		return B_BAD_DATA;

	// TODO: would be nice to have a direct checksum function somewhere
	if (gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
		return B_BAD_DATA;

	// lower layers notion of broadcast or multicast have no relevance to us
	// other than deciding whether to send an ICMP error
	bool wasMulticast = (buffer->flags & (MSG_BCAST | MSG_MCAST)) != 0;
	bool notForUs = false;
	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);

	sockaddr_in destination;
	fill_sockaddr_in(&destination, header.destination);

	if (header.destination == INADDR_BROADCAST) {
		buffer->flags |= MSG_BCAST;

		// Find first interface with a matching family
		if (!sDatalinkModule->is_local_link_address(sDomain, true,
				buffer->destination, &buffer->interface_address))
			notForUs = !wasMulticast;
	} else if (IN_MULTICAST(ntohl(header.destination))) {
		buffer->flags |= MSG_MCAST;
	} else {
		uint32 matchedAddressType = 0;

		// test if the packet is really for us
		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
				&buffer->interface_address, &matchedAddressType)
			&& !sDatalinkModule->is_local_link_address(sDomain, true,
				buffer->destination, &buffer->interface_address)) {
			// if the buffer was a link layer multicast, regard it as a
			// broadcast, and let the upper levels decide what to do with it
			if (wasMulticast)
				buffer->flags |= MSG_BCAST;
			else
				notForUs = true;
		} else {
			// copy over special address types (MSG_BCAST or MSG_MCAST):
			buffer->flags |= matchedAddressType;
		}
	}

	// set net_buffer's source/destination address
	fill_sockaddr_in((struct sockaddr_in*)buffer->source, header.source);
	memcpy(buffer->destination, &destination, sizeof(sockaddr_in));

	buffer->protocol = header.protocol;

	if (notForUs) {
		TRACE("  ipv4_receive_data(): packet was not for us %x -> %x",
			ntohl(header.source), ntohl(header.destination));

		if (!wasMulticast) {
			// Send ICMP error: Host unreachable
			sDomain->module->error_reply(NULL, buffer, B_NET_ERROR_UNREACH_HOST,
				NULL);
		}

		return B_ERROR;
	}

	// remove any trailing/padding data
	status_t status = gBufferModule->trim(buffer, packetLength);
	if (status != B_OK)
		return status;

	// check for fragmentation
	uint16 fragmentOffset = header.FragmentOffset();
	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
		// this is a fragment
		TRACE("  ipv4_receive_data(): Found a Fragment!");
		status = reassemble_fragments(header, &buffer);
		TRACE("  ipv4_receive_data():  -> %s", strerror(status));
		if (status != B_OK)
			return status;

		if (buffer == NULL) {
			// buffer was put into fragment packet
			TRACE("  ipv4_receive_data(): Not yet assembled.");
			return B_OK;
		}
	}

	// Since the buffer might have been changed (reassembled fragment)
	// we must no longer access bufferHeader or header anymore after
	// this point
	// NOTE(review): bufferHeader.Remove() is still called below; presumably
	// that is fine because it does not read the cached header data -- confirm.

	bool rawDelivered = raw_receive_data(buffer);

	// Preserve the ipv4 header for ICMP processing
	gBufferModule->store_header(buffer);

	bufferHeader.Remove(headerLength);
		// the header is of variable size and may include IP options
		// (TODO: that we ignore for now)

	net_protocol_module_info* module = receiving_protocol(buffer->protocol);
	if (module == NULL) {
		// no handler for this packet
		// (the error reply is skipped if raw_receive_data() returned true)
		if (!rawDelivered) {
			sDomain->module->error_reply(NULL, buffer,
				B_NET_ERROR_UNREACH_PROTOCOL, NULL);
		}
		return EAFNOSUPPORT;
	}

	if ((buffer->flags & MSG_MCAST) != 0) {
		// Unfortunately historical reasons dictate that the IP multicast
		// model be a little different from the unicast one. We deliver
		// this frame directly to all sockets registered with interest
		// for this multicast group.
		deliver_multicast(module, buffer, false);
		// the buffer is released here rather than passed to the module
		gBufferModule->free(buffer);
		return B_OK;
	}

	return module->receive_data(buffer);
}
1713 
1714 
1715 status_t
1716 ipv4_deliver_data(net_protocol* _protocol, net_buffer* buffer)
1717 {
1718 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1719 
1720 	if (protocol->raw == NULL)
1721 		return B_ERROR;
1722 
1723 	return protocol->raw->EnqueueClone(buffer);
1724 }
1725 
1726 
1727 status_t
1728 ipv4_error_received(net_error error, net_buffer* buffer)
1729 {
1730 	TRACE("  ipv4_error_received(error %d, buffer %p [%zu bytes])", (int)error,
1731 		buffer, buffer->size);
1732 
1733 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1734 	if (bufferHeader.Status() != B_OK)
1735 		return bufferHeader.Status();
1736 
1737 	ipv4_header& header = bufferHeader.Data();
1738 	TRACE_ONLY(dump_ipv4_header(header));
1739 
1740 	// We do not check the packet length, as we usually only get a part of it
1741 	uint16 headerLength = header.HeaderLength();
1742 	if (header.version != IPV4_VERSION
1743 		|| headerLength < sizeof(ipv4_header)
1744 		|| gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1745 		return B_BAD_DATA;
1746 
1747 	// Restore addresses of the original buffer
1748 
1749 	// lower layers notion of broadcast or multicast have no relevance to us
1750 	// TODO: they actually have when deciding whether to send an ICMP error
1751 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1752 
1753 	fill_sockaddr_in((struct sockaddr_in*)buffer->source, header.source);
1754 	fill_sockaddr_in((struct sockaddr_in*)buffer->destination,
1755 		header.destination);
1756 
1757 	if (header.destination == INADDR_BROADCAST)
1758 		buffer->flags |= MSG_BCAST;
1759 	else if (IN_MULTICAST(ntohl(header.destination)))
1760 		buffer->flags |= MSG_MCAST;
1761 
1762 	// test if the packet is really from us
1763 	if (!sDatalinkModule->is_local_address(sDomain, buffer->source, NULL,
1764 			NULL)) {
1765 		TRACE("  ipv4_error_received(): packet was not for us %x -> %x",
1766 			ntohl(header.source), ntohl(header.destination));
1767 		return B_ERROR;
1768 	}
1769 
1770 	buffer->protocol = header.protocol;
1771 
1772 	bufferHeader.Remove(headerLength);
1773 
1774 	net_protocol_module_info* protocol = receiving_protocol(buffer->protocol);
1775 	if (protocol == NULL)
1776 		return B_ERROR;
1777 
1778 	// propagate error
1779 	return protocol->error_received(error, buffer);
1780 }
1781 
1782 
1783 status_t
1784 ipv4_error_reply(net_protocol* protocol, net_buffer* cause, net_error error,
1785 	net_error_data* errorData)
1786 {
1787 	// Directly obtain the ICMP protocol module
1788 	net_protocol_module_info* icmp = receiving_protocol(IPPROTO_ICMP);
1789 	if (icmp == NULL)
1790 		return B_ERROR;
1791 
1792 	return icmp->error_reply(protocol, cause, error, errorData);
1793 }
1794 
1795 
1796 ssize_t
1797 ipv4_process_ancillary_data_no_container(net_protocol* protocol,
1798 	net_buffer* buffer, void* msgControl, size_t msgControlLen)
1799 {
1800 	ssize_t bytesWritten = 0;
1801 
1802 	if ((((ipv4_protocol*)protocol)->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0) {
1803 		if (msgControlLen < CMSG_SPACE(sizeof(struct in_addr)))
1804 			return B_NO_MEMORY;
1805 
1806 		cmsghdr* messageHeader = (cmsghdr*)msgControl;
1807 		messageHeader->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
1808 		messageHeader->cmsg_level = IPPROTO_IP;
1809 		messageHeader->cmsg_type = IP_RECVDSTADDR;
1810 
1811 		memcpy(CMSG_DATA(messageHeader),
1812 		 	&((struct sockaddr_in*)buffer->destination)->sin_addr,
1813 		 	sizeof(struct in_addr));
1814 
1815 		bytesWritten += CMSG_SPACE(sizeof(struct in_addr));
1816 	}
1817 
1818 	return bytesWritten;
1819 }
1820 
1821 
1822 //	#pragma mark -
1823 
1824 
1825 status_t
1826 init_ipv4()
1827 {
1828 	sPacketID = (int32)system_time();
1829 
1830 	mutex_init(&sRawSocketsLock, "raw sockets");
1831 	mutex_init(&sFragmentLock, "IPv4 Fragments");
1832 	mutex_init(&sMulticastGroupsLock, "IPv4 multicast groups");
1833 	mutex_init(&sReceivingProtocolLock, "IPv4 receiving protocols");
1834 
1835 	status_t status;
1836 
1837 	sMulticastState = new MulticastState();
1838 	if (sMulticastState == NULL) {
1839 		status = B_NO_MEMORY;
1840 		goto err4;
1841 	}
1842 
1843 	status = sMulticastState->Init();
1844 	if (status != B_OK)
1845 		goto err5;
1846 
1847 	new (&sFragmentHash) FragmentTable();
1848 	status = sFragmentHash.Init(256);
1849 	if (status != B_OK)
1850 		goto err5;
1851 
1852 	new (&sRawSockets) RawSocketList;
1853 		// static initializers do not work in the kernel,
1854 		// so we have to do it here, manually
1855 		// TODO: for modules, this shouldn't be required
1856 
1857 	status = gStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
1858 		"network/protocols/ipv4/v1", NULL);
1859 	if (status != B_OK)
1860 		goto err6;
1861 
1862 	status = gStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
1863 		&gIPv4AddressModule, &sDomain);
1864 	if (status != B_OK)
1865 		goto err6;
1866 
1867 	add_debugger_command("ipv4_multicast", dump_ipv4_multicast,
1868 		"list all current IPv4 multicast states");
1869 
1870 	return B_OK;
1871 
1872 err6:
1873 	sFragmentHash.~FragmentTable();
1874 err5:
1875 	delete sMulticastState;
1876 err4:
1877 	mutex_destroy(&sReceivingProtocolLock);
1878 	mutex_destroy(&sMulticastGroupsLock);
1879 	mutex_destroy(&sFragmentLock);
1880 	mutex_destroy(&sRawSocketsLock);
1881 	return status;
1882 }
1883 
1884 
1885 status_t
1886 uninit_ipv4()
1887 {
1888 	mutex_lock(&sReceivingProtocolLock);
1889 
1890 	remove_debugger_command("ipv4_multicast", dump_ipv4_multicast);
1891 
1892 	// put all the domain receiving protocols we gathered so far
1893 	for (uint32 i = 0; i < 256; i++) {
1894 		if (sReceivingProtocol[i] != NULL)
1895 			gStackModule->put_domain_receiving_protocol(sDomain, i);
1896 	}
1897 
1898 	gStackModule->unregister_domain(sDomain);
1899 	mutex_unlock(&sReceivingProtocolLock);
1900 
1901 	delete sMulticastState;
1902 	sFragmentHash.~FragmentTable();
1903 
1904 	mutex_destroy(&sMulticastGroupsLock);
1905 	mutex_destroy(&sFragmentLock);
1906 	mutex_destroy(&sRawSocketsLock);
1907 	mutex_destroy(&sReceivingProtocolLock);
1908 
1909 	return B_OK;
1910 }
1911 
1912 
1913 static status_t
1914 ipv4_std_ops(int32 op, ...)
1915 {
1916 	switch (op) {
1917 		case B_MODULE_INIT:
1918 			return init_ipv4();
1919 		case B_MODULE_UNINIT:
1920 			return uninit_ipv4();
1921 
1922 		default:
1923 			return B_ERROR;
1924 	}
1925 }
1926 
1927 
// The IPv4 protocol module descriptor: the module header (name, flags,
// std_ops hook), the protocol flags, and the table of protocol hooks
// implemented in this file. NULL entries are hooks this module does not
// provide; the trailing comments name them.
net_protocol_module_info gIPv4Module = {
	{
		"network/protocols/ipv4/v1",
		0,
		ipv4_std_ops
	},
	NET_PROTOCOL_ATOMIC_MESSAGES,

	ipv4_init_protocol,
	ipv4_uninit_protocol,
	ipv4_open,
	ipv4_close,
	ipv4_free,
	ipv4_connect,
	ipv4_accept,
	ipv4_control,
	ipv4_getsockopt,
	ipv4_setsockopt,
	ipv4_bind,
	ipv4_unbind,
	ipv4_listen,
	ipv4_shutdown,
	ipv4_send_data,
	ipv4_send_routed_data,
	ipv4_send_avail,
	ipv4_read_data,
	ipv4_read_avail,
	ipv4_get_domain,
	ipv4_get_mtu,
	ipv4_receive_data,
	ipv4_deliver_data,
	ipv4_error_received,
	ipv4_error_reply,
	NULL,		// add_ancillary_data()
	NULL,		// process_ancillary_data()
	ipv4_process_ancillary_data_no_container,
	NULL,		// send_data_no_buffer()
	NULL		// read_data_no_buffer()
};
1967 
// Modules this add-on depends on, each paired with the global pointer
// through which this file accesses it (presumably filled in by the module
// loader before B_MODULE_INIT -- confirm). The empty entry ends the list.
module_dependency module_dependencies[] = {
	{NET_STACK_MODULE_NAME, (module_info**)&gStackModule},
	{NET_BUFFER_MODULE_NAME, (module_info**)&gBufferModule},
	{NET_DATALINK_MODULE_NAME, (module_info**)&sDatalinkModule},
	{NET_SOCKET_MODULE_NAME, (module_info**)&sSocketModule},
	{}
};
1975 
// NULL-terminated list of the modules this add-on exports.
module_info* modules[] = {
	(module_info*)&gIPv4Module,
	NULL
};
1980