xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision 922e7ba1f3228e6f28db69b0ded8f86eb32dea17)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4.h"
11 #include "ipv4_address.h"
12 #include "multicast.h"
13 
14 #include <net_datalink.h>
15 #include <net_datalink_protocol.h>
16 #include <net_device.h>
17 #include <net_protocol.h>
18 #include <net_stack.h>
19 #include <NetBufferUtilities.h>
20 #include <ProtocolUtilities.h>
21 
22 #include <KernelExport.h>
23 #include <util/AutoLock.h>
24 #include <util/list.h>
25 #include <util/DoublyLinkedList.h>
26 #include <util/MultiHashTable.h>
27 
28 #include <netinet/in.h>
29 #include <netinet/ip.h>
30 #include <new>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <utility>
35 
36 
37 //#define TRACE_IPV4
// Debug tracing helpers.
// With TRACE_IPV4 defined, TRACE() and TRACE_SK() print through dprintf(),
// prefixed with the current system_time(); TRACE_SK() additionally prints the
// given protocol pointer. TRACE_ONLY(x) emits "x" only in tracing builds.
// Without TRACE_IPV4, TRACE()/TRACE_SK() collapse to an empty statement and
// TRACE_ONLY() to nothing.
#ifdef TRACE_IPV4
#	define TRACE(format, args...) \
		dprintf("IPv4 [%llu] " format "\n", system_time() , ##args)
#	define TRACE_SK(protocol, format, args...) \
		dprintf("IPv4 [%llu] %p " format "\n", system_time(), \
			protocol , ##args)
#	define TRACE_ONLY(x) x
#else
#	define TRACE(args...) ;
#	define TRACE_SK(args...) ;
#	define TRACE_ONLY(x)
#endif
50 
51 
#define MAX_HASH_FRAGMENTS 		64
	// slots in the fragment packet's hash
	// NOTE(review): not referenced in the code visible here -- confirm it is
	// still used.
#define FRAGMENT_TIMEOUT		60000000LL
	// discard fragment after 60 seconds; this is the delay passed to
	// set_timer() each time a fragment is added to a FragmentPacket
56 
57 
// List of the net_buffers collected for one packet that is being reassembled.
typedef DoublyLinkedList<struct net_buffer,
	DoublyLinkedListCLink<struct net_buffer> > FragmentList;

// Accessor for the checksum field of an ipv4_header stored in a net_buffer.
typedef NetBufferField<uint16, offsetof(ipv4_header, checksum)> IPChecksumField;
62 
// Identifies the datagram a fragment belongs to. All four fields are copied
// verbatim from the IPv4 header of a fragment (see reassemble_fragments());
// fragments with equal keys are collected in the same FragmentPacket.
struct ipv4_packet_key {
	in_addr_t	source;			// header.source
	in_addr_t	destination;	// header.destination
	uint16		id;				// header.id, not byte swapped
	uint8		protocol;		// header.protocol
};
69 
70 
// Collects the fragments of a single IPv4 datagram until it can be
// reassembled. Instances are stored in sFragmentHash (keyed by
// ipv4_packet_key) and carry a timer that discards them via StaleTimer().
class FragmentPacket {
public:
								FragmentPacket(const ipv4_packet_key& key);
								~FragmentPacket();

			// Adds the fragment covering the payload bytes [start, end).
			status_t			AddFragment(uint16 start, uint16 end,
									net_buffer* buffer, bool lastFragment);
			// Merges all collected fragments into "to".
			status_t			Reassemble(net_buffer* to);

			// Complete once the last fragment has been seen and no bytes
			// are missing anymore.
			bool				IsComplete() const
									{ return fReceivedLastFragment
										&& fBytesLeft == 0; }

			const ipv4_packet_key& Key() const { return fKey; }
			// Link used by the hash table (see FragmentHashDefinition).
			FragmentPacket*&	HashTableLink() { return fNext; }

	// Timer hook; "data" is the FragmentPacket that timed out.
	static	void				StaleTimer(struct net_timer* timer, void* data);

private:
			FragmentPacket*		fNext;
				// hash table chain link
			struct ipv4_packet_key fKey;
			uint32				fIndex;
				// device index of the most recently added buffer
			bool				fReceivedLastFragment;
			int32				fBytesLeft;
				// starts at IP_MAXPACKET; reduced by the size of each added
				// fragment and, once the last fragment is known, by the
				// unused space behind it
			FragmentList		fFragments;
				// buffers, ordered by fragment start offset
			net_timer			fTimer;
				// (re)started on every AddFragment(), fires StaleTimer()
};
98 
99 
100 struct FragmentHashDefinition {
101 	typedef ipv4_packet_key KeyType;
102 	typedef FragmentPacket ValueType;
103 
104 	size_t HashKey(const KeyType& key) const
105 	{
106 		return (key.source ^ key.destination ^ key.protocol ^ key.id);
107 	}
108 
109 	size_t Hash(ValueType* value) const
110 	{
111 		return HashKey(value->Key());
112 	}
113 
114 	bool Compare(const KeyType& key, ValueType* value) const
115 	{
116 		const ipv4_packet_key& packetKey = value->Key();
117 
118 		return packetKey.id == key.id
119 			&& packetKey.source == key.source
120 			&& packetKey.destination == key.destination
121 			&& packetKey.protocol == key.protocol;
122 	}
123 
124 	ValueType*& GetLink(ValueType* value) const
125 	{
126 		return value->HashTableLink();
127 	}
128 };
129 
// Hash table of the packets currently being reassembled (see sFragmentHash).
typedef BOpenHashTable<FragmentHashDefinition, false, true> FragmentTable;
131 
132 
// Receive side of a raw IPv4 socket: a DatagramSocket that can additionally
// be linked into sRawSockets. Created in ipv4_open(), deleted in ipv4_close().
class RawSocket
	: public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
public:
								RawSocket(net_socket* socket);
};
138 
// List type of sRawSockets, i.e. all currently open raw sockets.
typedef DoublyLinkedList<RawSocket> RawSocketList;

// IPv4 instantiations of the generic multicast helpers from multicast.h.
typedef MulticastGroupInterface<IPv4Multicast> IPv4GroupInterface;
typedef MulticastFilter<IPv4Multicast> IPv4MulticastFilter;
143 
144 struct MulticastStateHash {
145 	typedef std::pair<const in_addr* , uint32> KeyType;
146 	typedef IPv4GroupInterface ValueType;
147 
148 	size_t HashKey(const KeyType &key) const
149 		{ return key.first->s_addr ^ key.second; }
150 	size_t Hash(ValueType* value) const
151 		{ return HashKey(std::make_pair(&value->Address(),
152 			value->Interface()->index)); }
153 	bool Compare(const KeyType &key, ValueType* value) const
154 		{ return value->Interface()->index == key.second
155 			&& value->Address().s_addr == key.first->s_addr; }
156 	bool CompareValues(ValueType* value1, ValueType* value2) const
157 		{ return value1->Interface()->index == value2->Interface()->index
158 			&& value1->Address().s_addr == value2->Address().s_addr; }
159 	ValueType*& GetLink(ValueType* value) const { return value->HashLink(); }
160 };
161 
162 
// Per-socket state of the IPv4 protocol; allocated in ipv4_init_protocol()
// and released in ipv4_uninit_protocol().
struct ipv4_protocol : net_protocol {
	ipv4_protocol()
		:
		multicast_filter(this)
	{
	}

	RawSocket*			raw;
		// only set while the socket is open as raw socket (see ipv4_open())
	uint8				service_type;
		// reported for IP_TOS
	uint8				time_to_live;
		// reported for IP_TTL
	uint8				multicast_time_to_live;
		// reported for IP_MULTICAST_TTL
	uint32				flags;
		// combination of the IP_FLAG_* values
	struct sockaddr*	multicast_address; // for IP_MULTICAST_IF

	IPv4MulticastFilter	multicast_filter;
		// group membership state of this socket
};
179 
// protocol flags (bits of ipv4_protocol::flags)
#define IP_FLAG_HEADER_INCLUDED		0x01
	// reported by getsockopt(IP_HDRINCL)
#define IP_FLAG_RECEIVE_DEST_ADDR	0x02
	// reported by getsockopt(IP_RECVDSTADDR)
183 
184 
// Initial values of ipv4_protocol::time_to_live and
// ipv4_protocol::multicast_time_to_live (see ipv4_init_protocol()).
static const int kDefaultTTL = 254;
static const int kDefaultMulticastTTL = 1;
187 
188 
extern net_protocol_module_info gIPv4Module;
	// we need this in ipv4_std_ops() for registering the AF_INET domain

// Stack and buffer modules; non-static, so visible to other source files.
net_stack_module_info* gStackModule;
net_buffer_module_info* gBufferModule;

static struct net_domain* sDomain;
static net_datalink_module_info* sDatalinkModule;
static net_socket_module_info* sSocketModule;
static int32 sPacketID;
static RawSocketList sRawSockets;
	// all open raw sockets
static mutex sRawSocketsLock;
	// guards sRawSockets
static mutex sFragmentLock;
	// guards sFragmentHash
static FragmentTable sFragmentHash;
	// packets that are currently being reassembled
static mutex sMulticastGroupsLock;
	// held while sMulticastState is modified (JoinGroup()/LeaveGroup())

typedef MultiHashTable<MulticastStateHash> MulticastState;
static MulticastState* sMulticastState;
	// joined groups, keyed by (group address, interface index)

static net_protocol_module_info* sReceivingProtocol[256];
	// cache filled lazily by receiving_protocol(), indexed by protocol number
static mutex sReceivingProtocolLock;
	// serializes filling sReceivingProtocol
210 
211 
212 static const char*
213 print_address(const in_addr* address, char* buf, size_t bufLen)
214 {
215 	unsigned int addr = ntohl(address->s_addr);
216 
217 	snprintf(buf, bufLen, "%u.%u.%u.%u", (addr >> 24) & 0xff,
218 		(addr >> 16) & 0xff, (addr >> 8) & 0xff, addr & 0xff);
219 
220 	return buf;
221 }
222 
223 
// Only forwards to the DatagramSocket base, giving it a fixed name and the
// net_socket this raw socket belongs to.
RawSocket::RawSocket(net_socket* socket)
	:
	DatagramSocket<>("ipv4 raw socket", socket)
{
}
229 
230 
231 //	#pragma mark -
232 
233 
// Creates an empty packet for \a key. Since the total size is unknown until
// the last fragment arrives, the number of missing bytes starts out as
// IP_MAXPACKET. The stale timer is only initialized here; it is armed by
// AddFragment().
FragmentPacket::FragmentPacket(const ipv4_packet_key& key)
	:
	fKey(key),
	fIndex(0),
	fReceivedLastFragment(false),
	fBytesLeft(IP_MAXPACKET)
{
	gStackModule->init_timer(&fTimer, FragmentPacket::StaleTimer, this);
}
243 
244 
245 FragmentPacket::~FragmentPacket()
246 {
247 	// cancel the kill timer
248 	gStackModule->set_timer(&fTimer, -1);
249 
250 	// delete all fragments
251 	net_buffer* buffer;
252 	while ((buffer = fFragments.RemoveHead()) != NULL) {
253 		gBufferModule->free(buffer);
254 	}
255 }
256 
257 
258 status_t
259 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer* buffer,
260 	bool lastFragment)
261 {
262 	// restart the timer
263 	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
264 
265 	if (start >= end) {
266 		// invalid fragment
267 		return B_BAD_DATA;
268 	}
269 
270 	// Search for a position in the list to insert the fragment
271 
272 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
273 	net_buffer* previous = NULL;
274 	net_buffer* next = NULL;
275 	while ((previous = iterator.Next()) != NULL) {
276 		if (previous->fragment.start <= start) {
277 			// The new fragment can be inserted after this one
278 			break;
279 		}
280 
281 		next = previous;
282 	}
283 
284 	// See if we already have the fragment's data
285 
286 	if (previous != NULL && previous->fragment.start <= start
287 		&& previous->fragment.end >= end) {
288 		// we do, so we can just drop this fragment
289 		gBufferModule->free(buffer);
290 		return B_OK;
291 	}
292 
293 	fIndex = buffer->index;
294 		// adopt the buffer's device index
295 
296 	TRACE("    previous: %p, next: %p", previous, next);
297 
298 	// If we have parts of the data already, truncate as needed
299 
300 	if (previous != NULL && previous->fragment.end > start) {
301 		TRACE("    remove header %d bytes", previous->fragment.end - start);
302 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
303 		start = previous->fragment.end;
304 	}
305 	if (next != NULL && next->fragment.start < end) {
306 		TRACE("    remove trailer %d bytes", next->fragment.start - end);
307 		gBufferModule->remove_trailer(buffer, next->fragment.start - end);
308 		end = next->fragment.start;
309 	}
310 
311 	// Now try if we can already merge the fragments together
312 
313 	// We will always keep the last buffer received, so that we can still
314 	// report an error (in which case we're not responsible for freeing it)
315 
316 	if (previous != NULL && previous->fragment.end == start) {
317 		fFragments.Remove(previous);
318 
319 		buffer->fragment.start = previous->fragment.start;
320 		buffer->fragment.end = end;
321 
322 		status_t status = gBufferModule->merge(buffer, previous, false);
323 		TRACE("    merge previous: %s", strerror(status));
324 		if (status != B_OK) {
325 			fFragments.Insert(next, previous);
326 			return status;
327 		}
328 
329 		fFragments.Insert(next, buffer);
330 
331 		// cut down existing hole
332 		fBytesLeft -= end - start;
333 
334 		if (lastFragment && !fReceivedLastFragment) {
335 			fReceivedLastFragment = true;
336 			fBytesLeft -= IP_MAXPACKET - end;
337 		}
338 
339 		TRACE("    hole length: %d", (int)fBytesLeft);
340 
341 		return B_OK;
342 	} else if (next != NULL && next->fragment.start == end) {
343 		net_buffer* afterNext = (net_buffer*)next->link.next;
344 		fFragments.Remove(next);
345 
346 		buffer->fragment.start = start;
347 		buffer->fragment.end = next->fragment.end;
348 
349 		status_t status = gBufferModule->merge(buffer, next, true);
350 		TRACE("    merge next: %s", strerror(status));
351 		if (status != B_OK) {
352 			// Insert "next" at its previous position
353 			fFragments.Insert(afterNext, next);
354 			return status;
355 		}
356 
357 		fFragments.Insert(afterNext, buffer);
358 
359 		// cut down existing hole
360 		fBytesLeft -= end - start;
361 
362 		if (lastFragment && !fReceivedLastFragment) {
363 			fReceivedLastFragment = true;
364 			fBytesLeft -= IP_MAXPACKET - end;
365 		}
366 
367 		TRACE("    hole length: %d", (int)fBytesLeft);
368 
369 		return B_OK;
370 	}
371 
372 	// We couldn't merge the fragments, so we need to add it as is
373 
374 	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);
375 
376 	buffer->fragment.start = start;
377 	buffer->fragment.end = end;
378 	fFragments.Insert(next, buffer);
379 
380 	// update length of the hole, if any
381 	fBytesLeft -= end - start;
382 
383 	if (lastFragment && !fReceivedLastFragment) {
384 		fReceivedLastFragment = true;
385 		fBytesLeft -= IP_MAXPACKET - end;
386 	}
387 
388 	TRACE("    hole length: %d", (int)fBytesLeft);
389 
390 	return B_OK;
391 }
392 
393 
394 /*!	Reassembles the fragments to the specified buffer \a to.
395 	This buffer must have been added via AddFragment() before.
396 */
397 status_t
398 FragmentPacket::Reassemble(net_buffer* to)
399 {
400 	if (!IsComplete())
401 		return B_ERROR;
402 
403 	net_buffer* buffer = NULL;
404 
405 	net_buffer* fragment;
406 	while ((fragment = fFragments.RemoveHead()) != NULL) {
407 		if (buffer != NULL) {
408 			status_t status;
409 			if (to == fragment) {
410 				status = gBufferModule->merge(fragment, buffer, false);
411 				buffer = fragment;
412 			} else
413 				status = gBufferModule->merge(buffer, fragment, true);
414 			if (status != B_OK)
415 				return status;
416 		} else
417 			buffer = fragment;
418 	}
419 
420 	if (buffer != to)
421 		panic("ipv4 packet reassembly did not work correctly.");
422 
423 	to->index = fIndex;
424 		// reset the buffer's device index
425 
426 	return B_OK;
427 }
428 
429 
430 /*static*/ void
431 FragmentPacket::StaleTimer(struct net_timer* timer, void* data)
432 {
433 	FragmentPacket* packet = (FragmentPacket*)data;
434 	TRACE("Assembling FragmentPacket %p timed out!", packet);
435 
436 	MutexLocker locker(&sFragmentLock);
437 	sFragmentHash.Remove(packet);
438 	locker.Unlock();
439 
440 	if (!packet->fFragments.IsEmpty()) {
441 		// Send error: fragment reassembly time exceeded
442 		sDomain->module->error_reply(NULL, packet->fFragments.First(),
443 			B_NET_ERROR_REASSEMBLY_TIME_EXCEEDED, NULL);
444 	}
445 
446 	delete packet;
447 }
448 
449 
450 //	#pragma mark -
451 
452 
#ifdef TRACE_IPV4
/*!	Prints all fields of \a header to the debug output (tracing builds only).
*/
static void
dump_ipv4_header(ipv4_header &header)
{
	// The addresses are stored in network byte order, so their octets are
	// already laid out in memory in the order they are printed in -- on every
	// host, no matter its endianness. Reading them byte-wise therefore needs
	// no endian dependent overlay structure.
	const uint8* src = (const uint8*)&header.source;
	const uint8* dst = (const uint8*)&header.destination;

	dprintf("  version: %d\n", header.version);
	dprintf("  header_length: 4 * %d\n", header.header_length);
	dprintf("  service_type: %d\n", header.service_type);
	dprintf("  total_length: %d\n", header.TotalLength());
	dprintf("  id: %d\n", ntohs(header.id));
	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
		header.FragmentOffset() & IP_FRAGMENT_OFFSET_MASK,
		(header.FragmentOffset() & IP_RESERVED_FLAG) ? 'r' : '-',
		(header.FragmentOffset() & IP_DONT_FRAGMENT) ? 'd' : '-',
		(header.FragmentOffset() & IP_MORE_FRAGMENTS) ? 'm' : '-');
	dprintf("  time_to_live: %d\n", header.time_to_live);
	dprintf("  protocol: %d\n", header.protocol);
	dprintf("  checksum: %d\n", ntohs(header.checksum));
	dprintf("  source: %d.%d.%d.%d\n", src[0], src[1], src[2], src[3]);
	dprintf("  destination: %d.%d.%d.%d\n", dst[0], dst[1], dst[2], dst[3]);
}
#endif	// TRACE_IPV4
489 
490 
491 static int
492 dump_ipv4_multicast(int argc, char** argv)
493 {
494 	MulticastState::Iterator it = sMulticastState->GetIterator();
495 
496 	while (it.HasNext()) {
497 		IPv4GroupInterface* state = it.Next();
498 
499 		char addressBuffer[64];
500 
501 		kprintf("%p: group <%s, %s, %s {", state, state->Interface()->name,
502 			print_address(&state->Address(), addressBuffer,
503 			sizeof(addressBuffer)),
504 			state->Mode() == IPv4GroupInterface::kExclude
505 				? "Exclude" : "Include");
506 
507 		int count = 0;
508 		IPv4GroupInterface::AddressSet::Iterator it
509 			= state->Sources().GetIterator();
510 		while (it.HasNext()) {
511 			kprintf("%s%s", count > 0 ? ", " : "", print_address(&it.Next(),
512 				addressBuffer, sizeof(addressBuffer)));
513 			count++;
514 		}
515 
516 		kprintf("}> sock %p\n", state->Parent()->Socket());
517 	}
518 
519 	return 0;
520 }
521 
522 
523 /*!	Attempts to re-assemble fragmented packets.
524 	\return B_OK if everything went well; if it could reassemble the packet, \a _buffer
525 		will point to its buffer, otherwise, it will be \c NULL.
526 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
527 */
528 static status_t
529 reassemble_fragments(const ipv4_header &header, net_buffer** _buffer)
530 {
531 	net_buffer* buffer = *_buffer;
532 	status_t status;
533 
534 	struct ipv4_packet_key key;
535 	key.source = (in_addr_t)header.source;
536 	key.destination = (in_addr_t)header.destination;
537 	key.id = header.id;
538 	key.protocol = header.protocol;
539 
540 	// TODO: Make locking finer grained.
541 	MutexLocker locker(&sFragmentLock);
542 
543 	FragmentPacket* packet = sFragmentHash.Lookup(key);
544 	if (packet == NULL) {
545 		// New fragment packet
546 		packet = new (std::nothrow) FragmentPacket(key);
547 		if (packet == NULL)
548 			return B_NO_MEMORY;
549 
550 		// add packet to hash
551 		status = sFragmentHash.Insert(packet);
552 		if (status != B_OK) {
553 			delete packet;
554 			return status;
555 		}
556 	}
557 
558 	uint16 fragmentOffset = header.FragmentOffset();
559 	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
560 	uint16 end = start + header.TotalLength() - header.HeaderLength();
561 	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;
562 
563 	TRACE("   Received IPv4 %sfragment of size %d, offset %d.",
564 		lastFragment ? "last ": "", end - start, start);
565 
566 	// Remove header unless this is the first fragment
567 	if (start != 0)
568 		gBufferModule->remove_header(buffer, header.HeaderLength());
569 
570 	status = packet->AddFragment(start, end, buffer, lastFragment);
571 	if (status != B_OK)
572 		return status;
573 
574 	if (packet->IsComplete()) {
575 		sFragmentHash.Remove(packet);
576 			// no matter if reassembling succeeds, we won't need this packet
577 			// anymore
578 
579 		status = packet->Reassemble(buffer);
580 		delete packet;
581 
582 		// _buffer does not change
583 		return status;
584 	}
585 
586 	// This indicates that the packet is not yet complete
587 	*_buffer = NULL;
588 	return B_OK;
589 }
590 
591 
592 /*!	Fragments the incoming buffer and send all fragments via the specified
593 	\a route.
594 */
595 static status_t
596 send_fragments(ipv4_protocol* protocol, struct net_route* route,
597 	net_buffer* buffer, uint32 mtu)
598 {
599 	TRACE_SK(protocol, "SendFragments(%lu bytes, mtu %lu)", buffer->size, mtu);
600 
601 	NetBufferHeaderReader<ipv4_header> originalHeader(buffer);
602 	if (originalHeader.Status() != B_OK)
603 		return originalHeader.Status();
604 
605 	uint16 headerLength = originalHeader->HeaderLength();
606 	uint32 bytesLeft = buffer->size - headerLength;
607 	uint32 fragmentOffset = 0;
608 	status_t status = B_OK;
609 
610 	net_buffer* headerBuffer = gBufferModule->split(buffer, headerLength);
611 	if (headerBuffer == NULL)
612 		return B_NO_MEMORY;
613 
614 	// TODO: we need to make sure ipv4_header is contiguous or
615 	// use another construct.
616 	NetBufferHeaderReader<ipv4_header> bufferHeader(headerBuffer);
617 	ipv4_header* header = &bufferHeader.Data();
618 
619 	// Adapt MTU to be a multiple of 8 (fragment offsets can only be specified
620 	// this way)
621 	mtu -= headerLength;
622 	mtu &= ~7;
623 	TRACE("  adjusted MTU to %ld, bytesLeft %ld", mtu, bytesLeft);
624 
625 	while (bytesLeft > 0) {
626 		uint32 fragmentLength = min_c(bytesLeft, mtu);
627 		bytesLeft -= fragmentLength;
628 		bool lastFragment = bytesLeft == 0;
629 
630 		header->total_length = htons(fragmentLength + headerLength);
631 		header->fragment_offset = htons((lastFragment ? 0 : IP_MORE_FRAGMENTS)
632 			| (fragmentOffset >> 3));
633 		header->checksum = 0;
634 		header->checksum = gStackModule->checksum((uint8*)header,
635 			headerLength);
636 			// TODO: compute the checksum only for those parts that changed?
637 
638 		TRACE("  send fragment of %ld bytes (%ld bytes left)", fragmentLength,
639 			bytesLeft);
640 
641 		net_buffer* fragmentBuffer;
642 		if (!lastFragment) {
643 			fragmentBuffer = gBufferModule->split(buffer, fragmentLength);
644 			fragmentOffset += fragmentLength;
645 		} else
646 			fragmentBuffer = buffer;
647 
648 		if (fragmentBuffer == NULL) {
649 			status = B_NO_MEMORY;
650 			break;
651 		}
652 
653 		// copy header to fragment
654 		status = gBufferModule->prepend(fragmentBuffer, header, headerLength);
655 
656 		// send fragment
657 		if (status == B_OK)
658 			status = sDatalinkModule->send_routed_data(route, fragmentBuffer);
659 
660 		if (lastFragment) {
661 			// we don't own the last buffer, so we don't have to free it
662 			break;
663 		}
664 
665 		if (status != B_OK) {
666 			gBufferModule->free(fragmentBuffer);
667 			break;
668 		}
669 	}
670 
671 	gBufferModule->free(headerBuffer);
672 	return status;
673 }
674 
675 
676 /*!	Delivers the provided \a buffer to all listeners of this multicast group.
677 	Does not take over ownership of the buffer.
678 */
679 static bool
680 deliver_multicast(net_protocol_module_info* module, net_buffer* buffer,
681 	bool deliverToRaw)
682 {
683 	if (module->deliver_data == NULL)
684 		return false;
685 
686 	// TODO: fix multicast!
687 	return false;
688 	MutexLocker _(sMulticastGroupsLock);
689 
690 	sockaddr_in* multicastAddr = (sockaddr_in*)buffer->destination;
691 
692 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
693 		&multicastAddr->sin_addr, buffer->interface_address->interface->index));
694 
695 	size_t count = 0;
696 
697 	while (it.HasNext()) {
698 		IPv4GroupInterface* state = it.Next();
699 
700 		ipv4_protocol* ipProtocol = state->Parent()->Socket();
701 		if (deliverToRaw && (ipProtocol->raw == NULL
702 				|| ipProtocol->socket->protocol != buffer->protocol))
703 			continue;
704 
705 		if (state->FilterAccepts(buffer)) {
706 			net_protocol* protocol = ipProtocol;
707 			if (protocol->module != module) {
708 				// as multicast filters are installed with an IPv4 protocol
709 				// reference, we need to go and find the appropriate instance
710 				// related to the 'receiving protocol' with module 'module'.
711 				net_protocol* protocol = ipProtocol->socket->first_protocol;
712 
713 				while (protocol != NULL && protocol->module != module)
714 					protocol = protocol->next;
715 			}
716 
717 			if (protocol != NULL) {
718 				module->deliver_data(protocol, buffer);
719 				count++;
720 			}
721 		}
722 	}
723 
724 	return count > 0;
725 }
726 
727 
728 /*!	Delivers the buffer to all listening raw sockets without taking ownership of
729 	the provided \a buffer.
730 	Returns \c true if there was any receiver, \c false if not.
731 */
732 static bool
733 raw_receive_data(net_buffer* buffer)
734 {
735 	MutexLocker locker(sRawSocketsLock);
736 
737 	if (sRawSockets.IsEmpty())
738 		return false;
739 
740 	TRACE("RawReceiveData(%i)", buffer->protocol);
741 
742 	if ((buffer->flags & MSG_MCAST) != 0) {
743 		// we need to call deliver_multicast here separately as
744 		// buffer still has the IP header, and it won't in the
745 		// next call. This isn't very optimized but works for now.
746 		// A better solution would be to hold separate hash tables
747 		// and lists for RAW and non-RAW sockets.
748 		return deliver_multicast(&gIPv4Module, buffer, true);
749 	}
750 
751 	RawSocketList::Iterator iterator = sRawSockets.GetIterator();
752 	size_t count = 0;
753 
754 	while (iterator.HasNext()) {
755 		RawSocket* raw = iterator.Next();
756 
757 		if (raw->Socket()->protocol == buffer->protocol) {
758 			raw->EnqueueClone(buffer);
759 			count++;
760 		}
761 	}
762 
763 	return count > 0;
764 }
765 
766 
767 static inline sockaddr*
768 fill_sockaddr_in(sockaddr_in* target, in_addr_t address)
769 {
770 	target->sin_family = AF_INET;
771 	target->sin_len = sizeof(sockaddr_in);
772 	target->sin_port = 0;
773 	target->sin_addr.s_addr = address;
774 	return (sockaddr*)target;
775 }
776 
777 
778 static status_t
779 get_int_option(void* target, size_t length, int value)
780 {
781 	if (length != sizeof(int))
782 		return B_BAD_VALUE;
783 
784 	return user_memcpy(target, &value, sizeof(int));
785 }
786 
787 
788 template<typename Type> static status_t
789 set_int_option(Type &target, const void* _value, size_t length)
790 {
791 	int value;
792 
793 	if (length != sizeof(int))
794 		return B_BAD_VALUE;
795 
796 	if (user_memcpy(&value, _value, sizeof(int)) != B_OK)
797 		return B_BAD_ADDRESS;
798 
799 	target = value;
800 	return B_OK;
801 }
802 
803 
// Returns the module that receives packets of the given IP \a protocol
// number, or NULL if none could be obtained. Results are cached in
// sReceivingProtocol, so the stack is only asked once per protocol.
static net_protocol_module_info*
receiving_protocol(uint8 protocol)
{
	// fast path: look at the cache without taking the lock
	net_protocol_module_info* module = sReceivingProtocol[protocol];
	if (module != NULL)
		return module;

	MutexLocker locker(sReceivingProtocolLock);

	// check again with the lock held, someone else may have filled the
	// entry in the meantime
	module = sReceivingProtocol[protocol];
	if (module != NULL)
		return module;

	// only successful lookups are cached; on failure "module" is returned
	// as is (still NULL, unless the callee changed it)
	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol,
			&module) == B_OK)
		sReceivingProtocol[protocol] = module;

	return module;
}
823 
824 
825 // #pragma mark - multicast
826 
827 
828 status_t
829 IPv4Multicast::JoinGroup(IPv4GroupInterface* state)
830 {
831 	MutexLocker _(sMulticastGroupsLock);
832 
833 	sockaddr_in groupAddr;
834 	status_t status = sDatalinkModule->join_multicast(state->Interface(),
835 		sDomain, fill_sockaddr_in(&groupAddr, state->Address().s_addr));
836 	if (status != B_OK)
837 		return status;
838 
839 	sMulticastState->Insert(state);
840 	return B_OK;
841 }
842 
843 
844 status_t
845 IPv4Multicast::LeaveGroup(IPv4GroupInterface* state)
846 {
847 	MutexLocker _(sMulticastGroupsLock);
848 
849 	sMulticastState->Remove(state);
850 
851 	sockaddr_in groupAddr;
852 	return sDatalinkModule->leave_multicast(state->Interface(), sDomain,
853 		fill_sockaddr_in(&groupAddr, state->Address().s_addr));
854 }
855 
856 
857 static status_t
858 ipv4_delta_group(IPv4GroupInterface* group, int option,
859 	net_interface* interface, const in_addr* sourceAddr)
860 {
861 	switch (option) {
862 		case IP_ADD_MEMBERSHIP:
863 			return group->Add();
864 		case IP_DROP_MEMBERSHIP:
865 			return group->Drop();
866 		case IP_BLOCK_SOURCE:
867 			return group->BlockSource(*sourceAddr);
868 		case IP_UNBLOCK_SOURCE:
869 			return group->UnblockSource(*sourceAddr);
870 		case IP_ADD_SOURCE_MEMBERSHIP:
871 			return group->AddSSM(*sourceAddr);
872 		case IP_DROP_SOURCE_MEMBERSHIP:
873 			return group->DropSSM(*sourceAddr);
874 	}
875 
876 	return B_ERROR;
877 }
878 
879 
880 static status_t
881 ipv4_delta_membership(ipv4_protocol* protocol, int option,
882 	net_interface* interface, const in_addr* groupAddr,
883 	const in_addr* sourceAddr)
884 {
885 	IPv4MulticastFilter& filter = protocol->multicast_filter;
886 	IPv4GroupInterface* state = NULL;
887 	status_t status = B_OK;
888 
889 	switch (option) {
890 		case IP_ADD_MEMBERSHIP:
891 		case IP_ADD_SOURCE_MEMBERSHIP:
892 			status = filter.GetState(*groupAddr, interface, state, true);
893 			break;
894 
895 		case IP_DROP_MEMBERSHIP:
896 		case IP_BLOCK_SOURCE:
897 		case IP_UNBLOCK_SOURCE:
898 		case IP_DROP_SOURCE_MEMBERSHIP:
899 			filter.GetState(*groupAddr, interface, state, false);
900 			if (state == NULL) {
901 				if (option == IP_DROP_MEMBERSHIP
902 					|| option == IP_DROP_SOURCE_MEMBERSHIP)
903 					return EADDRNOTAVAIL;
904 
905 				return B_BAD_VALUE;
906 			}
907 			break;
908 	}
909 
910 	if (status != B_OK)
911 		return status;
912 
913 	status = ipv4_delta_group(state, option, interface, sourceAddr);
914 	filter.ReturnState(state);
915 	return status;
916 }
917 
918 
919 static int
920 generic_to_ipv4(int option)
921 {
922 	switch (option) {
923 		case MCAST_JOIN_GROUP:
924 			return IP_ADD_MEMBERSHIP;
925 		case MCAST_JOIN_SOURCE_GROUP:
926 			return IP_ADD_SOURCE_MEMBERSHIP;
927 		case MCAST_LEAVE_GROUP:
928 			return IP_DROP_MEMBERSHIP;
929 		case MCAST_BLOCK_SOURCE:
930 			return IP_BLOCK_SOURCE;
931 		case MCAST_UNBLOCK_SOURCE:
932 			return IP_UNBLOCK_SOURCE;
933 		case MCAST_LEAVE_SOURCE_GROUP:
934 			return IP_DROP_SOURCE_MEMBERSHIP;
935 	}
936 
937 	return -1;
938 }
939 
940 
941 static net_interface*
942 get_multicast_interface(ipv4_protocol* protocol, const in_addr* address)
943 {
944 	// TODO: this is broken and leaks references
945 	sockaddr_in groupAddr;
946 	net_route* route = sDatalinkModule->get_route(sDomain,
947 		fill_sockaddr_in(&groupAddr, address ? address->s_addr : INADDR_ANY));
948 	if (route == NULL)
949 		return NULL;
950 
951 	return route->interface_address->interface;
952 }
953 
954 
955 static status_t
956 ipv4_delta_membership(ipv4_protocol* protocol, int option,
957 	in_addr* interfaceAddr, in_addr* groupAddr, in_addr* sourceAddr)
958 {
959 	net_interface* interface = NULL;
960 
961 	if (interfaceAddr->s_addr == INADDR_ANY) {
962 		interface = get_multicast_interface(protocol, groupAddr);
963 	} else {
964 		sockaddr_in address;
965 		interface = sDatalinkModule->get_interface_with_address(
966 			fill_sockaddr_in(&address, interfaceAddr->s_addr));
967 	}
968 
969 	if (interface == NULL)
970 		return B_DEVICE_NOT_FOUND;
971 
972 	return ipv4_delta_membership(protocol, option, interface,
973 		groupAddr, sourceAddr);
974 }
975 
976 
977 static status_t
978 ipv4_generic_delta_membership(ipv4_protocol* protocol, int option,
979 	uint32 index, const sockaddr_storage* _groupAddr,
980 	const sockaddr_storage* _sourceAddr)
981 {
982 	if (_groupAddr->ss_family != AF_INET
983 		|| (_sourceAddr != NULL && _sourceAddr->ss_family != AF_INET))
984 		return B_BAD_VALUE;
985 
986 	const in_addr* groupAddr = &((const sockaddr_in*)_groupAddr)->sin_addr;
987 
988 	// TODO: this is broken and leaks references
989 	net_interface* interface;
990 	if (index == 0)
991 		interface = get_multicast_interface(protocol, groupAddr);
992 	else
993 		interface = sDatalinkModule->get_interface(sDomain, index);
994 
995 	if (interface == NULL)
996 		return B_DEVICE_NOT_FOUND;
997 
998 	const in_addr* sourceAddr = NULL;
999 	if (_sourceAddr != NULL)
1000 		sourceAddr = &((const sockaddr_in*)_sourceAddr)->sin_addr;
1001 
1002 	return ipv4_delta_membership(protocol, generic_to_ipv4(option), interface,
1003 		groupAddr, sourceAddr);
1004 }
1005 
1006 
1007 //	#pragma mark - module interface
1008 
1009 
1010 net_protocol*
1011 ipv4_init_protocol(net_socket* socket)
1012 {
1013 	ipv4_protocol* protocol = new (std::nothrow) ipv4_protocol();
1014 	if (protocol == NULL)
1015 		return NULL;
1016 
1017 	protocol->raw = NULL;
1018 	protocol->service_type = 0;
1019 	protocol->time_to_live = kDefaultTTL;
1020 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
1021 	protocol->flags = 0;
1022 	protocol->multicast_address = NULL;
1023 	return protocol;
1024 }
1025 
1026 
1027 status_t
1028 ipv4_uninit_protocol(net_protocol* _protocol)
1029 {
1030 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1031 
1032 	delete protocol->raw;
1033 	delete protocol->multicast_address;
1034 	delete protocol;
1035 	return B_OK;
1036 }
1037 
1038 
1039 /*!	Since open() is only called on the top level protocol, when we get here
1040 	it means we are on a SOCK_RAW socket.
1041 */
1042 status_t
1043 ipv4_open(net_protocol* _protocol)
1044 {
1045 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1046 
1047 	// Only root may open raw sockets
1048 	if (geteuid() != 0)
1049 		return B_NOT_ALLOWED;
1050 
1051 	RawSocket* raw = new (std::nothrow) RawSocket(protocol->socket);
1052 	if (raw == NULL)
1053 		return B_NO_MEMORY;
1054 
1055 	status_t status = raw->InitCheck();
1056 	if (status != B_OK) {
1057 		delete raw;
1058 		return status;
1059 	}
1060 
1061 	TRACE_SK(protocol, "Open()");
1062 
1063 	protocol->raw = raw;
1064 
1065 	MutexLocker locker(sRawSocketsLock);
1066 	sRawSockets.Add(raw);
1067 	return B_OK;
1068 }
1069 
1070 
1071 status_t
1072 ipv4_close(net_protocol* _protocol)
1073 {
1074 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1075 	RawSocket* raw = protocol->raw;
1076 	if (raw == NULL)
1077 		return B_ERROR;
1078 
1079 	TRACE_SK(protocol, "Close()");
1080 
1081 	MutexLocker locker(sRawSocketsLock);
1082 	sRawSockets.Remove(raw);
1083 	delete raw;
1084 	protocol->raw = NULL;
1085 
1086 	return B_OK;
1087 }
1088 
1089 
// Nothing to do here for IPv4; always succeeds.
status_t
ipv4_free(net_protocol* protocol)
{
	return B_OK;
}
1095 
1096 
// Connecting is not implemented at this level; always fails with B_ERROR.
status_t
ipv4_connect(net_protocol* protocol, const struct sockaddr* address)
{
	return B_ERROR;
}
1102 
1103 
// Accepting connections is not supported; always returns B_NOT_SUPPORTED.
status_t
ipv4_accept(net_protocol* protocol, struct net_socket** _acceptedSocket)
{
	return B_NOT_SUPPORTED;
}
1109 
1110 
1111 status_t
1112 ipv4_control(net_protocol* _protocol, int level, int option, void* value,
1113 	size_t* _length)
1114 {
1115 	if ((level & LEVEL_MASK) != IPPROTO_IP)
1116 		return sDatalinkModule->control(sDomain, option, value, _length);
1117 
1118 	return B_BAD_VALUE;
1119 }
1120 
1121 
1122 status_t
1123 ipv4_getsockopt(net_protocol* _protocol, int level, int option, void* value,
1124 	int* _length)
1125 {
1126 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1127 
1128 	if (level == IPPROTO_IP) {
1129 		if (option == IP_HDRINCL) {
1130 			return get_int_option(value, *_length,
1131 				(protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0);
1132 		}
1133 		if (option == IP_RECVDSTADDR) {
1134 			return get_int_option(value, *_length,
1135 				(protocol->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0);
1136 		}
1137 		if (option == IP_TTL)
1138 			return get_int_option(value, *_length, protocol->time_to_live);
1139 		if (option == IP_TOS)
1140 			return get_int_option(value, *_length, protocol->service_type);
1141 		if (option == IP_MULTICAST_TTL) {
1142 			return get_int_option(value, *_length,
1143 				protocol->multicast_time_to_live);
1144 		}
1145 		if (option == IP_ADD_MEMBERSHIP
1146 			|| option == IP_DROP_MEMBERSHIP
1147 			|| option == IP_BLOCK_SOURCE
1148 			|| option == IP_UNBLOCK_SOURCE
1149 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1150 			|| option == IP_DROP_SOURCE_MEMBERSHIP
1151 			|| option == MCAST_JOIN_GROUP
1152 			|| option == MCAST_LEAVE_GROUP
1153 			|| option == MCAST_BLOCK_SOURCE
1154 			|| option == MCAST_UNBLOCK_SOURCE
1155 			|| option == MCAST_JOIN_SOURCE_GROUP
1156 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1157 			// RFC 3678, Section 4.1:
1158 			// ``An error of EOPNOTSUPP is returned if these options are
1159 			// used with getsockopt().''
1160 			return B_NOT_SUPPORTED;
1161 		}
1162 
1163 		dprintf("IPv4::getsockopt(): get unknown option: %d\n", option);
1164 		return ENOPROTOOPT;
1165 	}
1166 
1167 	return sSocketModule->get_option(protocol->socket, level, option, value,
1168 		_length);
1169 }
1170 
1171 
1172 status_t
1173 ipv4_setsockopt(net_protocol* _protocol, int level, int option,
1174 	const void* value, int length)
1175 {
1176 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1177 
1178 	if (level == IPPROTO_IP) {
1179 		if (option == IP_HDRINCL) {
1180 			int headerIncluded;
1181 			if (length != sizeof(int))
1182 				return B_BAD_VALUE;
1183 			if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded))
1184 					!= B_OK)
1185 				return B_BAD_ADDRESS;
1186 
1187 			if (headerIncluded)
1188 				protocol->flags |= IP_FLAG_HEADER_INCLUDED;
1189 			else
1190 				protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
1191 
1192 			return B_OK;
1193 		}
1194 		if (option == IP_RECVDSTADDR) {
1195 			int getAddress;
1196 			if (length != sizeof(int))
1197 				return B_BAD_VALUE;
1198 			if (user_memcpy(&getAddress, value, sizeof(int)) != B_OK)
1199 				return B_BAD_ADDRESS;
1200 
1201 			if (getAddress && (protocol->socket->type == SOCK_DGRAM
1202 					|| protocol->socket->type == SOCK_RAW))
1203 				protocol->flags |= IP_FLAG_RECEIVE_DEST_ADDR;
1204 			else
1205 				protocol->flags &= ~IP_FLAG_RECEIVE_DEST_ADDR;
1206 
1207 			return B_OK;
1208 		}
1209 		if (option == IP_TTL)
1210 			return set_int_option(protocol->time_to_live, value, length);
1211 		if (option == IP_TOS)
1212 			return set_int_option(protocol->service_type, value, length);
1213 		if (option == IP_MULTICAST_IF) {
1214 			if (length != sizeof(struct in_addr))
1215 				return B_BAD_VALUE;
1216 
1217 			struct sockaddr_in* address = new (std::nothrow) sockaddr_in;
1218 			if (address == NULL)
1219 				return B_NO_MEMORY;
1220 
1221 			if (user_memcpy(&address->sin_addr, value, sizeof(struct in_addr))
1222 					!= B_OK) {
1223 				delete address;
1224 				return B_BAD_ADDRESS;
1225 			}
1226 
1227 			// Using INADDR_ANY to remove the previous setting.
1228 			if (address->sin_addr.s_addr == htonl(INADDR_ANY)) {
1229 				delete address;
1230 				delete protocol->multicast_address;
1231 				protocol->multicast_address = NULL;
1232 				return B_OK;
1233 			}
1234 
1235 			struct net_interface* interface
1236 				= sDatalinkModule->get_interface_with_address(
1237 					(sockaddr*)address);
1238 			if (interface == NULL) {
1239 				delete address;
1240 				return EADDRNOTAVAIL;
1241 			}
1242 
1243 			delete protocol->multicast_address;
1244 			protocol->multicast_address = (struct sockaddr*)address;
1245 
1246 			sDatalinkModule->put_interface(interface);
1247 			return B_OK;
1248 		}
1249 		if (option == IP_MULTICAST_TTL) {
1250 			return set_int_option(protocol->multicast_time_to_live, value,
1251 				length);
1252 		}
1253 		if (option == IP_ADD_MEMBERSHIP || option == IP_DROP_MEMBERSHIP) {
1254 			ip_mreq mreq;
1255 			if (length != sizeof(ip_mreq))
1256 				return B_BAD_VALUE;
1257 			if (user_memcpy(&mreq, value, sizeof(ip_mreq)) != B_OK)
1258 				return B_BAD_ADDRESS;
1259 
1260 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1261 				&mreq.imr_multiaddr, NULL);
1262 		}
1263 		if (option == IP_BLOCK_SOURCE
1264 			|| option == IP_UNBLOCK_SOURCE
1265 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1266 			|| option == IP_DROP_SOURCE_MEMBERSHIP) {
1267 			ip_mreq_source mreq;
1268 			if (length != sizeof(ip_mreq_source))
1269 				return B_BAD_VALUE;
1270 			if (user_memcpy(&mreq, value, sizeof(ip_mreq_source)) != B_OK)
1271 				return B_BAD_ADDRESS;
1272 
1273 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1274 				&mreq.imr_multiaddr, &mreq.imr_sourceaddr);
1275 		}
1276 		if (option == MCAST_LEAVE_GROUP || option == MCAST_JOIN_GROUP) {
1277 			group_req greq;
1278 			if (length != sizeof(group_req))
1279 				return B_BAD_VALUE;
1280 			if (user_memcpy(&greq, value, sizeof(group_req)) != B_OK)
1281 				return B_BAD_ADDRESS;
1282 
1283 			return ipv4_generic_delta_membership(protocol, option,
1284 				greq.gr_interface, &greq.gr_group, NULL);
1285 		}
1286 		if (option == MCAST_BLOCK_SOURCE
1287 			|| option == MCAST_UNBLOCK_SOURCE
1288 			|| option == MCAST_JOIN_SOURCE_GROUP
1289 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1290 			group_source_req greq;
1291 			if (length != sizeof(group_source_req))
1292 				return B_BAD_VALUE;
1293 			if (user_memcpy(&greq, value, sizeof(group_source_req)) != B_OK)
1294 				return B_BAD_ADDRESS;
1295 
1296 			return ipv4_generic_delta_membership(protocol, option,
1297 				greq.gsr_interface, &greq.gsr_group, &greq.gsr_source);
1298 		}
1299 
1300 		dprintf("IPv4::setsockopt(): set unknown option: %d\n", option);
1301 		return ENOPROTOOPT;
1302 	}
1303 
1304 	return sSocketModule->set_option(protocol->socket, level, option,
1305 		value, length);
1306 }
1307 
1308 
1309 status_t
1310 ipv4_bind(net_protocol* protocol, const struct sockaddr* address)
1311 {
1312 	if (address->sa_family != AF_INET)
1313 		return EAFNOSUPPORT;
1314 
1315 	// only INADDR_ANY and addresses of local interfaces are accepted:
1316 	if (((sockaddr_in*)address)->sin_addr.s_addr == INADDR_ANY
1317 		|| IN_MULTICAST(ntohl(((sockaddr_in*)address)->sin_addr.s_addr))
1318 		|| sDatalinkModule->is_local_address(sDomain, address, NULL, NULL)) {
1319 		memcpy(&protocol->socket->address, address, sizeof(struct sockaddr_in));
1320 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
1321 			// explicitly set length, as our callers can't be trusted to
1322 			// always provide the correct length!
1323 		return B_OK;
1324 	}
1325 
1326 	return B_ERROR;
1327 		// address is unknown on this host
1328 }
1329 
1330 
1331 status_t
1332 ipv4_unbind(net_protocol* protocol, struct sockaddr* address)
1333 {
1334 	// nothing to do here
1335 	return B_OK;
1336 }
1337 
1338 
1339 status_t
1340 ipv4_listen(net_protocol* protocol, int count)
1341 {
1342 	return B_NOT_SUPPORTED;
1343 }
1344 
1345 
1346 status_t
1347 ipv4_shutdown(net_protocol* protocol, int direction)
1348 {
1349 	return B_NOT_SUPPORTED;
1350 }
1351 
1352 
1353 status_t
1354 ipv4_send_routed_data(net_protocol* _protocol, struct net_route* route,
1355 	net_buffer* buffer)
1356 {
1357 	if (route == NULL)
1358 		return B_BAD_VALUE;
1359 
1360 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1361 	net_interface_address* interfaceAddress = route->interface_address;
1362 	net_interface* interface = interfaceAddress->interface;
1363 
1364 	TRACE_SK(protocol, "SendRoutedData(%p, %p [%ld bytes])", route, buffer,
1365 		buffer->size);
1366 
1367 	sockaddr_in& source = *(sockaddr_in*)buffer->source;
1368 	sockaddr_in& destination = *(sockaddr_in*)buffer->destination;
1369 	sockaddr_in* broadcastAddress = (sockaddr_in*)interfaceAddress->destination;
1370 
1371 	bool checksumNeeded = true;
1372 	bool headerIncluded = false;
1373 	if (protocol != NULL)
1374 		headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
1375 
1376 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1377 
1378 	if (destination.sin_addr.s_addr == INADDR_ANY)
1379 		return EDESTADDRREQ;
1380 
1381 	if ((interface->device->flags & IFF_BROADCAST) != 0
1382 		&& (destination.sin_addr.s_addr == INADDR_BROADCAST
1383 			|| (broadcastAddress != NULL && destination.sin_addr.s_addr
1384 					== broadcastAddress->sin_addr.s_addr))) {
1385 		if (protocol && !(protocol->socket->options & SO_BROADCAST))
1386 			return B_BAD_VALUE;
1387 		buffer->flags |= MSG_BCAST;
1388 	} else if (IN_MULTICAST(ntohl(destination.sin_addr.s_addr)))
1389 		buffer->flags |= MSG_MCAST;
1390 
1391 	// Add IP header (if needed)
1392 
1393 	if (!headerIncluded) {
1394 		NetBufferPrepend<ipv4_header> header(buffer);
1395 		if (header.Status() != B_OK)
1396 			return header.Status();
1397 
1398 		header->version = IPV4_VERSION;
1399 		header->header_length = sizeof(ipv4_header) / 4;
1400 		header->service_type = protocol ? protocol->service_type : 0;
1401 		header->total_length = htons(buffer->size);
1402 		header->id = htons(atomic_add(&sPacketID, 1));
1403 		header->fragment_offset = 0;
1404 		if (protocol) {
1405 			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
1406 				? protocol->multicast_time_to_live : protocol->time_to_live;
1407 		} else {
1408 			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
1409 				? kDefaultMulticastTTL : kDefaultTTL;
1410 		}
1411 		header->protocol = protocol
1412 			? protocol->socket->protocol : buffer->protocol;
1413 		header->checksum = 0;
1414 
1415 		header->source = source.sin_addr.s_addr;
1416 		header->destination = destination.sin_addr.s_addr;
1417 
1418 		TRACE_ONLY(dump_ipv4_header(*header));
1419 	} else {
1420 		// if IP_HDRINCL, check if the source address is set
1421 		NetBufferHeaderReader<ipv4_header> header(buffer);
1422 		if (header.Status() != B_OK)
1423 			return header.Status();
1424 
1425 		if (header->source == 0) {
1426 			header->source = source.sin_addr.s_addr;
1427 			header->checksum = 0;
1428 			header.Sync();
1429 		} else
1430 			checksumNeeded = false;
1431 
1432 		TRACE("  Header was already supplied:");
1433 		TRACE_ONLY(dump_ipv4_header(*header));
1434 	}
1435 
1436 	if (buffer->size > 0xffff)
1437 		return EMSGSIZE;
1438 
1439 	if (checksumNeeded) {
1440 		*IPChecksumField(buffer) = gBufferModule->checksum(buffer, 0,
1441 			sizeof(ipv4_header), true);
1442 	}
1443 
1444 	TRACE_SK(protocol, "  SendRoutedData(): header chksum: %ld, buffer "
1445 		"checksum: %ld",
1446 		gBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
1447 		gBufferModule->checksum(buffer, 0, buffer->size, true));
1448 
1449 	TRACE_SK(protocol, "  SendRoutedData(): destination: %08x",
1450 		ntohl(destination.sin_addr.s_addr));
1451 
1452 	uint32 mtu = route->mtu ? route->mtu : interface->mtu;
1453 	if (buffer->size > mtu) {
1454 		// we need to fragment the packet
1455 		return send_fragments(protocol, route, buffer, mtu);
1456 	}
1457 
1458 	return sDatalinkModule->send_routed_data(route, buffer);
1459 }
1460 
1461 
1462 status_t
1463 ipv4_send_data(net_protocol* _protocol, net_buffer* buffer)
1464 {
1465 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1466 
1467 	TRACE_SK(protocol, "SendData(%p [%ld bytes])", buffer, buffer->size);
1468 
1469 	if (protocol != NULL && (protocol->flags & IP_FLAG_HEADER_INCLUDED)) {
1470 		if (buffer->size < sizeof(ipv4_header))
1471 			return B_BAD_VALUE;
1472 
1473 		sockaddr_in* source = (sockaddr_in*)buffer->source;
1474 		sockaddr_in* destination = (sockaddr_in*)buffer->destination;
1475 
1476 		fill_sockaddr_in(source, *NetBufferField<in_addr_t,
1477 			offsetof(ipv4_header, source)>(buffer));
1478 		fill_sockaddr_in(destination, *NetBufferField<in_addr_t,
1479 			offsetof(ipv4_header, destination)>(buffer));
1480 	}
1481 
1482 	// handle IP_MULTICAST_IF
1483 	if (IN_MULTICAST(ntohl(
1484 			((sockaddr_in*)buffer->destination)->sin_addr.s_addr))
1485 		&& protocol != NULL && protocol->multicast_address != NULL) {
1486 		net_interface_address* address = sDatalinkModule->get_interface_address(
1487 			protocol->multicast_address);
1488 		if (address == NULL || (address->interface->flags & IFF_UP) == 0) {
1489 			sDatalinkModule->put_interface_address(address);
1490 			return EADDRNOTAVAIL;
1491 		}
1492 
1493 		sDatalinkModule->put_interface_address(buffer->interface_address);
1494 		buffer->interface_address = address;
1495 			// the buffer takes over ownership of the address
1496 
1497 		net_route* route = sDatalinkModule->get_route(sDomain, address->local);
1498 		if (route == NULL)
1499 			return ENETUNREACH;
1500 
1501 		return sDatalinkModule->send_routed_data(route, buffer);
1502 	}
1503 
1504 	return sDatalinkModule->send_data(protocol, sDomain, buffer);
1505 }
1506 
1507 
1508 ssize_t
1509 ipv4_send_avail(net_protocol* protocol)
1510 {
1511 	return B_ERROR;
1512 }
1513 
1514 
1515 status_t
1516 ipv4_read_data(net_protocol* _protocol, size_t numBytes, uint32 flags,
1517 	net_buffer** _buffer)
1518 {
1519 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1520 	RawSocket* raw = protocol->raw;
1521 	if (raw == NULL)
1522 		return B_ERROR;
1523 
1524 	TRACE_SK(protocol, "ReadData(%lu, 0x%lx)", numBytes, flags);
1525 
1526 	return raw->Dequeue(flags, _buffer);
1527 }
1528 
1529 
1530 ssize_t
1531 ipv4_read_avail(net_protocol* _protocol)
1532 {
1533 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1534 	RawSocket* raw = protocol->raw;
1535 	if (raw == NULL)
1536 		return B_ERROR;
1537 
1538 	return raw->AvailableData();
1539 }
1540 
1541 
1542 struct net_domain*
1543 ipv4_get_domain(net_protocol* protocol)
1544 {
1545 	return sDomain;
1546 }
1547 
1548 
1549 size_t
1550 ipv4_get_mtu(net_protocol* protocol, const struct sockaddr* address)
1551 {
1552 	net_route* route = sDatalinkModule->get_route(sDomain, address);
1553 	if (route == NULL)
1554 		return 0;
1555 
1556 	size_t mtu;
1557 	if (route->mtu != 0)
1558 		mtu = route->mtu;
1559 	else
1560 		mtu = route->interface_address->interface->mtu;
1561 
1562 	sDatalinkModule->put_route(sDomain, route);
1563 	return mtu - sizeof(ipv4_header);
1564 }
1565 
1566 
1567 status_t
1568 ipv4_receive_data(net_buffer* buffer)
1569 {
1570 	TRACE("ipv4_receive_data(%p [%ld bytes])", buffer, buffer->size);
1571 
1572 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1573 	if (bufferHeader.Status() != B_OK)
1574 		return bufferHeader.Status();
1575 
1576 	ipv4_header& header = bufferHeader.Data();
1577 	TRACE_ONLY(dump_ipv4_header(header));
1578 
1579 	if (header.version != IPV4_VERSION)
1580 		return B_BAD_TYPE;
1581 
1582 	uint16 packetLength = header.TotalLength();
1583 	uint16 headerLength = header.HeaderLength();
1584 	if (packetLength > buffer->size
1585 		|| headerLength < sizeof(ipv4_header))
1586 		return B_BAD_DATA;
1587 
1588 	// TODO: would be nice to have a direct checksum function somewhere
1589 	if (gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1590 		return B_BAD_DATA;
1591 
1592 	// lower layers notion of broadcast or multicast have no relevance to us
1593 	// other than deciding whether to send an ICMP error
1594 	bool wasMulticast = (buffer->flags & (MSG_BCAST | MSG_MCAST)) != 0;
1595 	bool notForUs = false;
1596 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1597 
1598 	sockaddr_in destination;
1599 	fill_sockaddr_in(&destination, header.destination);
1600 
1601 	if (header.destination == INADDR_BROADCAST) {
1602 		buffer->flags |= MSG_BCAST;
1603 
1604 		// Find first interface with a matching family
1605 		if (!sDatalinkModule->is_local_link_address(sDomain, true,
1606 				buffer->destination, &buffer->interface_address))
1607 			notForUs = !wasMulticast;
1608 	} else if (IN_MULTICAST(ntohl(header.destination))) {
1609 		buffer->flags |= MSG_MCAST;
1610 	} else {
1611 		uint32 matchedAddressType = 0;
1612 
1613 		// test if the packet is really for us
1614 		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1615 				&buffer->interface_address, &matchedAddressType)
1616 			&& !sDatalinkModule->is_local_link_address(sDomain, true,
1617 				buffer->destination, &buffer->interface_address)) {
1618 			// if the buffer was a link layer multicast, regard it as a
1619 			// broadcast, and let the upper levels decide what to do with it
1620 			if (wasMulticast)
1621 				buffer->flags |= MSG_BCAST;
1622 			else
1623 				notForUs = true;
1624 		} else {
1625 			// copy over special address types (MSG_BCAST or MSG_MCAST):
1626 			buffer->flags |= matchedAddressType;
1627 		}
1628 	}
1629 
1630 	// set net_buffer's source/destination address
1631 	fill_sockaddr_in((struct sockaddr_in*)buffer->source, header.source);
1632 	memcpy(buffer->destination, &destination, sizeof(sockaddr_in));
1633 
1634 	buffer->protocol = header.protocol;
1635 
1636 	if (notForUs) {
1637 		TRACE("  ipv4_receive_data(): packet was not for us %x -> %x",
1638 			ntohl(header.source), ntohl(header.destination));
1639 
1640 		if (!wasMulticast) {
1641 			// Send ICMP error: Host unreachable
1642 			sDomain->module->error_reply(NULL, buffer, B_NET_ERROR_UNREACH_HOST,
1643 				NULL);
1644 		}
1645 
1646 		return B_ERROR;
1647 	}
1648 
1649 	// remove any trailing/padding data
1650 	status_t status = gBufferModule->trim(buffer, packetLength);
1651 	if (status != B_OK)
1652 		return status;
1653 
1654 	// check for fragmentation
1655 	uint16 fragmentOffset = header.FragmentOffset();
1656 	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
1657 		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
1658 		// this is a fragment
1659 		TRACE("  ipv4_receive_data(): Found a Fragment!");
1660 		status = reassemble_fragments(header, &buffer);
1661 		TRACE("  ipv4_receive_data():  -> %s", strerror(status));
1662 		if (status != B_OK)
1663 			return status;
1664 
1665 		if (buffer == NULL) {
1666 			// buffer was put into fragment packet
1667 			TRACE("  ipv4_receive_data(): Not yet assembled.");
1668 			return B_OK;
1669 		}
1670 	}
1671 
1672 	// Since the buffer might have been changed (reassembled fragment)
1673 	// we must no longer access bufferHeader or header anymore after
1674 	// this point
1675 
1676 	bool rawDelivered = raw_receive_data(buffer);
1677 
1678 	// Preserve the ipv4 header for ICMP processing
1679 	gBufferModule->store_header(buffer);
1680 
1681 	bufferHeader.Remove(headerLength);
1682 		// the header is of variable size and may include IP options
1683 		// (TODO: that we ignore for now)
1684 
1685 	net_protocol_module_info* module = receiving_protocol(buffer->protocol);
1686 	if (module == NULL) {
1687 		// no handler for this packet
1688 		if (!rawDelivered) {
1689 			sDomain->module->error_reply(NULL, buffer,
1690 				B_NET_ERROR_UNREACH_PROTOCOL, NULL);
1691 		}
1692 		return EAFNOSUPPORT;
1693 	}
1694 
1695 	if ((buffer->flags & MSG_MCAST) != 0) {
1696 		// Unfortunately historical reasons dictate that the IP multicast
1697 		// model be a little different from the unicast one. We deliver
1698 		// this frame directly to all sockets registered with interest
1699 		// for this multicast group.
1700 		deliver_multicast(module, buffer, false);
1701 		gBufferModule->free(buffer);
1702 		return B_OK;
1703 	}
1704 
1705 	return module->receive_data(buffer);
1706 }
1707 
1708 
1709 status_t
1710 ipv4_deliver_data(net_protocol* _protocol, net_buffer* buffer)
1711 {
1712 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1713 
1714 	if (protocol->raw == NULL)
1715 		return B_ERROR;
1716 
1717 	return protocol->raw->EnqueueClone(buffer);
1718 }
1719 
1720 
1721 status_t
1722 ipv4_error_received(net_error error, net_buffer* buffer)
1723 {
1724 	TRACE("  ipv4_error_received(error %d, buffer %p [%zu bytes])", (int)error,
1725 		buffer, buffer->size);
1726 
1727 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1728 	if (bufferHeader.Status() != B_OK)
1729 		return bufferHeader.Status();
1730 
1731 	ipv4_header& header = bufferHeader.Data();
1732 	TRACE_ONLY(dump_ipv4_header(header));
1733 
1734 	// We do not check the packet length, as we usually only get a part of it
1735 	uint16 headerLength = header.HeaderLength();
1736 	if (header.version != IPV4_VERSION
1737 		|| headerLength < sizeof(ipv4_header)
1738 		|| gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1739 		return B_BAD_DATA;
1740 
1741 	// Restore addresses of the original buffer
1742 
1743 	// lower layers notion of broadcast or multicast have no relevance to us
1744 	// TODO: they actually have when deciding whether to send an ICMP error
1745 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1746 
1747 	fill_sockaddr_in((struct sockaddr_in*)buffer->source, header.source);
1748 	fill_sockaddr_in((struct sockaddr_in*)buffer->destination,
1749 		header.destination);
1750 
1751 	if (header.destination == INADDR_BROADCAST)
1752 		buffer->flags |= MSG_BCAST;
1753 	else if (IN_MULTICAST(ntohl(header.destination)))
1754 		buffer->flags |= MSG_MCAST;
1755 
1756 	// test if the packet is really from us
1757 	if (!sDatalinkModule->is_local_address(sDomain, buffer->source, NULL,
1758 			NULL)) {
1759 		TRACE("  ipv4_error_received(): packet was not for us %x -> %x",
1760 			ntohl(header.source), ntohl(header.destination));
1761 		return B_ERROR;
1762 	}
1763 
1764 	buffer->protocol = header.protocol;
1765 
1766 	bufferHeader.Remove(headerLength);
1767 
1768 	net_protocol_module_info* protocol = receiving_protocol(buffer->protocol);
1769 	if (protocol == NULL)
1770 		return B_ERROR;
1771 
1772 	// propagate error
1773 	return protocol->error_received(error, buffer);
1774 }
1775 
1776 
1777 status_t
1778 ipv4_error_reply(net_protocol* protocol, net_buffer* cause, net_error error,
1779 	net_error_data* errorData)
1780 {
1781 	// Directly obtain the ICMP protocol module
1782 	net_protocol_module_info* icmp = receiving_protocol(IPPROTO_ICMP);
1783 	if (icmp == NULL)
1784 		return B_ERROR;
1785 
1786 	return icmp->error_reply(protocol, cause, error, errorData);
1787 }
1788 
1789 
1790 ssize_t
1791 ipv4_process_ancillary_data_no_container(net_protocol* protocol,
1792 	net_buffer* buffer, void* msgControl, size_t msgControlLen)
1793 {
1794 	ssize_t bytesWritten = 0;
1795 
1796 	if ((((ipv4_protocol*)protocol)->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0) {
1797 		if (msgControlLen < CMSG_SPACE(sizeof(struct in_addr)))
1798 			return B_NO_MEMORY;
1799 
1800 		cmsghdr* messageHeader = (cmsghdr*)msgControl;
1801 		messageHeader->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
1802 		messageHeader->cmsg_level = IPPROTO_IP;
1803 		messageHeader->cmsg_type = IP_RECVDSTADDR;
1804 
1805 		memcpy(CMSG_DATA(messageHeader),
1806 		 	&((struct sockaddr_in*)buffer->destination)->sin_addr,
1807 		 	sizeof(struct in_addr));
1808 
1809 		bytesWritten += CMSG_SPACE(sizeof(struct in_addr));
1810 	}
1811 
1812 	return bytesWritten;
1813 }
1814 
1815 
1816 //	#pragma mark -
1817 
1818 
1819 status_t
1820 init_ipv4()
1821 {
1822 	sPacketID = (int32)system_time();
1823 
1824 	mutex_init(&sRawSocketsLock, "raw sockets");
1825 	mutex_init(&sFragmentLock, "IPv4 Fragments");
1826 	mutex_init(&sMulticastGroupsLock, "IPv4 multicast groups");
1827 	mutex_init(&sReceivingProtocolLock, "IPv4 receiving protocols");
1828 
1829 	status_t status;
1830 
1831 	sMulticastState = new MulticastState();
1832 	if (sMulticastState == NULL) {
1833 		status = B_NO_MEMORY;
1834 		goto err4;
1835 	}
1836 
1837 	status = sMulticastState->Init();
1838 	if (status != B_OK)
1839 		goto err5;
1840 
1841 	new (&sFragmentHash) FragmentTable();
1842 	status = sFragmentHash.Init(256);
1843 	if (status != B_OK)
1844 		goto err5;
1845 
1846 	new (&sRawSockets) RawSocketList;
1847 		// static initializers do not work in the kernel,
1848 		// so we have to do it here, manually
1849 		// TODO: for modules, this shouldn't be required
1850 
1851 	status = gStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
1852 		"network/protocols/ipv4/v1", NULL);
1853 	if (status != B_OK)
1854 		goto err6;
1855 
1856 	status = gStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
1857 		&gIPv4AddressModule, &sDomain);
1858 	if (status != B_OK)
1859 		goto err6;
1860 
1861 	add_debugger_command("ipv4_multicast", dump_ipv4_multicast,
1862 		"list all current IPv4 multicast states");
1863 
1864 	return B_OK;
1865 
1866 err6:
1867 	sFragmentHash.~FragmentTable();
1868 err5:
1869 	delete sMulticastState;
1870 err4:
1871 	mutex_destroy(&sReceivingProtocolLock);
1872 	mutex_destroy(&sMulticastGroupsLock);
1873 	mutex_destroy(&sFragmentLock);
1874 	mutex_destroy(&sRawSocketsLock);
1875 	return status;
1876 }
1877 
1878 
1879 status_t
1880 uninit_ipv4()
1881 {
1882 	mutex_lock(&sReceivingProtocolLock);
1883 
1884 	remove_debugger_command("ipv4_multicast", dump_ipv4_multicast);
1885 
1886 	// put all the domain receiving protocols we gathered so far
1887 	for (uint32 i = 0; i < 256; i++) {
1888 		if (sReceivingProtocol[i] != NULL)
1889 			gStackModule->put_domain_receiving_protocol(sDomain, i);
1890 	}
1891 
1892 	gStackModule->unregister_domain(sDomain);
1893 	mutex_unlock(&sReceivingProtocolLock);
1894 
1895 	delete sMulticastState;
1896 	sFragmentHash.~FragmentTable();
1897 
1898 	mutex_destroy(&sMulticastGroupsLock);
1899 	mutex_destroy(&sFragmentLock);
1900 	mutex_destroy(&sRawSocketsLock);
1901 	mutex_destroy(&sReceivingProtocolLock);
1902 
1903 	return B_OK;
1904 }
1905 
1906 
1907 static status_t
1908 ipv4_std_ops(int32 op, ...)
1909 {
1910 	switch (op) {
1911 		case B_MODULE_INIT:
1912 			return init_ipv4();
1913 		case B_MODULE_UNINIT:
1914 			return uninit_ipv4();
1915 
1916 		default:
1917 			return B_ERROR;
1918 	}
1919 }
1920 
1921 
1922 net_protocol_module_info gIPv4Module = {
1923 	{
1924 		"network/protocols/ipv4/v1",
1925 		0,
1926 		ipv4_std_ops
1927 	},
1928 	NET_PROTOCOL_ATOMIC_MESSAGES,
1929 
1930 	ipv4_init_protocol,
1931 	ipv4_uninit_protocol,
1932 	ipv4_open,
1933 	ipv4_close,
1934 	ipv4_free,
1935 	ipv4_connect,
1936 	ipv4_accept,
1937 	ipv4_control,
1938 	ipv4_getsockopt,
1939 	ipv4_setsockopt,
1940 	ipv4_bind,
1941 	ipv4_unbind,
1942 	ipv4_listen,
1943 	ipv4_shutdown,
1944 	ipv4_send_data,
1945 	ipv4_send_routed_data,
1946 	ipv4_send_avail,
1947 	ipv4_read_data,
1948 	ipv4_read_avail,
1949 	ipv4_get_domain,
1950 	ipv4_get_mtu,
1951 	ipv4_receive_data,
1952 	ipv4_deliver_data,
1953 	ipv4_error_received,
1954 	ipv4_error_reply,
1955 	NULL,		// add_ancillary_data()
1956 	NULL,		// process_ancillary_data()
1957 	ipv4_process_ancillary_data_no_container,
1958 	NULL,		// send_data_no_buffer()
1959 	NULL		// read_data_no_buffer()
1960 };
1961 
1962 module_dependency module_dependencies[] = {
1963 	{NET_STACK_MODULE_NAME, (module_info**)&gStackModule},
1964 	{NET_BUFFER_MODULE_NAME, (module_info**)&gBufferModule},
1965 	{NET_DATALINK_MODULE_NAME, (module_info**)&sDatalinkModule},
1966 	{NET_SOCKET_MODULE_NAME, (module_info**)&sSocketModule},
1967 	{}
1968 };
1969 
1970 module_info* modules[] = {
1971 	(module_info*)&gIPv4Module,
1972 	NULL
1973 };
1974