xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision 0041b382147b19ee82ba05e36f007951892c2223)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4.h"
11 #include "ipv4_address.h"
12 #include "multicast.h"
13 
14 #include <icmp.h>
15 #include <net_datalink.h>
16 #include <net_datalink_protocol.h>
17 #include <net_device.h>
18 #include <net_protocol.h>
19 #include <net_stack.h>
20 #include <NetBufferUtilities.h>
21 #include <ProtocolUtilities.h>
22 
23 #include <KernelExport.h>
24 #include <util/AutoLock.h>
25 #include <util/list.h>
26 #include <util/khash.h>
27 #include <util/DoublyLinkedList.h>
28 #include <util/MultiHashTable.h>
29 
30 #include <netinet/in.h>
31 #include <netinet/ip.h>
32 #include <new>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <string.h>
36 #include <utility>
37 
38 
// Debug tracing: uncomment the following define to enable the TRACE*() macros.
//#define TRACE_IPV4
#ifdef TRACE_IPV4
	// TRACE() prints the message via dprintf(), prefixed with "IPv4" and the
	// current system_time(); TRACE_SK() additionally prints the protocol
	// pointer; TRACE_ONLY() keeps its argument only in tracing builds.
#	define TRACE(format, args...) \
		dprintf("IPv4 [%llu] " format "\n", system_time() , ##args)
#	define TRACE_SK(protocol, format, args...) \
		dprintf("IPv4 [%llu] %p " format "\n", system_time(), \
			protocol , ##args)
#	define TRACE_ONLY(x) x
#else
	// With tracing disabled, the trace macros expand to an empty statement
	// (or to nothing at all for TRACE_ONLY()).
#	define TRACE(args...) ;
#	define TRACE_SK(args...) ;
#	define TRACE_ONLY(x)
#endif
52 
53 
#define MAX_HASH_FRAGMENTS 		64
	// slots in the fragment packet's hash
#define FRAGMENT_TIMEOUT		60000000LL
	// discard fragment after 60 seconds; this is the timeout that
	// FragmentPacket::AddFragment() passes to set_timer()


// List type used by FragmentPacket to keep the received fragment buffers.
typedef DoublyLinkedList<struct net_buffer,
	DoublyLinkedListCLink<struct net_buffer> > FragmentList;

// Buffer field accessor type bound to the offset of ipv4_header::checksum.
typedef NetBufferField<uint16, offsetof(ipv4_header, checksum)> IPChecksumField;
64 
// Identifies the datagram a fragment belongs to; used as the key of
// sFragmentHash. reassemble_fragments() fills it by copying the respective
// fields straight from the IPv4 header (the id is not byte-swapped).
struct ipv4_packet_key {
	in_addr_t	source;
	in_addr_t	destination;
	uint16		id;
	uint8		protocol;
};
71 
/*!	Collects the fragments of a single IPv4 datagram (identified by an
	ipv4_packet_key) until all of its data has arrived and it can be
	reassembled. Instances live in sFragmentHash.
*/
class FragmentPacket {
public:
								FragmentPacket(const ipv4_packet_key& key);
								~FragmentPacket();

			status_t			AddFragment(uint16 start, uint16 end,
									net_buffer* buffer, bool lastFragment);
			status_t			Reassemble(net_buffer* to);

			// The packet is complete once the last fragment has been seen
			// and there are no bytes left to receive.
			bool				IsComplete() const
									{ return fReceivedLastFragment
										&& fBytesLeft == 0; }

	// Hooks for the hash table (sFragmentHash)
	static	uint32				Hash(void* _packet, const void* _key,
									uint32 range);
	static	int					Compare(void* _packet, const void* _key);
	static	int32				NextOffset()
									{ return offsetof(FragmentPacket, fNext); }
	// Timer hook: discards a packet whose reassembly timed out
	static	void				StaleTimer(struct net_timer* timer, void* data);

private:
			FragmentPacket*		fNext;
				// link whose offset is reported by NextOffset()
			struct ipv4_packet_key fKey;
			bool				fReceivedLastFragment;
			int32				fBytesLeft;
				// starts at IP_MAXPACKET, and is reduced by AddFragment()
			FragmentList		fFragments;
				// received (and possibly already merged) fragment buffers
			net_timer			fTimer;
				// restarted with FRAGMENT_TIMEOUT on every AddFragment()
};
100 
101 
// A raw IPv4 socket: a DatagramSocket that can additionally be linked into
// a RawSocketList (see sRawSockets).
class RawSocket
	: public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
public:
								RawSocket(net_socket* socket);
};

typedef DoublyLinkedList<RawSocket> RawSocketList;

// IPv4 instantiations of the generic multicast templates
typedef MulticastGroupInterface<IPv4Multicast> IPv4GroupInterface;
typedef MulticastFilter<IPv4Multicast> IPv4MulticastFilter;
112 
// Hash definition for the MultiHashTable holding the multicast group state
// (see MulticastState): entries are keyed by the pair of group address and
// interface index.
struct MulticastStateHash {
	typedef std::pair<const in_addr* , uint32> KeyType;
	typedef IPv4GroupInterface ValueType;

	// The hash is the group address XORed with the interface index.
	size_t HashKey(const KeyType &key) const
		{ return key.first->s_addr ^ key.second; }
	size_t Hash(ValueType* value) const
		{ return HashKey(std::make_pair(&value->Address(),
			value->Interface()->index)); }
	// A value matches a key if both interface index and address are equal.
	bool Compare(const KeyType &key, ValueType* value) const
		{ return value->Interface()->index == key.second
			&& value->Address().s_addr == key.first->s_addr; }
	bool CompareValues(ValueType* value1, ValueType* value2) const
		{ return value1->Interface()->index == value2->Interface()->index
			&& value1->Address().s_addr == value2->Address().s_addr; }
	ValueType*& GetLink(ValueType* value) const { return value->HashLink(); }
};
130 
131 
// Per socket state of the IPv4 protocol. Apart from the multicast filter,
// the fields are initialized in ipv4_init_protocol().
struct ipv4_protocol : net_protocol {
	ipv4_protocol()
		:
		multicast_filter(this)
	{
	}

	RawSocket*			raw;
		// only set for sockets opened via ipv4_open(), NULL otherwise
	uint8				service_type;
	uint8				time_to_live;
	uint8				multicast_time_to_live;
	uint32				flags;
		// combination of the IP_FLAG_* protocol flags
	struct sockaddr*	interface_address; // for IP_MULTICAST_IF

	IPv4MulticastFilter	multicast_filter;
};
148 
// protocol flags (stored in ipv4_protocol::flags)
#define IP_FLAG_HEADER_INCLUDED		0x01
#define IP_FLAG_RECEIVE_DEST_ADDR	0x02


// Initial values of ipv4_protocol::time_to_live, and
// ipv4_protocol::multicast_time_to_live
static const int kDefaultTTL = 254;
static const int kDefaultMulticastTTL = 1;


extern net_protocol_module_info gIPv4Module;
	// we need this in ipv4_std_ops() for registering the AF_INET domain

net_stack_module_info* gStackModule;
net_buffer_module_info* gBufferModule;

static struct net_domain* sDomain;
static net_datalink_module_info* sDatalinkModule;
static net_socket_module_info* sSocketModule;
static int32 sPacketID;
static RawSocketList sRawSockets;
	// all open raw sockets, guarded by sRawSocketsLock
static mutex sRawSocketsLock;
static mutex sFragmentLock;
	// guards sFragmentHash
static hash_table* sFragmentHash;
	// the FragmentPacket objects of all datagrams currently being reassembled
static mutex sMulticastGroupsLock;
	// guards sMulticastState

typedef MultiHashTable<MulticastStateHash> MulticastState;
static MulticastState* sMulticastState;

// Cache of the receiving protocol modules, indexed by IP protocol number,
// and filled on demand by receiving_protocol().
static net_protocol_module_info* sReceivingProtocol[256];
static mutex sReceivingProtocolLock;
180 
/*!	Writes the dotted-decimal representation of \a address (as converted by
	ntohl()) into \a buf, using at most \a bufLen bytes.
	\return \a buf, so that the call can be used directly as a print argument.
*/
static const char*
print_address(const in_addr* address, char* buf, size_t bufLen)
{
	const unsigned int hostOrder = ntohl(address->s_addr);

	const unsigned int first = (hostOrder >> 24) & 0xff;
	const unsigned int second = (hostOrder >> 16) & 0xff;
	const unsigned int third = (hostOrder >> 8) & 0xff;
	const unsigned int fourth = hostOrder & 0xff;

	snprintf(buf, bufLen, "%u.%u.%u.%u", first, second, third, fourth);
	return buf;
}
191 
192 
/*!	Creates the raw socket for \a socket; everything is left to the
	DatagramSocket base class, which is given the name "ipv4 raw socket".
*/
RawSocket::RawSocket(net_socket* socket)
	:
	DatagramSocket<>("ipv4 raw socket", socket)
{
}
198 
199 
200 //	#pragma mark -
201 
202 
/*!	Creates an empty fragment packet for \a key.
	No fragment has been received yet, so the number of bytes left starts at
	the maximum packet size (IP_MAXPACKET). The timer is only initialized
	here (with StaleTimer() as its hook); it is started by AddFragment().
*/
FragmentPacket::FragmentPacket(const ipv4_packet_key &key)
	:
	fKey(key),
	fReceivedLastFragment(false),
	fBytesLeft(IP_MAXPACKET)
{
	gStackModule->init_timer(&fTimer, FragmentPacket::StaleTimer, this);
}
211 
212 
213 FragmentPacket::~FragmentPacket()
214 {
215 	// cancel the kill timer
216 	gStackModule->set_timer(&fTimer, -1);
217 
218 	// delete all fragments
219 	net_buffer* buffer;
220 	while ((buffer = fFragments.RemoveHead()) != NULL) {
221 		gBufferModule->free(buffer);
222 	}
223 }
224 
225 
226 status_t
227 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer* buffer,
228 	bool lastFragment)
229 {
230 	// restart the timer
231 	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
232 
233 	if (start >= end) {
234 		// invalid fragment
235 		return B_BAD_DATA;
236 	}
237 
238 	// Search for a position in the list to insert the fragment
239 
240 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
241 	net_buffer* previous = NULL;
242 	net_buffer* next = NULL;
243 	while ((previous = iterator.Next()) != NULL) {
244 		if (previous->fragment.start <= start) {
245 			// The new fragment can be inserted after this one
246 			break;
247 		}
248 
249 		next = previous;
250 	}
251 
252 	// See if we already have the fragment's data
253 
254 	if (previous != NULL && previous->fragment.start <= start
255 		&& previous->fragment.end >= end) {
256 		// we do, so we can just drop this fragment
257 		gBufferModule->free(buffer);
258 		return B_OK;
259 	}
260 
261 	TRACE("    previous: %p, next: %p", previous, next);
262 
263 	// If we have parts of the data already, truncate as needed
264 
265 	if (previous != NULL && previous->fragment.end > start) {
266 		TRACE("    remove header %d bytes", previous->fragment.end - start);
267 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
268 		start = previous->fragment.end;
269 	}
270 	if (next != NULL && next->fragment.start < end) {
271 		TRACE("    remove trailer %d bytes", next->fragment.start - end);
272 		gBufferModule->remove_trailer(buffer, next->fragment.start - end);
273 		end = next->fragment.start;
274 	}
275 
276 	// Now try if we can already merge the fragments together
277 
278 	// We will always keep the last buffer received, so that we can still
279 	// report an error (in which case we're not responsible for freeing it)
280 
281 	if (previous != NULL && previous->fragment.end == start) {
282 		fFragments.Remove(previous);
283 
284 		buffer->fragment.start = previous->fragment.start;
285 		buffer->fragment.end = end;
286 
287 		status_t status = gBufferModule->merge(buffer, previous, false);
288 		TRACE("    merge previous: %s", strerror(status));
289 		if (status != B_OK) {
290 			fFragments.Insert(next, previous);
291 			return status;
292 		}
293 
294 		fFragments.Insert(next, buffer);
295 
296 		// cut down existing hole
297 		fBytesLeft -= end - start;
298 
299 		if (lastFragment && !fReceivedLastFragment) {
300 			fReceivedLastFragment = true;
301 			fBytesLeft -= IP_MAXPACKET - end;
302 		}
303 
304 		TRACE("    hole length: %d", (int)fBytesLeft);
305 
306 		return B_OK;
307 	} else if (next != NULL && next->fragment.start == end) {
308 		net_buffer* afterNext = (net_buffer*)next->link.next;
309 		fFragments.Remove(next);
310 
311 		buffer->fragment.start = start;
312 		buffer->fragment.end = next->fragment.end;
313 
314 		status_t status = gBufferModule->merge(buffer, next, true);
315 		TRACE("    merge next: %s", strerror(status));
316 		if (status != B_OK) {
317 			// Insert "next" at its previous position
318 			fFragments.Insert(afterNext, next);
319 			return status;
320 		}
321 
322 		fFragments.Insert(afterNext, buffer);
323 
324 		// cut down existing hole
325 		fBytesLeft -= end - start;
326 
327 		if (lastFragment && !fReceivedLastFragment) {
328 			fReceivedLastFragment = true;
329 			fBytesLeft -= IP_MAXPACKET - end;
330 		}
331 
332 		TRACE("    hole length: %d", (int)fBytesLeft);
333 
334 		return B_OK;
335 	}
336 
337 	// We couldn't merge the fragments, so we need to add it as is
338 
339 	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);
340 
341 	buffer->fragment.start = start;
342 	buffer->fragment.end = end;
343 	fFragments.Insert(next, buffer);
344 
345 	// update length of the hole, if any
346 	fBytesLeft -= end - start;
347 
348 	if (lastFragment && !fReceivedLastFragment) {
349 		fReceivedLastFragment = true;
350 		fBytesLeft -= IP_MAXPACKET - end;
351 	}
352 
353 	TRACE("    hole length: %d", (int)fBytesLeft);
354 
355 	return B_OK;
356 }
357 
358 
359 /*!	Reassembles the fragments to the specified buffer \a to.
360 	This buffer must have been added via AddFragment() before.
361 */
362 status_t
363 FragmentPacket::Reassemble(net_buffer* to)
364 {
365 	if (!IsComplete())
366 		return B_ERROR;
367 
368 	net_buffer* buffer = NULL;
369 
370 	net_buffer* fragment;
371 	while ((fragment = fFragments.RemoveHead()) != NULL) {
372 		if (buffer != NULL) {
373 			status_t status;
374 			if (to == fragment) {
375 				status = gBufferModule->merge(fragment, buffer, false);
376 				buffer = fragment;
377 			} else
378 				status = gBufferModule->merge(buffer, fragment, true);
379 			if (status != B_OK)
380 				return status;
381 		} else
382 			buffer = fragment;
383 	}
384 
385 	if (buffer != to)
386 		panic("ipv4 packet reassembly did not work correctly.");
387 
388 	return B_OK;
389 }
390 
391 
392 int
393 FragmentPacket::Compare(void* _packet, const void* _key)
394 {
395 	const ipv4_packet_key* key = (ipv4_packet_key*)_key;
396 	ipv4_packet_key* packetKey = &((FragmentPacket*)_packet)->fKey;
397 
398 	if (packetKey->id == key->id
399 		&& packetKey->source == key->source
400 		&& packetKey->destination == key->destination
401 		&& packetKey->protocol == key->protocol)
402 		return 0;
403 
404 	return 1;
405 }
406 
407 
408 uint32
409 FragmentPacket::Hash(void* _packet, const void* _key, uint32 range)
410 {
411 	const struct ipv4_packet_key* key = (struct ipv4_packet_key*)_key;
412 	FragmentPacket* packet = (FragmentPacket*)_packet;
413 	if (packet != NULL)
414 		key = &packet->fKey;
415 
416 	return (key->source ^ key->destination ^ key->protocol ^ key->id) % range;
417 }
418 
419 
420 /*static*/ void
421 FragmentPacket::StaleTimer(struct net_timer* timer, void* data)
422 {
423 	FragmentPacket* packet = (FragmentPacket*)data;
424 	TRACE("Assembling FragmentPacket %p timed out!", packet);
425 
426 	MutexLocker locker(&sFragmentLock);
427 	hash_remove(sFragmentHash, packet);
428 	locker.Unlock();
429 
430 	if (!packet->fFragments.IsEmpty()) {
431 		// Send error: fragment reassembly time exceeded
432 		sDomain->module->error_reply(NULL, packet->fFragments.First(),
433 			icmp_encode(ICMP_TYPE_TIME_EXCEEDED, ICMP_CODE_TIMEEX_FRAG), NULL);
434 	}
435 
436 	delete packet;
437 }
438 
439 
440 //	#pragma mark -
441 
442 
#ifdef TRACE_IPV4
/*!	Prints all fields of the IPv4 \a header via dprintf(); only compiled in
	when tracing is enabled.
*/
static void
dump_ipv4_header(ipv4_header &header)
{
	// Overlay to access the individual bytes of an address.
	// NOTE(review): the addresses are read as they are stored in the header;
	// the byte order of the non-little-endian variant looks reversed for an
	// address in network byte order - to be confirmed.
	struct pretty_ipv4 {
	#if B_HOST_IS_LENDIAN == 1
		uint8 a;
		uint8 b;
		uint8 c;
		uint8 d;
	#else
		uint8 d;
		uint8 c;
		uint8 b;
		uint8 a;
	#endif
	};
	struct pretty_ipv4* src = (struct pretty_ipv4*)&header.source;
	struct pretty_ipv4* dst = (struct pretty_ipv4*)&header.destination;
	dprintf("  version: %d\n", header.version);
	dprintf("  header_length: 4 * %d\n", header.header_length);
	dprintf("  service_type: %d\n", header.service_type);
	dprintf("  total_length: %d\n", header.TotalLength());
	dprintf("  id: %d\n", ntohs(header.id));
	// the fragment offset field also carries the flags (r/d/m)
	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
		header.FragmentOffset() & IP_FRAGMENT_OFFSET_MASK,
		(header.FragmentOffset() & IP_RESERVED_FLAG) ? 'r' : '-',
		(header.FragmentOffset() & IP_DONT_FRAGMENT) ? 'd' : '-',
		(header.FragmentOffset() & IP_MORE_FRAGMENTS) ? 'm' : '-');
	dprintf("  time_to_live: %d\n", header.time_to_live);
	dprintf("  protocol: %d\n", header.protocol);
	dprintf("  checksum: %d\n", ntohs(header.checksum));
	dprintf("  source: %d.%d.%d.%d\n", src->a, src->b, src->c, src->d);
	dprintf("  destination: %d.%d.%d.%d\n", dst->a, dst->b, dst->c, dst->d);
}
#endif	// TRACE_IPV4
479 
480 
481 static int
482 dump_ipv4_multicast(int argc, char** argv)
483 {
484 	MulticastState::Iterator it = sMulticastState->GetIterator();
485 
486 	while (it.HasNext()) {
487 		IPv4GroupInterface* state = it.Next();
488 
489 		char addressBuffer[64];
490 
491 		kprintf("%p: group <%s, %s, %s {", state, state->Interface()->name,
492 			print_address(&state->Address(), addressBuffer,
493 			sizeof(addressBuffer)),
494 			state->Mode() == IPv4GroupInterface::kExclude
495 				? "Exclude" : "Include");
496 
497 		int count = 0;
498 		IPv4GroupInterface::AddressSet::Iterator it
499 			= state->Sources().GetIterator();
500 		while (it.HasNext()) {
501 			kprintf("%s%s", count > 0 ? ", " : "", print_address(&it.Next(),
502 				addressBuffer, sizeof(addressBuffer)));
503 			count++;
504 		}
505 
506 		kprintf("}> sock %p\n", state->Parent()->Socket());
507 	}
508 
509 	return 0;
510 }
511 
512 
/*!	Attempts to re-assemble fragmented packets.
	The fragment in \a _buffer is added to the FragmentPacket matching the
	source, destination, id, and protocol of \a header; the packet is created
	(and added to the fragment hash) if it does not exist yet.
	\return B_OK if everything went well; if it could reassemble the packet,
		\a _buffer still points to its buffer, which then contains the
		complete packet. Otherwise, \a _buffer will be set to \c NULL.
	\return various error codes if something went wrong (mostly B_NO_MEMORY)
*/
static status_t
reassemble_fragments(const ipv4_header &header, net_buffer** _buffer)
{
	net_buffer* buffer = *_buffer;
	status_t status;

	// The key is built from the raw header fields
	struct ipv4_packet_key key;
	key.source = (in_addr_t)header.source;
	key.destination = (in_addr_t)header.destination;
	key.id = header.id;
	key.protocol = header.protocol;

	// TODO: Make locking finer grained.
	MutexLocker locker(&sFragmentLock);

	FragmentPacket* packet = (FragmentPacket*)hash_lookup(sFragmentHash, &key);
	if (packet == NULL) {
		// New fragment packet
		packet = new (std::nothrow) FragmentPacket(key);
		if (packet == NULL)
			return B_NO_MEMORY;

		// add packet to hash
		status = hash_insert(sFragmentHash, packet);
		if (status != B_OK) {
			delete packet;
			return status;
		}
	}

	// The offset is stored in units of 8 bytes; "end" is the offset following
	// the last payload byte of this fragment.
	uint16 fragmentOffset = header.FragmentOffset();
	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
	uint16 end = start + header.TotalLength() - header.HeaderLength();
	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;

	TRACE("   Received IPv4 %sfragment of size %d, offset %d.",
		lastFragment ? "last ": "", end - start, start);

	// Remove header unless this is the first fragment
	if (start != 0)
		gBufferModule->remove_header(buffer, header.HeaderLength());

	status = packet->AddFragment(start, end, buffer, lastFragment);
	if (status != B_OK)
		return status;

	if (packet->IsComplete()) {
		hash_remove(sFragmentHash, packet);
			// no matter if reassembling succeeds, we won't need this packet
			// anymore

		status = packet->Reassemble(buffer);
		delete packet;

		// _buffer does not change
		return status;
	}

	// This indicates that the packet is not yet complete
	*_buffer = NULL;
	return B_OK;
}
580 
581 
582 /*!	Fragments the incoming buffer and send all fragments via the specified
583 	\a route.
584 */
585 static status_t
586 send_fragments(ipv4_protocol* protocol, struct net_route* route,
587 	net_buffer* buffer, uint32 mtu)
588 {
589 	TRACE_SK(protocol, "SendFragments(%lu bytes, mtu %lu)", buffer->size, mtu);
590 
591 	NetBufferHeaderReader<ipv4_header> originalHeader(buffer);
592 	if (originalHeader.Status() != B_OK)
593 		return originalHeader.Status();
594 
595 	uint16 headerLength = originalHeader->HeaderLength();
596 	uint32 bytesLeft = buffer->size - headerLength;
597 	uint32 fragmentOffset = 0;
598 	status_t status = B_OK;
599 
600 	net_buffer* headerBuffer = gBufferModule->split(buffer, headerLength);
601 	if (headerBuffer == NULL)
602 		return B_NO_MEMORY;
603 
604 	// TODO: we need to make sure ipv4_header is contiguous or
605 	// use another construct.
606 	NetBufferHeaderReader<ipv4_header> bufferHeader(headerBuffer);
607 	ipv4_header* header = &bufferHeader.Data();
608 
609 	// Adapt MTU to be a multiple of 8 (fragment offsets can only be specified
610 	// this way)
611 	mtu -= headerLength;
612 	mtu &= ~7;
613 	TRACE("  adjusted MTU to %ld, bytesLeft %ld", mtu, bytesLeft);
614 
615 	while (bytesLeft > 0) {
616 		uint32 fragmentLength = min_c(bytesLeft, mtu);
617 		bytesLeft -= fragmentLength;
618 		bool lastFragment = bytesLeft == 0;
619 
620 		header->total_length = htons(fragmentLength + headerLength);
621 		header->fragment_offset = htons((lastFragment ? 0 : IP_MORE_FRAGMENTS)
622 			| (fragmentOffset >> 3));
623 		header->checksum = 0;
624 		header->checksum = gStackModule->checksum((uint8*)header,
625 			headerLength);
626 			// TODO: compute the checksum only for those parts that changed?
627 
628 		TRACE("  send fragment of %ld bytes (%ld bytes left)", fragmentLength,
629 			bytesLeft);
630 
631 		net_buffer* fragmentBuffer;
632 		if (!lastFragment) {
633 			fragmentBuffer = gBufferModule->split(buffer, fragmentLength);
634 			fragmentOffset += fragmentLength;
635 		} else
636 			fragmentBuffer = buffer;
637 
638 		if (fragmentBuffer == NULL) {
639 			status = B_NO_MEMORY;
640 			break;
641 		}
642 
643 		// copy header to fragment
644 		status = gBufferModule->prepend(fragmentBuffer, header, headerLength);
645 
646 		// send fragment
647 		if (status == B_OK)
648 			status = sDatalinkModule->send_data(route, fragmentBuffer);
649 
650 		if (lastFragment) {
651 			// we don't own the last buffer, so we don't have to free it
652 			break;
653 		}
654 
655 		if (status != B_OK) {
656 			gBufferModule->free(fragmentBuffer);
657 			break;
658 		}
659 	}
660 
661 	gBufferModule->free(headerBuffer);
662 	return status;
663 }
664 
665 
666 /*!	Delivers the provided \a buffer to all listeners of this multicast group.
667 	Does not take over ownership of the buffer.
668 */
669 static bool
670 deliver_multicast(net_protocol_module_info* module, net_buffer* buffer,
671 	bool deliverToRaw)
672 {
673 	if (module->deliver_data == NULL)
674 		return false;
675 
676 	MutexLocker _(sMulticastGroupsLock);
677 
678 	sockaddr_in* multicastAddr = (sockaddr_in*)buffer->destination;
679 
680 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
681 		&multicastAddr->sin_addr, buffer->interface->index));
682 
683 	size_t count = 0;
684 
685 	while (it.HasNext()) {
686 		IPv4GroupInterface* state = it.Next();
687 
688 		ipv4_protocol* ipProtocol = state->Parent()->Socket();
689 		if (deliverToRaw && (ipProtocol->raw == NULL
690 			|| ipProtocol->socket->protocol != buffer->protocol))
691 			continue;
692 
693 		if (state->FilterAccepts(buffer)) {
694 			net_protocol* protocol = ipProtocol;
695 			if (protocol->module != module) {
696 				// as multicast filters are installed with an IPv4 protocol
697 				// reference, we need to go and find the appropriate instance
698 				// related to the 'receiving protocol' with module 'module'.
699 				net_protocol* protocol = ipProtocol->socket->first_protocol;
700 
701 				while (protocol != NULL && protocol->module != module)
702 					protocol = protocol->next;
703 			}
704 
705 			if (protocol != NULL) {
706 				module->deliver_data(protocol, buffer);
707 				count++;
708 			}
709 		}
710 	}
711 
712 	return count > 0;
713 }
714 
715 
716 /*!	Delivers the buffer to all listening raw sockets without taking ownership of
717 	the provided \a buffer.
718 	Returns \c true if there was any receiver, \c false if not.
719 */
720 static bool
721 raw_receive_data(net_buffer* buffer)
722 {
723 	MutexLocker locker(sRawSocketsLock);
724 
725 	if (sRawSockets.IsEmpty())
726 		return false;
727 
728 	TRACE("RawReceiveData(%i)", buffer->protocol);
729 
730 	if ((buffer->flags & MSG_MCAST) != 0) {
731 		// we need to call deliver_multicast here separately as
732 		// buffer still has the IP header, and it won't in the
733 		// next call. This isn't very optimized but works for now.
734 		// A better solution would be to hold separate hash tables
735 		// and lists for RAW and non-RAW sockets.
736 		return deliver_multicast(&gIPv4Module, buffer, true);
737 	}
738 
739 	RawSocketList::Iterator iterator = sRawSockets.GetIterator();
740 	size_t count = 0;
741 
742 	while (iterator.HasNext()) {
743 		RawSocket* raw = iterator.Next();
744 
745 		if (raw->Socket()->protocol == buffer->protocol) {
746 			raw->SocketEnqueue(buffer);
747 			count++;
748 		}
749 	}
750 
751 	return count > 0;
752 }
753 
754 
755 static inline sockaddr*
756 fill_sockaddr_in(sockaddr_in* target, in_addr_t address)
757 {
758 	target->sin_family = AF_INET;
759 	target->sin_len = sizeof(sockaddr_in);
760 	target->sin_port = 0;
761 	target->sin_addr.s_addr = address;
762 	return (sockaddr*)target;
763 }
764 
765 
766 status_t
767 IPv4Multicast::JoinGroup(IPv4GroupInterface* state)
768 {
769 	MutexLocker _(sMulticastGroupsLock);
770 
771 	sockaddr_in groupAddr;
772 	net_interface* interface = state->Interface();
773 
774 	status_t status = interface->first_info->join_multicast(
775 		interface->first_protocol,
776 		fill_sockaddr_in(&groupAddr, state->Address().s_addr));
777 	if (status != B_OK)
778 		return status;
779 
780 	sMulticastState->Insert(state);
781 	return B_OK;
782 }
783 
784 
785 status_t
786 IPv4Multicast::LeaveGroup(IPv4GroupInterface* state)
787 {
788 	MutexLocker _(sMulticastGroupsLock);
789 
790 	sMulticastState->Remove(state);
791 
792 	sockaddr_in groupAddr;
793 	net_interface* interface = state->Interface();
794 
795 	return interface->first_protocol->module->join_multicast(
796 		interface->first_protocol,
797 		fill_sockaddr_in(&groupAddr, state->Address().s_addr));
798 }
799 
800 
801 static net_protocol_module_info*
802 receiving_protocol(uint8 protocol)
803 {
804 	net_protocol_module_info* module = sReceivingProtocol[protocol];
805 	if (module != NULL)
806 		return module;
807 
808 	MutexLocker locker(sReceivingProtocolLock);
809 
810 	module = sReceivingProtocol[protocol];
811 	if (module != NULL)
812 		return module;
813 
814 	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol,
815 			&module) == B_OK)
816 		sReceivingProtocol[protocol] = module;
817 
818 	return module;
819 }
820 
821 
822 static status_t
823 ipv4_delta_group(IPv4GroupInterface* group, int option,
824 	net_interface* interface, const in_addr* sourceAddr)
825 {
826 	switch (option) {
827 		case IP_ADD_MEMBERSHIP:
828 			return group->Add();
829 		case IP_DROP_MEMBERSHIP:
830 			return group->Drop();
831 		case IP_BLOCK_SOURCE:
832 			return group->BlockSource(*sourceAddr);
833 		case IP_UNBLOCK_SOURCE:
834 			return group->UnblockSource(*sourceAddr);
835 		case IP_ADD_SOURCE_MEMBERSHIP:
836 			return group->AddSSM(*sourceAddr);
837 		case IP_DROP_SOURCE_MEMBERSHIP:
838 			return group->DropSSM(*sourceAddr);
839 	}
840 
841 	return B_ERROR;
842 }
843 
844 
845 static status_t
846 ipv4_delta_membership(ipv4_protocol* protocol, int option,
847 	net_interface* interface, const in_addr* groupAddr,
848 	const in_addr* sourceAddr)
849 {
850 	IPv4MulticastFilter &filter = protocol->multicast_filter;
851 	IPv4GroupInterface* state = NULL;
852 	status_t status = B_OK;
853 
854 	switch (option) {
855 		case IP_ADD_MEMBERSHIP:
856 		case IP_ADD_SOURCE_MEMBERSHIP:
857 			status = filter.GetState(*groupAddr, interface, state, true);
858 			break;
859 
860 		case IP_DROP_MEMBERSHIP:
861 		case IP_BLOCK_SOURCE:
862 		case IP_UNBLOCK_SOURCE:
863 		case IP_DROP_SOURCE_MEMBERSHIP:
864 			filter.GetState(*groupAddr, interface, state, false);
865 			if (state == NULL) {
866 				if (option == IP_DROP_MEMBERSHIP
867 					|| option == IP_DROP_SOURCE_MEMBERSHIP)
868 					return EADDRNOTAVAIL;
869 
870 				return B_BAD_VALUE;
871 			}
872 			break;
873 	}
874 
875 	if (status != B_OK)
876 		return status;
877 
878 	status = ipv4_delta_group(state, option, interface, sourceAddr);
879 	filter.ReturnState(state);
880 	return status;
881 }
882 
883 
/*!	Translates a protocol independent MCAST_* membership \a option into its
	IPv4 specific IP_* counterpart.
	\return The IPv4 option, or -1 if \a option is not a known MCAST_* option.
*/
static int
generic_to_ipv4(int option)
{
	static const struct {
		int	generic;
		int	ipv4;
	} kOptionMap[] = {
		{ MCAST_JOIN_GROUP,			IP_ADD_MEMBERSHIP },
		{ MCAST_JOIN_SOURCE_GROUP,	IP_ADD_SOURCE_MEMBERSHIP },
		{ MCAST_LEAVE_GROUP,		IP_DROP_MEMBERSHIP },
		{ MCAST_BLOCK_SOURCE,		IP_BLOCK_SOURCE },
		{ MCAST_UNBLOCK_SOURCE,		IP_UNBLOCK_SOURCE },
		{ MCAST_LEAVE_SOURCE_GROUP,	IP_DROP_SOURCE_MEMBERSHIP }
	};
	const size_t kOptionCount = sizeof(kOptionMap) / sizeof(kOptionMap[0]);

	for (size_t i = 0; i < kOptionCount; i++) {
		if (kOptionMap[i].generic == option)
			return kOptionMap[i].ipv4;
	}

	return -1;
}
904 
905 
906 static net_interface*
907 get_multicast_interface(ipv4_protocol* protocol, const in_addr* address)
908 {
909 	sockaddr_in groupAddr;
910 	net_route* route = sDatalinkModule->get_route(sDomain,
911 		fill_sockaddr_in(&groupAddr, address ? address->s_addr : INADDR_ANY));
912 	if (route == NULL)
913 		return NULL;
914 
915 	return route->interface;
916 }
917 
918 
919 static status_t
920 ipv4_delta_membership(ipv4_protocol* protocol, int option,
921 	in_addr* interfaceAddr, in_addr* groupAddr, in_addr* sourceAddr)
922 {
923 	net_interface* interface = NULL;
924 
925 	if (interfaceAddr->s_addr == INADDR_ANY) {
926 		interface = get_multicast_interface(protocol, groupAddr);
927 	} else {
928 		sockaddr_in address;
929 		interface = sDatalinkModule->get_interface_with_address(sDomain,
930 			fill_sockaddr_in(&address, interfaceAddr->s_addr));
931 	}
932 
933 	if (interface == NULL)
934 		return ENODEV;
935 
936 	return ipv4_delta_membership(protocol, option, interface,
937 		groupAddr, sourceAddr);
938 }
939 
940 
941 static status_t
942 ipv4_generic_delta_membership(ipv4_protocol* protocol, int option,
943 	uint32 index, const sockaddr_storage* _groupAddr,
944 	const sockaddr_storage* _sourceAddr)
945 {
946 	if (_groupAddr->ss_family != AF_INET)
947 		return B_BAD_VALUE;
948 
949 	if (_sourceAddr && _sourceAddr->ss_family != AF_INET)
950 		return B_BAD_VALUE;
951 
952 	const in_addr* groupAddr = &((const sockaddr_in*)_groupAddr)->sin_addr;
953 
954 	net_interface* interface;
955 	if (index == 0)
956 		interface = get_multicast_interface(protocol, groupAddr);
957 	else
958 		interface = sDatalinkModule->get_interface(sDomain, index);
959 
960 	if (interface == NULL)
961 		return ENODEV;
962 
963 	const in_addr* sourceAddr = NULL;
964 	if (_sourceAddr)
965 		sourceAddr = &((const sockaddr_in*)_sourceAddr)->sin_addr;
966 
967 	return ipv4_delta_membership(protocol, generic_to_ipv4(option), interface,
968 		groupAddr, sourceAddr);
969 }
970 
971 
972 static status_t
973 get_int_option(void* target, size_t length, int value)
974 {
975 	if (length != sizeof(int))
976 		return B_BAD_VALUE;
977 
978 	return user_memcpy(target, &value, sizeof(int));
979 }
980 
981 
982 template<typename Type> static status_t
983 set_int_option(Type &target, const void* _value, size_t length)
984 {
985 	int value;
986 
987 	if (length != sizeof(int))
988 		return B_BAD_VALUE;
989 
990 	if (user_memcpy(&value, _value, sizeof(int)) != B_OK)
991 		return B_BAD_ADDRESS;
992 
993 	target = value;
994 	return B_OK;
995 }
996 
997 
998 //	#pragma mark -
999 
1000 
1001 net_protocol*
1002 ipv4_init_protocol(net_socket* socket)
1003 {
1004 	ipv4_protocol* protocol = new (std::nothrow) ipv4_protocol();
1005 	if (protocol == NULL)
1006 		return NULL;
1007 
1008 	protocol->raw = NULL;
1009 	protocol->service_type = 0;
1010 	protocol->time_to_live = kDefaultTTL;
1011 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
1012 	protocol->flags = 0;
1013 	protocol->interface_address = NULL;
1014 	return protocol;
1015 }
1016 
1017 
/*!	Deletes the protocol state including the objects it refers to (the raw
	socket, and the interface address).
	\return Always \c B_OK.
*/
status_t
ipv4_uninit_protocol(net_protocol* _protocol)
{
	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;

	// the members have to go before the object that holds them
	delete protocol->raw;
	delete protocol->interface_address;
	delete protocol;
	return B_OK;
}
1028 
1029 
1030 /*!	Since open() is only called on the top level protocol, when we get here
1031 	it means we are on a SOCK_RAW socket.
1032 */
1033 status_t
1034 ipv4_open(net_protocol* _protocol)
1035 {
1036 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1037 
1038 	RawSocket* raw = new (std::nothrow) RawSocket(protocol->socket);
1039 	if (raw == NULL)
1040 		return B_NO_MEMORY;
1041 
1042 	status_t status = raw->InitCheck();
1043 	if (status != B_OK) {
1044 		delete raw;
1045 		return status;
1046 	}
1047 
1048 	TRACE_SK(protocol, "Open()");
1049 
1050 	protocol->raw = raw;
1051 
1052 	MutexLocker locker(sRawSocketsLock);
1053 	sRawSockets.Add(raw);
1054 	return B_OK;
1055 }
1056 
1057 
1058 status_t
1059 ipv4_close(net_protocol* _protocol)
1060 {
1061 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1062 	RawSocket* raw = protocol->raw;
1063 	if (raw == NULL)
1064 		return B_ERROR;
1065 
1066 	TRACE_SK(protocol, "Close()");
1067 
1068 	MutexLocker locker(sRawSocketsLock);
1069 	sRawSockets.Remove(raw);
1070 	delete raw;
1071 	protocol->raw = NULL;
1072 
1073 	return B_OK;
1074 }
1075 
1076 
/*!	Nothing to do here for IPv4.
	\return Always \c B_OK.
*/
status_t
ipv4_free(net_protocol* protocol)
{
	return B_OK;
}
1082 
1083 
/*!	Connecting is not implemented by this hook.
	\return Always \c B_ERROR.
*/
status_t
ipv4_connect(net_protocol* protocol, const struct sockaddr* address)
{
	return B_ERROR;
}
1089 
1090 
/*!	Accepting connections is not supported.
	\return Always \c EOPNOTSUPP.
*/
status_t
ipv4_accept(net_protocol* protocol, struct net_socket** _acceptedSocket)
{
	return EOPNOTSUPP;
}
1096 
1097 
/*!	Control requests whose (masked) \a level is not IPPROTO_IP are forwarded
	to the datalink module for our domain; there are no IPPROTO_IP level
	control requests.
	\return The result of the datalink module's control() hook, or
		\c B_BAD_VALUE for the IPPROTO_IP level.
*/
status_t
ipv4_control(net_protocol* _protocol, int level, int option, void* value,
	size_t* _length)
{
	if ((level & LEVEL_MASK) != IPPROTO_IP)
		return sDatalinkModule->control(sDomain, option, value, _length);

	return B_BAD_VALUE;
}
1107 
1108 
/*!	Retrieves the socket option \a option.
	The IPPROTO_IP level options are answered from the protocol state as int
	values (see get_int_option()); all other levels are passed on to the
	socket module.
	\return \c EOPNOTSUPP for the multicast membership options,
		\c ENOPROTOOPT for unknown IPPROTO_IP options, otherwise the result
		of get_int_option() or the socket module's get_option().
*/
status_t
ipv4_getsockopt(net_protocol* _protocol, int level, int option, void* value,
	int* _length)
{
	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;

	if (level == IPPROTO_IP) {
		// the flag options are reported as 0 or 1
		if (option == IP_HDRINCL) {
			return get_int_option(value, *_length,
				(protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0);
		}
		if (option == IP_RECVDSTADDR) {
			return get_int_option(value, *_length,
				(protocol->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0);
		}
		if (option == IP_TTL)
			return get_int_option(value, *_length, protocol->time_to_live);
		if (option == IP_TOS)
			return get_int_option(value, *_length, protocol->service_type);
		if (option == IP_MULTICAST_TTL) {
			return get_int_option(value, *_length,
				protocol->multicast_time_to_live);
		}
		if (option == IP_ADD_MEMBERSHIP
			|| option == IP_DROP_MEMBERSHIP
			|| option == IP_BLOCK_SOURCE
			|| option == IP_UNBLOCK_SOURCE
			|| option == IP_ADD_SOURCE_MEMBERSHIP
			|| option == IP_DROP_SOURCE_MEMBERSHIP
			|| option == MCAST_JOIN_GROUP
			|| option == MCAST_LEAVE_GROUP
			|| option == MCAST_BLOCK_SOURCE
			|| option == MCAST_UNBLOCK_SOURCE
			|| option == MCAST_JOIN_SOURCE_GROUP
			|| option == MCAST_LEAVE_SOURCE_GROUP) {
			// RFC 3678, Section 4.1:
			// ``An error of EOPNOTSUPP is returned if these options are
			// used with getsockopt().''
			return EOPNOTSUPP;
		}

		dprintf("IPv4::getsockopt(): get unknown option: %d\n", option);
		return ENOPROTOOPT;
	}

	// not our level
	return sSocketModule->get_option(protocol->socket, level, option, value,
		_length);
}
1157 
1158 
1159 status_t
1160 ipv4_setsockopt(net_protocol* _protocol, int level, int option,
1161 	const void* value, int length)
1162 {
1163 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1164 
1165 	if (level == IPPROTO_IP) {
1166 		if (option == IP_HDRINCL) {
1167 			int headerIncluded;
1168 			if (length != sizeof(int))
1169 				return B_BAD_VALUE;
1170 			if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded))
1171 					!= B_OK)
1172 				return B_BAD_ADDRESS;
1173 
1174 			if (headerIncluded)
1175 				protocol->flags |= IP_FLAG_HEADER_INCLUDED;
1176 			else
1177 				protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
1178 
1179 			return B_OK;
1180 		}
1181 		if (option == IP_RECVDSTADDR) {
1182 			int getAddress;
1183 			if (length != sizeof(int))
1184 				return B_BAD_VALUE;
1185 			if (user_memcpy(&getAddress, value, sizeof(int)) != B_OK)
1186 				return B_BAD_ADDRESS;
1187 
1188 			if (getAddress && (protocol->socket->type == SOCK_DGRAM
1189 					|| protocol->socket->type == SOCK_RAW))
1190 				protocol->flags |= IP_FLAG_RECEIVE_DEST_ADDR;
1191 			else
1192 				protocol->flags &= ~IP_FLAG_RECEIVE_DEST_ADDR;
1193 
1194 			return B_OK;
1195 		}
1196 		if (option == IP_TTL)
1197 			return set_int_option(protocol->time_to_live, value, length);
1198 		if (option == IP_TOS)
1199 			return set_int_option(protocol->service_type, value, length);
1200 		if (option == IP_MULTICAST_IF) {
1201 			if (length != sizeof(struct in_addr))
1202 				return B_BAD_VALUE;
1203 
1204 			struct sockaddr_in* address = new (std::nothrow) sockaddr_in;
1205 			if (address == NULL)
1206 				return B_NO_MEMORY;
1207 
1208 			if (user_memcpy(&address->sin_addr, value, sizeof(struct in_addr))
1209 					!= B_OK) {
1210 				delete address;
1211 				return B_BAD_ADDRESS;
1212 			}
1213 
1214 			// Using INADDR_ANY to remove the previous setting.
1215 			if (address->sin_addr.s_addr == htonl(INADDR_ANY)) {
1216 				delete address;
1217 				delete protocol->interface_address;
1218 				protocol->interface_address = NULL;
1219 				return B_OK;
1220 			}
1221 
1222 			struct net_interface* interface
1223 				= sDatalinkModule->get_interface_with_address(sDomain,
1224 					(struct sockaddr*)address);
1225 			if (interface == NULL) {
1226 				delete address;
1227 				return EADDRNOTAVAIL;
1228 			}
1229 
1230 			delete protocol->interface_address;
1231 			protocol->interface_address = (struct sockaddr*)address;
1232 			return B_OK;
1233 		}
1234 		if (option == IP_MULTICAST_TTL) {
1235 			return set_int_option(protocol->multicast_time_to_live, value,
1236 				length);
1237 		}
1238 		if (option == IP_ADD_MEMBERSHIP || option == IP_DROP_MEMBERSHIP) {
1239 			ip_mreq mreq;
1240 			if (length != sizeof(ip_mreq))
1241 				return B_BAD_VALUE;
1242 			if (user_memcpy(&mreq, value, sizeof(ip_mreq)) != B_OK)
1243 				return B_BAD_ADDRESS;
1244 
1245 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1246 				&mreq.imr_multiaddr, NULL);
1247 		}
1248 		if (option == IP_BLOCK_SOURCE
1249 			|| option == IP_UNBLOCK_SOURCE
1250 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1251 			|| option == IP_DROP_SOURCE_MEMBERSHIP) {
1252 			ip_mreq_source mreq;
1253 			if (length != sizeof(ip_mreq_source))
1254 				return B_BAD_VALUE;
1255 			if (user_memcpy(&mreq, value, sizeof(ip_mreq_source)) != B_OK)
1256 				return B_BAD_ADDRESS;
1257 
1258 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1259 				&mreq.imr_multiaddr, &mreq.imr_sourceaddr);
1260 		}
1261 		if (option == MCAST_LEAVE_GROUP || option == MCAST_JOIN_GROUP) {
1262 			group_req greq;
1263 			if (length != sizeof(group_req))
1264 				return B_BAD_VALUE;
1265 			if (user_memcpy(&greq, value, sizeof(group_req)) != B_OK)
1266 				return B_BAD_ADDRESS;
1267 
1268 			return ipv4_generic_delta_membership(protocol, option,
1269 				greq.gr_interface, &greq.gr_group, NULL);
1270 		}
1271 		if (option == MCAST_BLOCK_SOURCE
1272 			|| option == MCAST_UNBLOCK_SOURCE
1273 			|| option == MCAST_JOIN_SOURCE_GROUP
1274 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1275 			group_source_req greq;
1276 			if (length != sizeof(group_source_req))
1277 				return B_BAD_VALUE;
1278 			if (user_memcpy(&greq, value, sizeof(group_source_req)) != B_OK)
1279 				return B_BAD_ADDRESS;
1280 
1281 			return ipv4_generic_delta_membership(protocol, option,
1282 				greq.gsr_interface, &greq.gsr_group, &greq.gsr_source);
1283 		}
1284 
1285 		dprintf("IPv4::setsockopt(): set unknown option: %d\n", option);
1286 		return ENOPROTOOPT;
1287 	}
1288 
1289 	return sSocketModule->set_option(protocol->socket, level, option,
1290 		value, length);
1291 }
1292 
1293 
1294 status_t
1295 ipv4_bind(net_protocol* protocol, const struct sockaddr* address)
1296 {
1297 	if (address->sa_family != AF_INET)
1298 		return EAFNOSUPPORT;
1299 
1300 	// only INADDR_ANY and addresses of local interfaces are accepted:
1301 	if (((sockaddr_in*)address)->sin_addr.s_addr == INADDR_ANY
1302 		|| IN_MULTICAST(ntohl(((sockaddr_in*)address)->sin_addr.s_addr))
1303 		|| sDatalinkModule->is_local_address(sDomain, address, NULL, NULL)) {
1304 		memcpy(&protocol->socket->address, address, sizeof(struct sockaddr_in));
1305 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
1306 			// explicitly set length, as our callers can't be trusted to
1307 			// always provide the correct length!
1308 		return B_OK;
1309 	}
1310 
1311 	return B_ERROR;
1312 		// address is unknown on this host
1313 }
1314 
1315 
1316 status_t
1317 ipv4_unbind(net_protocol* protocol, struct sockaddr* address)
1318 {
1319 	// nothing to do here
1320 	return B_OK;
1321 }
1322 
1323 
1324 status_t
1325 ipv4_listen(net_protocol* protocol, int count)
1326 {
1327 	return EOPNOTSUPP;
1328 }
1329 
1330 
1331 status_t
1332 ipv4_shutdown(net_protocol* protocol, int direction)
1333 {
1334 	return EOPNOTSUPP;
1335 }
1336 
1337 
1338 status_t
1339 ipv4_send_routed_data(net_protocol* _protocol, struct net_route* route,
1340 	net_buffer* buffer)
1341 {
1342 	if (route == NULL)
1343 		return B_BAD_VALUE;
1344 
1345 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1346 	net_interface* interface = route->interface;
1347 
1348 	TRACE_SK(protocol, "SendRoutedData(%p, %p [%ld bytes])", route, buffer,
1349 		buffer->size);
1350 
1351 	sockaddr_in& source = *(sockaddr_in*)buffer->source;
1352 	sockaddr_in& destination = *(sockaddr_in*)buffer->destination;
1353 	sockaddr_in& broadcastAddress = *(sockaddr_in*)interface->destination;
1354 
1355 	bool headerIncluded = false, checksumNeeded = true;
1356 	if (protocol != NULL)
1357 		headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
1358 
1359 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1360 
1361 	if (destination.sin_addr.s_addr == INADDR_ANY)
1362 		return EDESTADDRREQ;
1363 
1364 	if ((interface->device->flags & IFF_BROADCAST) != 0
1365 		&& (destination.sin_addr.s_addr == INADDR_BROADCAST
1366 			|| destination.sin_addr.s_addr
1367 				== broadcastAddress.sin_addr.s_addr)) {
1368 		if (protocol && !(protocol->socket->options & SO_BROADCAST))
1369 			return B_BAD_VALUE;
1370 		buffer->flags |= MSG_BCAST;
1371 	} else if (IN_MULTICAST(ntohl(destination.sin_addr.s_addr)))
1372 		buffer->flags |= MSG_MCAST;
1373 
1374 	// Add IP header (if needed)
1375 
1376 	if (!headerIncluded) {
1377 		NetBufferPrepend<ipv4_header> header(buffer);
1378 		if (header.Status() != B_OK)
1379 			return header.Status();
1380 
1381 		header->version = IP_VERSION;
1382 		header->header_length = sizeof(ipv4_header) / 4;
1383 		header->service_type = protocol ? protocol->service_type : 0;
1384 		header->total_length = htons(buffer->size);
1385 		header->id = htons(atomic_add(&sPacketID, 1));
1386 		header->fragment_offset = 0;
1387 		if (protocol) {
1388 			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
1389 				? protocol->multicast_time_to_live : protocol->time_to_live;
1390 		} else {
1391 			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
1392 				? kDefaultMulticastTTL : kDefaultTTL;
1393 		}
1394 		header->protocol = protocol
1395 			? protocol->socket->protocol : buffer->protocol;
1396 		header->checksum = 0;
1397 
1398 		header->source = source.sin_addr.s_addr;
1399 		header->destination = destination.sin_addr.s_addr;
1400 	} else {
1401 		// if IP_HDRINCL, check if the source address is set
1402 		NetBufferHeaderReader<ipv4_header> header(buffer);
1403 		if (header.Status() != B_OK)
1404 			return header.Status();
1405 
1406 		if (header->source == 0) {
1407 			header->source = source.sin_addr.s_addr;
1408 			header->checksum = 0;
1409 			header.Sync();
1410 		} else
1411 			checksumNeeded = false;
1412 	}
1413 
1414 	if (buffer->size > 0xffff)
1415 		return EMSGSIZE;
1416 
1417 	if (checksumNeeded) {
1418 		*IPChecksumField(buffer) = gBufferModule->checksum(buffer, 0,
1419 			sizeof(ipv4_header), true);
1420 	}
1421 
1422 	TRACE_SK(protocol, "  SendRoutedData(): header chksum: %ld, buffer "
1423 		"checksum: %ld",
1424 		gBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
1425 		gBufferModule->checksum(buffer, 0, buffer->size, true));
1426 
1427 	TRACE_SK(protocol, "  SendRoutedData(): destination: %08x",
1428 		ntohl(destination.sin_addr.s_addr));
1429 
1430 	uint32 mtu = route->mtu ? route->mtu : interface->mtu;
1431 	if (buffer->size > mtu) {
1432 		// we need to fragment the packet
1433 		return send_fragments(protocol, route, buffer, mtu);
1434 	}
1435 
1436 	return sDatalinkModule->send_data(route, buffer);
1437 }
1438 
1439 
1440 status_t
1441 ipv4_send_data(net_protocol* _protocol, net_buffer* buffer)
1442 {
1443 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1444 
1445 	TRACE_SK(protocol, "SendData(%p [%ld bytes])", buffer, buffer->size);
1446 
1447 	if (protocol && (protocol->flags & IP_FLAG_HEADER_INCLUDED)) {
1448 		if (buffer->size < sizeof(ipv4_header))
1449 			return B_BAD_VALUE;
1450 
1451 		sockaddr_in* source = (sockaddr_in*)buffer->source;
1452 		sockaddr_in* destination = (sockaddr_in*)buffer->destination;
1453 
1454 		fill_sockaddr_in(source, *NetBufferField<in_addr_t,
1455 			offsetof(ipv4_header, source)>(buffer));
1456 		fill_sockaddr_in(destination, *NetBufferField<in_addr_t,
1457 			offsetof(ipv4_header, destination)>(buffer));
1458 	}
1459 
1460 	// handle IP_MULTICAST_IF
1461 	if (IN_MULTICAST(ntohl(((sockaddr_in*)buffer->destination)->
1462 			sin_addr.s_addr)) && protocol->interface_address != NULL) {
1463 		net_interface* interface
1464 			= sDatalinkModule->get_interface_with_address(sDomain,
1465 				protocol->interface_address);
1466 		if (interface == NULL || (interface->flags & IFF_UP) == 0)
1467 			return EADDRNOTAVAIL;
1468 
1469 		buffer->interface = interface;
1470 
1471 		net_route* route = sDatalinkModule->get_route(sDomain,
1472 			interface->address);
1473 		if (route == NULL)
1474 			return ENETUNREACH;
1475 
1476 		return sDatalinkModule->send_data(route, buffer);
1477 	}
1478 
1479 	return sDatalinkModule->send_datagram(protocol, sDomain, buffer);
1480 }
1481 
1482 
1483 ssize_t
1484 ipv4_send_avail(net_protocol* protocol)
1485 {
1486 	return B_ERROR;
1487 }
1488 
1489 
1490 status_t
1491 ipv4_read_data(net_protocol* _protocol, size_t numBytes, uint32 flags,
1492 	net_buffer** _buffer)
1493 {
1494 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1495 	RawSocket* raw = protocol->raw;
1496 	if (raw == NULL)
1497 		return B_ERROR;
1498 
1499 	TRACE_SK(protocol, "ReadData(%lu, 0x%lx)", numBytes, flags);
1500 
1501 	return raw->SocketDequeue(flags, _buffer);
1502 }
1503 
1504 
1505 ssize_t
1506 ipv4_read_avail(net_protocol* _protocol)
1507 {
1508 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1509 	RawSocket* raw = protocol->raw;
1510 	if (raw == NULL)
1511 		return B_ERROR;
1512 
1513 	return raw->AvailableData();
1514 }
1515 
1516 
1517 struct net_domain*
1518 ipv4_get_domain(net_protocol* protocol)
1519 {
1520 	return sDomain;
1521 }
1522 
1523 
1524 size_t
1525 ipv4_get_mtu(net_protocol* protocol, const struct sockaddr* address)
1526 {
1527 	net_route* route = sDatalinkModule->get_route(sDomain, address);
1528 	if (route == NULL)
1529 		return 0;
1530 
1531 	size_t mtu;
1532 	if (route->mtu != 0)
1533 		mtu = route->mtu;
1534 	else
1535 		mtu = route->interface->mtu;
1536 
1537 	sDatalinkModule->put_route(sDomain, route);
1538 	return mtu - sizeof(ipv4_header);
1539 }
1540 
1541 
1542 status_t
1543 ipv4_receive_data(net_buffer* buffer)
1544 {
1545 	TRACE("ReceiveData(%p [%ld bytes])", buffer, buffer->size);
1546 
1547 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1548 	if (bufferHeader.Status() != B_OK)
1549 		return bufferHeader.Status();
1550 
1551 	ipv4_header& header = bufferHeader.Data();
1552 	TRACE_ONLY(dump_ipv4_header(header));
1553 
1554 	if (header.version != IP_VERSION)
1555 		return B_BAD_TYPE;
1556 
1557 	uint16 packetLength = header.TotalLength();
1558 	uint16 headerLength = header.HeaderLength();
1559 	if (packetLength > buffer->size
1560 		|| headerLength < sizeof(ipv4_header))
1561 		return B_BAD_DATA;
1562 
1563 	// TODO: would be nice to have a direct checksum function somewhere
1564 	if (gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1565 		return B_BAD_DATA;
1566 
1567 	// lower layers notion of broadcast or multicast have no relevance to us
1568 	// TODO: they actually have when deciding whether to send an ICMP error
1569 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1570 
1571 	sockaddr_in destination;
1572 	fill_sockaddr_in(&destination, header.destination);
1573 
1574 	if (header.destination == INADDR_BROADCAST) {
1575 		buffer->flags |= MSG_BCAST;
1576 	} else if (IN_MULTICAST(ntohl(header.destination))) {
1577 		buffer->flags |= MSG_MCAST;
1578 	} else {
1579 		uint32 matchedAddressType = 0;
1580 
1581 		// test if the packet is really for us
1582 		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1583 				&buffer->interface, &matchedAddressType)
1584 			&& !sDatalinkModule->is_local_link_address(sDomain, true,
1585 				buffer->destination, &buffer->interface)) {
1586 			TRACE("  ReceiveData(): packet was not for us %x -> %x",
1587 				ntohl(header.source), ntohl(header.destination));
1588 			// Send ICMP error: Host unreachable
1589 			sDomain->module->error_reply(NULL, buffer,
1590 				icmp_encode(ICMP_TYPE_UNREACH, ICMP_CODE_HOST_UNREACH), NULL);
1591 			return B_ERROR;
1592 		}
1593 
1594 		// copy over special address types (MSG_BCAST or MSG_MCAST):
1595 		buffer->flags |= matchedAddressType;
1596 	}
1597 
1598 	// set net_buffer's source/destination address
1599 	fill_sockaddr_in((struct sockaddr_in*)buffer->source, header.source);
1600 	memcpy(buffer->destination, &destination, sizeof(sockaddr_in));
1601 
1602 	uint8 protocol = buffer->protocol = header.protocol;
1603 	buffer->hoplimit = header.time_to_live;
1604 
1605 	// remove any trailing/padding data
1606 	status_t status = gBufferModule->trim(buffer, packetLength);
1607 	if (status != B_OK)
1608 		return status;
1609 
1610 	// check for fragmentation
1611 	uint16 fragmentOffset = header.FragmentOffset();
1612 	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
1613 		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
1614 		// this is a fragment
1615 		TRACE("  ReceiveData(): Found a Fragment!");
1616 		status = reassemble_fragments(header, &buffer);
1617 		TRACE("  ReceiveData():  -> %s", strerror(status));
1618 		if (status != B_OK)
1619 			return status;
1620 
1621 		if (buffer == NULL) {
1622 			// buffer was put into fragment packet
1623 			TRACE("  ReceiveData(): Not yet assembled.");
1624 			return B_OK;
1625 		}
1626 	}
1627 
1628 	// Preserve the ipv4 header for ICMP processing
1629 	// TODO: solve this differently, and discard net_buffer::network_header!
1630 	ipv4_header* clonedHeader = (ipv4_header*)malloc(sizeof(ipv4_header));
1631 	if (clonedHeader == NULL)
1632 		return B_NO_MEMORY;
1633 
1634 	memcpy(clonedHeader, &header, sizeof(ipv4_header));
1635 	buffer->network_header = clonedHeader;
1636 
1637 	// Since the buffer might have been changed (reassembled fragment)
1638 	// we must no longer access bufferHeader or header anymore after
1639 	// this point
1640 
1641 	bool rawDelivered = raw_receive_data(buffer);
1642 
1643 	gBufferModule->remove_header(buffer, headerLength);
1644 		// the header is of variable size and may include IP options
1645 		// (TODO: that we ignore for now)
1646 
1647 	net_protocol_module_info* module = receiving_protocol(protocol);
1648 	if (module == NULL) {
1649 		// no handler for this packet
1650 		if (!rawDelivered) {
1651 			sDomain->module->error_reply(NULL, buffer,
1652 				icmp_encode(ICMP_TYPE_UNREACH, ICMP_CODE_PROTO_UNREACH), NULL);
1653 		}
1654 		return EAFNOSUPPORT;
1655 	}
1656 
1657 	if ((buffer->flags & MSG_MCAST) != 0) {
1658 		// Unfortunately historical reasons dictate that the IP multicast
1659 		// model be a little different from the unicast one. We deliver
1660 		// this frame directly to all sockets registered with interest
1661 		// for this multicast group.
1662 		deliver_multicast(module, buffer, false);
1663 		gBufferModule->free(buffer);
1664 		return B_OK;
1665 	}
1666 
1667 	return module->receive_data(buffer);
1668 }
1669 
1670 
1671 status_t
1672 ipv4_deliver_data(net_protocol* _protocol, net_buffer* buffer)
1673 {
1674 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1675 
1676 	if (protocol->raw == NULL)
1677 		return B_ERROR;
1678 
1679 	return protocol->raw->SocketEnqueue(buffer);
1680 }
1681 
1682 
1683 status_t
1684 ipv4_error_received(uint32 code, net_buffer* buffer)
1685 {
1686 	net_protocol_module_info* protocol = receiving_protocol(buffer->protocol);
1687 	if (protocol == NULL)
1688 		return B_ERROR;
1689 
1690 	// propagate error
1691 	return protocol->error_received(code, buffer);
1692 }
1693 
1694 
1695 status_t
1696 ipv4_error_reply(net_protocol* protocol, net_buffer* causedError, uint32 code,
1697 	void* errorData)
1698 {
1699 	// Directly obtain the ICMP protocol module
1700 	net_protocol_module_info* icmp = receiving_protocol(IPPROTO_ICMP);
1701 	if (icmp == NULL)
1702 		return B_ERROR;
1703 
1704 	return icmp->error_reply(protocol, causedError, code, errorData);
1705 }
1706 
1707 
1708 ssize_t
1709 ipv4_process_ancillary_data_no_container(net_protocol* protocol,
1710 	net_buffer* buffer, void* msgControl, size_t msgControlLen)
1711 {
1712 	ssize_t bytesWritten = 0;
1713 
1714 	if ((((ipv4_protocol*)protocol)->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0) {
1715 		if (msgControlLen < CMSG_SPACE(sizeof(struct in_addr)))
1716 			return B_NO_MEMORY;
1717 
1718 		cmsghdr* messageHeader = (cmsghdr*)msgControl;
1719 		messageHeader->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
1720 		messageHeader->cmsg_level = IPPROTO_IP;
1721 		messageHeader->cmsg_type = IP_RECVDSTADDR;
1722 
1723 		memcpy(CMSG_DATA(messageHeader),
1724 		 	&((struct sockaddr_in*)buffer->destination)->sin_addr,
1725 		 	sizeof(struct in_addr));
1726 
1727 		bytesWritten += CMSG_SPACE(sizeof(struct in_addr));
1728 	}
1729 
1730 	return bytesWritten;
1731 }
1732 
1733 
1734 //	#pragma mark -
1735 
1736 
1737 status_t
1738 init_ipv4()
1739 {
1740 	sPacketID = (int32)system_time();
1741 
1742 	mutex_init(&sRawSocketsLock, "raw sockets");
1743 	mutex_init(&sFragmentLock, "IPv4 Fragments");
1744 	mutex_init(&sMulticastGroupsLock, "IPv4 multicast groups");
1745 	mutex_init(&sReceivingProtocolLock, "IPv4 receiving protocols");
1746 
1747 	status_t status;
1748 
1749 	sMulticastState = new MulticastState();
1750 	if (sMulticastState == NULL) {
1751 		status = B_NO_MEMORY;
1752 		goto err4;
1753 	}
1754 
1755 	status = sMulticastState->Init();
1756 	if (status != B_OK)
1757 		goto err5;
1758 
1759 	sFragmentHash = hash_init(MAX_HASH_FRAGMENTS, FragmentPacket::NextOffset(),
1760 		&FragmentPacket::Compare, &FragmentPacket::Hash);
1761 	if (sFragmentHash == NULL)
1762 		goto err5;
1763 
1764 	new (&sRawSockets) RawSocketList;
1765 		// static initializers do not work in the kernel,
1766 		// so we have to do it here, manually
1767 		// TODO: for modules, this shouldn't be required
1768 
1769 	status = gStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
1770 		"network/protocols/ipv4/v1", NULL);
1771 	if (status != B_OK)
1772 		goto err6;
1773 
1774 	status = gStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
1775 		&gIPv4AddressModule, &sDomain);
1776 	if (status != B_OK)
1777 		goto err6;
1778 
1779 	add_debugger_command("ipv4_multicast", dump_ipv4_multicast,
1780 		"list all current IPv4 multicast states");
1781 
1782 	return B_OK;
1783 
1784 err6:
1785 	hash_uninit(sFragmentHash);
1786 err5:
1787 	delete sMulticastState;
1788 err4:
1789 	mutex_destroy(&sReceivingProtocolLock);
1790 	mutex_destroy(&sMulticastGroupsLock);
1791 	mutex_destroy(&sFragmentLock);
1792 	mutex_destroy(&sRawSocketsLock);
1793 	return status;
1794 }
1795 
1796 
1797 status_t
1798 uninit_ipv4()
1799 {
1800 	mutex_lock(&sReceivingProtocolLock);
1801 
1802 	remove_debugger_command("ipv4_multicast", dump_ipv4_multicast);
1803 
1804 	// put all the domain receiving protocols we gathered so far
1805 	for (uint32 i = 0; i < 256; i++) {
1806 		if (sReceivingProtocol[i] != NULL)
1807 			gStackModule->put_domain_receiving_protocol(sDomain, i);
1808 	}
1809 
1810 	gStackModule->unregister_domain(sDomain);
1811 	mutex_unlock(&sReceivingProtocolLock);
1812 
1813 	delete sMulticastState;
1814 	hash_uninit(sFragmentHash);
1815 
1816 	mutex_destroy(&sMulticastGroupsLock);
1817 	mutex_destroy(&sFragmentLock);
1818 	mutex_destroy(&sRawSocketsLock);
1819 	mutex_destroy(&sReceivingProtocolLock);
1820 
1821 	return B_OK;
1822 }
1823 
1824 
1825 static status_t
1826 ipv4_std_ops(int32 op, ...)
1827 {
1828 	switch (op) {
1829 		case B_MODULE_INIT:
1830 			return init_ipv4();
1831 		case B_MODULE_UNINIT:
1832 			return uninit_ipv4();
1833 
1834 		default:
1835 			return B_ERROR;
1836 	}
1837 }
1838 
1839 
// The IPv4 protocol module description, named "network/protocols/ipv4/v1".
// The hook functions are listed positionally, so their order has to match
// the member order of net_protocol_module_info.
net_protocol_module_info gIPv4Module = {
	// module_info: name, flags, standard operations hook
	{
		"network/protocols/ipv4/v1",
		0,
		ipv4_std_ops
	},
	// protocol flags
	NET_PROTOCOL_ATOMIC_MESSAGES,

	// protocol hooks
	ipv4_init_protocol,
	ipv4_uninit_protocol,
	ipv4_open,
	ipv4_close,
	ipv4_free,
	ipv4_connect,
	ipv4_accept,
	ipv4_control,
	ipv4_getsockopt,
	ipv4_setsockopt,
	ipv4_bind,
	ipv4_unbind,
	ipv4_listen,
	ipv4_shutdown,
	ipv4_send_data,
	ipv4_send_routed_data,
	ipv4_send_avail,
	ipv4_read_data,
	ipv4_read_avail,
	ipv4_get_domain,
	ipv4_get_mtu,
	ipv4_receive_data,
	ipv4_deliver_data,
	ipv4_error_received,
	ipv4_error_reply,
	// hooks that are not implemented are left NULL
	NULL,		// add_ancillary_data()
	NULL,		// process_ancillary_data()
	ipv4_process_ancillary_data_no_container,
	NULL,		// send_data_no_buffer()
	NULL		// read_data_no_buffer()
};
1879 
// The modules this add-on depends on. Each entry pairs a module name with
// the address of the global pointer used to access that module
// (presumably filled in by the module loader - TODO confirm). The list is
// terminated by an empty entry.
module_dependency module_dependencies[] = {
	{NET_STACK_MODULE_NAME, (module_info**)&gStackModule},
	{NET_BUFFER_MODULE_NAME, (module_info**)&gBufferModule},
	{NET_DATALINK_MODULE_NAME, (module_info**)&sDatalinkModule},
	{NET_SOCKET_MODULE_NAME, (module_info**)&sSocketModule},
	{}
};
1887 
// NULL terminated list of the modules this add-on provides; the IPv4
// protocol module is the only one.
module_info* modules[] = {
	(module_info*)&gIPv4Module,
	NULL
};
1892