xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision a76f629efad0ba4d6518d918a39dbcc6097fe536)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4_address.h"
11 #include "multicast.h"
12 
13 #include <net_datalink.h>
14 #include <net_datalink_protocol.h>
15 #include <net_device.h>
16 #include <net_protocol.h>
17 #include <net_stack.h>
18 #include <NetBufferUtilities.h>
19 #include <ProtocolUtilities.h>
20 
21 #include <ByteOrder.h>
22 #include <KernelExport.h>
23 #include <util/AutoLock.h>
24 #include <util/list.h>
25 #include <util/khash.h>
26 #include <util/DoublyLinkedList.h>
27 #include <util/MultiHashTable.h>
28 
29 #include <netinet/in.h>
30 #include <netinet/ip.h>
31 #include <new>
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <utility>
36 
37 
// Define TRACE_IPV4 to route the TRACE()/TRACE_SK() output to dprintf();
// when it is left undefined both macros expand to an empty statement.
//#define TRACE_IPV4
#if defined(TRACE_IPV4)
#	define TRACE(format, args...) \
		dprintf("IPv4 [%llu] " format "\n", system_time() , ##args)
#	define TRACE_SK(protocol, format, args...) \
		dprintf("IPv4 [%llu] %p " format "\n", system_time(), \
			protocol , ##args)
#else
#	define TRACE(args...)		do { } while (0)
#	define TRACE_SK(args...)	do { } while (0)
#endif
49 
50 
51 struct ipv4_header {
52 #if B_HOST_IS_LENDIAN == 1
53 	uint8		header_length : 4;	// header length in 32-bit words
54 	uint8		version : 4;
55 #else
56 	uint8		version : 4;
57 	uint8		header_length : 4;
58 #endif
59 	uint8		service_type;
60 	uint16		total_length;
61 	uint16		id;
62 	uint16		fragment_offset;
63 	uint8		time_to_live;
64 	uint8		protocol;
65 	uint16		checksum;
66 	in_addr_t	source;
67 	in_addr_t	destination;
68 
69 	uint16 HeaderLength() const { return header_length << 2; }
70 	uint16 TotalLength() const { return ntohs(total_length); }
71 	uint16 FragmentOffset() const { return ntohs(fragment_offset); }
72 } _PACKED;
73 
// IP version number of IPv4
#define IP_VERSION				4

// Flag bits and offset mask of ipv4_header::fragment_offset, applied to the
// value in host byte order (see ipv4_header::FragmentOffset())
#define IP_RESERVED_FLAG		0x8000
#define IP_DONT_FRAGMENT		0x4000
#define IP_MORE_FRAGMENTS		0x2000
#define IP_FRAGMENT_OFFSET_MASK	0x1fff

// Slots in the fragment packet's hash
#define MAX_HASH_FRAGMENTS 		64
// Discard fragment after 60 seconds (value given in microseconds)
#define FRAGMENT_TIMEOUT		60000000LL
86 
87 typedef DoublyLinkedList<struct net_buffer,
88 	DoublyLinkedListCLink<struct net_buffer> > FragmentList;
89 
90 typedef NetBufferField<uint16, offsetof(ipv4_header, checksum)> IPChecksumField;
91 
92 struct ipv4_packet_key {
93 	in_addr_t	source;
94 	in_addr_t	destination;
95 	uint16		id;
96 	uint8		protocol;
97 };
98 
99 class FragmentPacket {
100 public:
101 							FragmentPacket(const ipv4_packet_key &key);
102 							~FragmentPacket();
103 
104 			status_t		AddFragment(uint16 start, uint16 end,
105 								net_buffer* buffer, bool lastFragment);
106 			status_t		Reassemble(net_buffer* to);
107 
108 			bool			IsComplete() const
109 								{ return fReceivedLastFragment
110 									&& fBytesLeft == 0; }
111 
112 	static	uint32			Hash(void* _packet, const void* _key, uint32 range);
113 	static	int				Compare(void* _packet, const void* _key);
114 	static	int32			NextOffset()
115 								{ return offsetof(FragmentPacket, fNext); }
116 	static	void			StaleTimer(struct net_timer* timer, void* data);
117 
118 private:
119 			FragmentPacket	*fNext;
120 			struct ipv4_packet_key fKey;
121 			bool			fReceivedLastFragment;
122 			int32			fBytesLeft;
123 			FragmentList	fFragments;
124 			net_timer		fTimer;
125 };
126 
127 
128 class RawSocket
129 	: public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
130 public:
131 							RawSocket(net_socket* socket);
132 };
133 
134 typedef DoublyLinkedList<RawSocket> RawSocketList;
135 
136 typedef MulticastGroupInterface<IPv4Multicast> IPv4GroupInterface;
137 typedef MulticastFilter<IPv4Multicast> IPv4MulticastFilter;
138 
139 struct MulticastStateHash {
140 	typedef std::pair<const in_addr* , uint32> KeyType;
141 	typedef IPv4GroupInterface ValueType;
142 
143 	size_t HashKey(const KeyType &key) const
144 		{ return key.first->s_addr ^ key.second; }
145 	size_t Hash(ValueType* value) const
146 		{ return HashKey(std::make_pair(&value->Address(),
147 			value->Interface()->index)); }
148 	bool Compare(const KeyType &key, ValueType* value) const
149 		{ return value->Interface()->index == key.second
150 			&& value->Address().s_addr == key.first->s_addr; }
151 	bool CompareValues(ValueType* value1, ValueType* value2) const
152 		{ return value1->Interface()->index == value2->Interface()->index
153 			&& value1->Address().s_addr == value2->Address().s_addr; }
154 	ValueType*& GetLink(ValueType* value) const { return value->HashLink(); }
155 };
156 
157 
158 struct ipv4_protocol : net_protocol {
159 	ipv4_protocol()
160 		:
161 		multicast_filter(this)
162 	{
163 	}
164 
165 	RawSocket	*raw;
166 	uint8		service_type;
167 	uint8		time_to_live;
168 	uint8		multicast_time_to_live;
169 	uint32		flags;
170 	struct sockaddr* interface_address; // for IP_MULTICAST_IF
171 
172 	IPv4MulticastFilter multicast_filter;
173 };
174 
175 // protocol flags
176 #define IP_FLAG_HEADER_INCLUDED		0x01
177 #define IP_FLAG_RECEIVE_DEST_ADDR	0x02
178 
179 
180 static const int kDefaultTTL = 254;
181 static const int kDefaultMulticastTTL = 1;
182 
183 
184 extern net_protocol_module_info gIPv4Module;
185 	// we need this in ipv4_std_ops() for registering the AF_INET domain
186 
187 net_stack_module_info* gStackModule;
188 net_buffer_module_info* gBufferModule;
189 
190 static struct net_domain* sDomain;
191 static net_datalink_module_info* sDatalinkModule;
192 static net_socket_module_info* sSocketModule;
193 static int32 sPacketID;
194 static RawSocketList sRawSockets;
195 static mutex sRawSocketsLock;
196 static mutex sFragmentLock;
197 static hash_table* sFragmentHash;
198 static mutex sMulticastGroupsLock;
199 
200 typedef MultiHashTable<MulticastStateHash> MulticastState;
201 static MulticastState* sMulticastState;
202 
203 static net_protocol_module_info* sReceivingProtocol[256];
204 static mutex sReceivingProtocolLock;
205 
206 
/*!	Formats \a address in dotted decimal notation.
	At most \a bufLen bytes (including the terminating null) are written to
	\a buf.
	\return \a buf, to allow using the call directly as a printf() argument.
*/
static const char*
print_address(const in_addr* address, char* buf, size_t bufLen)
{
	const unsigned int hostOrder = ntohl(address->s_addr);
	const unsigned int octets[4] = {
		(hostOrder >> 24) & 0xff,
		(hostOrder >> 16) & 0xff,
		(hostOrder >> 8) & 0xff,
		hostOrder & 0xff
	};

	snprintf(buf, bufLen, "%u.%u.%u.%u", octets[0], octets[1], octets[2],
		octets[3]);

	return buf;
}
217 
218 
219 RawSocket::RawSocket(net_socket* socket)
220 	:
221 	DatagramSocket<>("ipv4 raw socket", socket)
222 {
223 }
224 
225 
226 //	#pragma mark -
227 
228 
229 FragmentPacket::FragmentPacket(const ipv4_packet_key &key)
230 	:
231 	fKey(key),
232 	fReceivedLastFragment(false),
233 	fBytesLeft(IP_MAXPACKET)
234 {
235 	gStackModule->init_timer(&fTimer, StaleTimer, this);
236 }
237 
238 
239 FragmentPacket::~FragmentPacket()
240 {
241 	// cancel the kill timer
242 	gStackModule->set_timer(&fTimer, -1);
243 
244 	// delete all fragments
245 	net_buffer* buffer;
246 	while ((buffer = fFragments.RemoveHead()) != NULL) {
247 		gBufferModule->free(buffer);
248 	}
249 }
250 
251 
252 status_t
253 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer* buffer,
254 	bool lastFragment)
255 {
256 	// restart the timer
257 	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
258 
259 	if (start >= end) {
260 		// invalid fragment
261 		return B_BAD_DATA;
262 	}
263 
264 	// Search for a position in the list to insert the fragment
265 
266 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
267 	net_buffer* previous = NULL;
268 	net_buffer* next = NULL;
269 	while ((previous = iterator.Next()) != NULL) {
270 		if (previous->fragment.start <= start) {
271 			// The new fragment can be inserted after this one
272 			break;
273 		}
274 
275 		next = previous;
276 	}
277 
278 	// See if we already have the fragment's data
279 
280 	if (previous != NULL && previous->fragment.start <= start
281 		&& previous->fragment.end >= end) {
282 		// we do, so we can just drop this fragment
283 		gBufferModule->free(buffer);
284 		return B_OK;
285 	}
286 
287 	TRACE("    previous: %p, next: %p", previous, next);
288 
289 	// If we have parts of the data already, truncate as needed
290 
291 	if (previous != NULL && previous->fragment.end > start) {
292 		TRACE("    remove header %d bytes", previous->fragment.end - start);
293 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
294 		start = previous->fragment.end;
295 	}
296 	if (next != NULL && next->fragment.start < end) {
297 		TRACE("    remove trailer %d bytes", next->fragment.start - end);
298 		gBufferModule->remove_trailer(buffer, next->fragment.start - end);
299 		end = next->fragment.start;
300 	}
301 
302 	// Now try if we can already merge the fragments together
303 
304 	// We will always keep the last buffer received, so that we can still
305 	// report an error (in which case we're not responsible for freeing it)
306 
307 	if (previous != NULL && previous->fragment.end == start) {
308 		fFragments.Remove(previous);
309 
310 		buffer->fragment.start = previous->fragment.start;
311 		buffer->fragment.end = end;
312 
313 		status_t status = gBufferModule->merge(buffer, previous, false);
314 		TRACE("    merge previous: %s", strerror(status));
315 		if (status != B_OK) {
316 			fFragments.Insert(next, previous);
317 			return status;
318 		}
319 
320 		fFragments.Insert(next, buffer);
321 
322 		// cut down existing hole
323 		fBytesLeft -= end - start;
324 
325 		if (lastFragment && !fReceivedLastFragment) {
326 			fReceivedLastFragment = true;
327 			fBytesLeft -= IP_MAXPACKET - end;
328 		}
329 
330 		TRACE("    hole length: %d", (int)fBytesLeft);
331 
332 		return B_OK;
333 	} else if (next != NULL && next->fragment.start == end) {
334 		net_buffer* afterNext = (net_buffer*)next->link.next;
335 		fFragments.Remove(next);
336 
337 		buffer->fragment.start = start;
338 		buffer->fragment.end = next->fragment.end;
339 
340 		status_t status = gBufferModule->merge(buffer, next, true);
341 		TRACE("    merge next: %s", strerror(status));
342 		if (status != B_OK) {
343 			// Insert "next" at its previous position
344 			fFragments.Insert(afterNext, next);
345 			return status;
346 		}
347 
348 		fFragments.Insert(afterNext, buffer);
349 
350 		// cut down existing hole
351 		fBytesLeft -= end - start;
352 
353 		if (lastFragment && !fReceivedLastFragment) {
354 			fReceivedLastFragment = true;
355 			fBytesLeft -= IP_MAXPACKET - end;
356 		}
357 
358 		TRACE("    hole length: %d", (int)fBytesLeft);
359 
360 		return B_OK;
361 	}
362 
363 	// We couldn't merge the fragments, so we need to add it as is
364 
365 	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);
366 
367 	buffer->fragment.start = start;
368 	buffer->fragment.end = end;
369 	fFragments.Insert(next, buffer);
370 
371 	// update length of the hole, if any
372 	fBytesLeft -= end - start;
373 
374 	if (lastFragment && !fReceivedLastFragment) {
375 		fReceivedLastFragment = true;
376 		fBytesLeft -= IP_MAXPACKET - end;
377 	}
378 
379 	TRACE("    hole length: %d", (int)fBytesLeft);
380 
381 	return B_OK;
382 }
383 
384 
385 /*!	Reassembles the fragments to the specified buffer \a to.
386 	This buffer must have been added via AddFragment() before.
387 */
388 status_t
389 FragmentPacket::Reassemble(net_buffer* to)
390 {
391 	if (!IsComplete())
392 		return B_ERROR;
393 
394 	net_buffer* buffer = NULL;
395 
396 	net_buffer* fragment;
397 	while ((fragment = fFragments.RemoveHead()) != NULL) {
398 		if (buffer != NULL) {
399 			status_t status;
400 			if (to == fragment) {
401 				status = gBufferModule->merge(fragment, buffer, false);
402 				buffer = fragment;
403 			} else
404 				status = gBufferModule->merge(buffer, fragment, true);
405 			if (status != B_OK)
406 				return status;
407 		} else
408 			buffer = fragment;
409 	}
410 
411 	if (buffer != to)
412 		panic("ipv4 packet reassembly did not work correctly.");
413 
414 	return B_OK;
415 }
416 
417 
418 int
419 FragmentPacket::Compare(void* _packet, const void* _key)
420 {
421 	const ipv4_packet_key* key = (ipv4_packet_key*)_key;
422 	ipv4_packet_key* packetKey = &((FragmentPacket*)_packet)->fKey;
423 
424 	if (packetKey->id == key->id
425 		&& packetKey->source == key->source
426 		&& packetKey->destination == key->destination
427 		&& packetKey->protocol == key->protocol)
428 		return 0;
429 
430 	return 1;
431 }
432 
433 
434 uint32
435 FragmentPacket::Hash(void* _packet, const void* _key, uint32 range)
436 {
437 	const struct ipv4_packet_key* key = (struct ipv4_packet_key*)_key;
438 	FragmentPacket* packet = (FragmentPacket*)_packet;
439 	if (packet != NULL)
440 		key = &packet->fKey;
441 
442 	return (key->source ^ key->destination ^ key->protocol ^ key->id) % range;
443 }
444 
445 
446 /*static*/ void
447 FragmentPacket::StaleTimer(struct net_timer* timer, void* data)
448 {
449 	FragmentPacket* packet = (FragmentPacket*)data;
450 	TRACE("Assembling FragmentPacket %p timed out!", packet);
451 
452 	MutexLocker locker(&sFragmentLock);
453 
454 	hash_remove(sFragmentHash, packet);
455 	delete packet;
456 }
457 
458 
459 //	#pragma mark -
460 
461 
#if 0
/*!	Debug helper (currently compiled out): prints all fields of \a header
	via dprintf().
*/
static void
dump_ipv4_header(ipv4_header &header)
{
	// Gives access to the single bytes of an address
	struct pretty_ipv4 {
	#if B_HOST_IS_LENDIAN == 1
		uint8 a;
		uint8 b;
		uint8 c;
		uint8 d;
	#else
		uint8 d;
		uint8 c;
		uint8 b;
		uint8 a;
	#endif
	};

	struct pretty_ipv4* src = (struct pretty_ipv4*)&header.source;
	struct pretty_ipv4* dst = (struct pretty_ipv4*)&header.destination;
	const uint16 offsetField = header.FragmentOffset();

	dprintf("  version: %d\n", header.version);
	dprintf("  header_length: 4 * %d\n", header.header_length);
	dprintf("  service_type: %d\n", header.service_type);
	dprintf("  total_length: %d\n", header.TotalLength());
	dprintf("  id: %d\n", ntohs(header.id));
	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
		offsetField & IP_FRAGMENT_OFFSET_MASK,
		(offsetField & IP_RESERVED_FLAG) ? 'r' : '-',
		(offsetField & IP_DONT_FRAGMENT) ? 'd' : '-',
		(offsetField & IP_MORE_FRAGMENTS) ? 'm' : '-');
	dprintf("  time_to_live: %d\n", header.time_to_live);
	dprintf("  protocol: %d\n", header.protocol);
	dprintf("  checksum: %d\n", ntohs(header.checksum));
	dprintf("  source: %d.%d.%d.%d\n", src->a, src->b, src->c, src->d);
	dprintf("  destination: %d.%d.%d.%d\n", dst->a, dst->b, dst->c, dst->d);
}
#endif
498 
499 
500 static int
501 dump_ipv4_multicast(int argc, char** argv)
502 {
503 	MulticastState::Iterator it = sMulticastState->GetIterator();
504 
505 	while (it.HasNext()) {
506 		IPv4GroupInterface* state = it.Next();
507 
508 		char addressBuffer[64];
509 
510 		kprintf("%p: group <%s, %s, %s {", state, state->Interface()->name,
511 			print_address(&state->Address(), addressBuffer,
512 			sizeof(addressBuffer)),
513 			state->Mode() == IPv4GroupInterface::kExclude
514 				? "Exclude" : "Include");
515 
516 		int count = 0;
517 		IPv4GroupInterface::AddressSet::Iterator it
518 			= state->Sources().GetIterator();
519 		while (it.HasNext()) {
520 			kprintf("%s%s", count > 0 ? ", " : "", print_address(&it.Next(),
521 				addressBuffer, sizeof(addressBuffer)));
522 			count++;
523 		}
524 
525 		kprintf("}> sock %p\n", state->Parent()->Socket());
526 	}
527 
528 	return 0;
529 }
530 
531 
532 /*!	Attempts to re-assemble fragmented packets.
533 	\return B_OK if everything went well; if it could reassemble the packet, \a _buffer
534 		will point to its buffer, otherwise, it will be \c NULL.
535 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
536 */
537 static status_t
538 reassemble_fragments(const ipv4_header &header, net_buffer** _buffer)
539 {
540 	net_buffer* buffer = *_buffer;
541 	status_t status;
542 
543 	struct ipv4_packet_key key;
544 	key.source = (in_addr_t)header.source;
545 	key.destination = (in_addr_t)header.destination;
546 	key.id = header.id;
547 	key.protocol = header.protocol;
548 
549 	// TODO: Make locking finer grained.
550 	MutexLocker locker(&sFragmentLock);
551 
552 	FragmentPacket* packet = (FragmentPacket*)hash_lookup(sFragmentHash, &key);
553 	if (packet == NULL) {
554 		// New fragment packet
555 		packet = new (std::nothrow) FragmentPacket(key);
556 		if (packet == NULL)
557 			return B_NO_MEMORY;
558 
559 		// add packet to hash
560 		status = hash_insert(sFragmentHash, packet);
561 		if (status != B_OK) {
562 			delete packet;
563 			return status;
564 		}
565 	}
566 
567 	uint16 fragmentOffset = header.FragmentOffset();
568 	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
569 	uint16 end = start + header.TotalLength() - header.HeaderLength();
570 	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;
571 
572 	TRACE("   Received IPv4 %sfragment of size %d, offset %d.",
573 		lastFragment ? "last ": "", end - start, start);
574 
575 	// Remove header unless this is the first fragment
576 	if (start != 0)
577 		gBufferModule->remove_header(buffer, header.HeaderLength());
578 
579 	status = packet->AddFragment(start, end, buffer, lastFragment);
580 	if (status != B_OK)
581 		return status;
582 
583 	if (packet->IsComplete()) {
584 		hash_remove(sFragmentHash, packet);
585 			// no matter if reassembling succeeds, we won't need this packet
586 			// anymore
587 
588 		status = packet->Reassemble(buffer);
589 		delete packet;
590 
591 		// _buffer does not change
592 		return status;
593 	}
594 
595 	// This indicates that the packet is not yet complete
596 	*_buffer = NULL;
597 	return B_OK;
598 }
599 
600 
601 /*!	Fragments the incoming buffer and send all fragments via the specified
602 	\a route.
603 */
604 static status_t
605 send_fragments(ipv4_protocol* protocol, struct net_route* route,
606 	net_buffer* buffer, uint32 mtu)
607 {
608 	TRACE_SK(protocol, "SendFragments(%lu bytes, mtu %lu)", buffer->size, mtu);
609 
610 	NetBufferHeaderReader<ipv4_header> originalHeader(buffer);
611 	if (originalHeader.Status() != B_OK)
612 		return originalHeader.Status();
613 
614 	uint16 headerLength = originalHeader->HeaderLength();
615 	uint32 bytesLeft = buffer->size - headerLength;
616 	uint32 fragmentOffset = 0;
617 	status_t status = B_OK;
618 
619 	net_buffer* headerBuffer = gBufferModule->split(buffer, headerLength);
620 	if (headerBuffer == NULL)
621 		return B_NO_MEMORY;
622 
623 	// TODO: we need to make sure ipv4_header is contiguous or
624 	// use another construct.
625 	NetBufferHeaderReader<ipv4_header> bufferHeader(headerBuffer);
626 	ipv4_header* header = &bufferHeader.Data();
627 
628 	// Adapt MTU to be a multiple of 8 (fragment offsets can only be specified
629 	// this way)
630 	mtu -= headerLength;
631 	mtu &= ~7;
632 	TRACE("  adjusted MTU to %ld, bytesLeft %ld", mtu, bytesLeft);
633 
634 	while (bytesLeft > 0) {
635 		uint32 fragmentLength = min_c(bytesLeft, mtu);
636 		bytesLeft -= fragmentLength;
637 		bool lastFragment = bytesLeft == 0;
638 
639 		header->total_length = htons(fragmentLength + headerLength);
640 		header->fragment_offset = htons((lastFragment ? 0 : IP_MORE_FRAGMENTS)
641 			| (fragmentOffset >> 3));
642 		header->checksum = 0;
643 		header->checksum = gStackModule->checksum((uint8*)header,
644 			headerLength);
645 			// TODO: compute the checksum only for those parts that changed?
646 
647 		TRACE("  send fragment of %ld bytes (%ld bytes left)", fragmentLength,
648 			bytesLeft);
649 
650 		net_buffer* fragmentBuffer;
651 		if (!lastFragment) {
652 			fragmentBuffer = gBufferModule->split(buffer, fragmentLength);
653 			fragmentOffset += fragmentLength;
654 		} else
655 			fragmentBuffer = buffer;
656 
657 		if (fragmentBuffer == NULL) {
658 			status = B_NO_MEMORY;
659 			break;
660 		}
661 
662 		// copy header to fragment
663 		status = gBufferModule->prepend(fragmentBuffer, header, headerLength);
664 
665 		// send fragment
666 		if (status == B_OK)
667 			status = sDatalinkModule->send_data(route, fragmentBuffer);
668 
669 		if (lastFragment) {
670 			// we don't own the last buffer, so we don't have to free it
671 			break;
672 		}
673 
674 		if (status != B_OK) {
675 			gBufferModule->free(fragmentBuffer);
676 			break;
677 		}
678 	}
679 
680 	gBufferModule->free(headerBuffer);
681 	return status;
682 }
683 
684 
685 static status_t
686 deliver_multicast(net_protocol_module_info* module, net_buffer* buffer,
687 	bool deliverToRaw)
688 {
689 	if (module->deliver_data == NULL)
690 		return B_OK;
691 
692 	MutexLocker _(sMulticastGroupsLock);
693 
694 	sockaddr_in* multicastAddr = (sockaddr_in*)buffer->destination;
695 
696 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
697 		&multicastAddr->sin_addr, buffer->interface->index));
698 
699 	while (it.HasNext()) {
700 		IPv4GroupInterface* state = it.Next();
701 
702 		if (deliverToRaw && state->Parent()->Socket()->raw == NULL)
703 			continue;
704 
705 		if (state->FilterAccepts(buffer)) {
706 			// as Multicast filters are installed with an IPv4 protocol
707 			// reference, we need to go and find the appropriate instance
708 			// related to the 'receiving protocol' with module 'module'.
709 			net_protocol* proto
710 				= state->Parent()->Socket()->socket->first_protocol;
711 
712 			while (proto && proto->module != module)
713 				proto = proto->next;
714 
715 			if (proto)
716 				module->deliver_data(proto, buffer);
717 		}
718 	}
719 
720 	return B_OK;
721 }
722 
723 
724 static void
725 raw_receive_data(net_buffer* buffer)
726 {
727 	MutexLocker locker(sRawSocketsLock);
728 
729 	if (sRawSockets.IsEmpty())
730 		return;
731 
732 	TRACE("RawReceiveData(%i)", buffer->protocol);
733 
734 	if (buffer->flags & MSG_MCAST) {
735 		// we need to call deliver_multicast here separately as
736 		// buffer still has the IP header, and it won't in the
737 		// next call. This isn't very optimized but works for now.
738 		// A better solution would be to hold separate hash tables
739 		// and lists for RAW and non-RAW sockets.
740 		deliver_multicast(&gIPv4Module, buffer, true);
741 	} else {
742 		RawSocketList::Iterator iterator = sRawSockets.GetIterator();
743 
744 		while (iterator.HasNext()) {
745 			RawSocket* raw = iterator.Next();
746 
747 			if (raw->Socket()->protocol == buffer->protocol)
748 				raw->SocketEnqueue(buffer);
749 		}
750 	}
751 }
752 
753 
754 static inline sockaddr*
755 fill_sockaddr_in(sockaddr_in* target, in_addr_t address)
756 {
757 	target->sin_family = AF_INET;
758 	target->sin_len = sizeof(sockaddr_in);
759 	target->sin_port = 0;
760 	target->sin_addr.s_addr = address;
761 	return (sockaddr*)target;
762 }
763 
764 
765 status_t
766 IPv4Multicast::JoinGroup(IPv4GroupInterface* state)
767 {
768 	MutexLocker _(sMulticastGroupsLock);
769 
770 	sockaddr_in groupAddr;
771 	net_interface* interface = state->Interface();
772 
773 	status_t status = interface->first_info->join_multicast(
774 		interface->first_protocol,
775 		fill_sockaddr_in(&groupAddr, state->Address().s_addr));
776 	if (status != B_OK)
777 		return status;
778 
779 	sMulticastState->Insert(state);
780 	return B_OK;
781 }
782 
783 
784 status_t
785 IPv4Multicast::LeaveGroup(IPv4GroupInterface* state)
786 {
787 	MutexLocker _(sMulticastGroupsLock);
788 
789 	sMulticastState->Remove(state);
790 
791 	sockaddr_in groupAddr;
792 	net_interface* interface = state->Interface();
793 
794 	return interface->first_protocol->module->join_multicast(
795 		interface->first_protocol,
796 		fill_sockaddr_in(&groupAddr, state->Address().s_addr));
797 }
798 
799 
800 static net_protocol_module_info*
801 receiving_protocol(uint8 protocol)
802 {
803 	net_protocol_module_info* module = sReceivingProtocol[protocol];
804 	if (module != NULL)
805 		return module;
806 
807 	MutexLocker locker(sReceivingProtocolLock);
808 
809 	module = sReceivingProtocol[protocol];
810 	if (module != NULL)
811 		return module;
812 
813 	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol,
814 			&module) == B_OK)
815 		sReceivingProtocol[protocol] = module;
816 
817 	return module;
818 }
819 
820 
821 static status_t
822 ipv4_delta_group(IPv4GroupInterface* group, int option,
823 	net_interface* interface, const in_addr* sourceAddr)
824 {
825 	switch (option) {
826 		case IP_ADD_MEMBERSHIP:
827 			return group->Add();
828 		case IP_DROP_MEMBERSHIP:
829 			return group->Drop();
830 		case IP_BLOCK_SOURCE:
831 			return group->BlockSource(*sourceAddr);
832 		case IP_UNBLOCK_SOURCE:
833 			return group->UnblockSource(*sourceAddr);
834 		case IP_ADD_SOURCE_MEMBERSHIP:
835 			return group->AddSSM(*sourceAddr);
836 		case IP_DROP_SOURCE_MEMBERSHIP:
837 			return group->DropSSM(*sourceAddr);
838 	}
839 
840 	return B_ERROR;
841 }
842 
843 
844 static status_t
845 ipv4_delta_membership(ipv4_protocol* protocol, int option,
846 	net_interface* interface, const in_addr* groupAddr,
847 	const in_addr* sourceAddr)
848 {
849 	IPv4MulticastFilter &filter = protocol->multicast_filter;
850 	IPv4GroupInterface* state = NULL;
851 	status_t status = B_OK;
852 
853 	switch (option) {
854 		case IP_ADD_MEMBERSHIP:
855 		case IP_ADD_SOURCE_MEMBERSHIP:
856 			status = filter.GetState(*groupAddr, interface, state, true);
857 			break;
858 
859 		case IP_DROP_MEMBERSHIP:
860 		case IP_BLOCK_SOURCE:
861 		case IP_UNBLOCK_SOURCE:
862 		case IP_DROP_SOURCE_MEMBERSHIP:
863 			filter.GetState(*groupAddr, interface, state, false);
864 			if (state == NULL) {
865 				if (option == IP_DROP_MEMBERSHIP
866 					|| option == IP_DROP_SOURCE_MEMBERSHIP)
867 					return EADDRNOTAVAIL;
868 
869 				return B_BAD_VALUE;
870 			}
871 			break;
872 	}
873 
874 	if (status != B_OK)
875 		return status;
876 
877 	status = ipv4_delta_group(state, option, interface, sourceAddr);
878 	filter.ReturnState(state);
879 	return status;
880 }
881 
882 
/*!	Translates a protocol independent MCAST_* membership option to the
	corresponding IPv4 specific IP_* option.
	\return The IP_* option, or -1 if \a option is none of the MCAST_*
		membership options.
*/
static int
generic_to_ipv4(int option)
{
	static const struct {
		int	generic;
		int	ipv4;
	} kOptionMap[] = {
		{ MCAST_JOIN_GROUP,			IP_ADD_MEMBERSHIP },
		{ MCAST_JOIN_SOURCE_GROUP,	IP_ADD_SOURCE_MEMBERSHIP },
		{ MCAST_LEAVE_GROUP,		IP_DROP_MEMBERSHIP },
		{ MCAST_BLOCK_SOURCE,		IP_BLOCK_SOURCE },
		{ MCAST_UNBLOCK_SOURCE,		IP_UNBLOCK_SOURCE },
		{ MCAST_LEAVE_SOURCE_GROUP,	IP_DROP_SOURCE_MEMBERSHIP }
	};

	for (size_t i = 0; i < sizeof(kOptionMap) / sizeof(kOptionMap[0]); i++) {
		if (kOptionMap[i].generic == option)
			return kOptionMap[i].ipv4;
	}

	return -1;
}
903 
904 
905 static net_interface*
906 get_multicast_interface(ipv4_protocol* protocol, const in_addr* address)
907 {
908 	sockaddr_in groupAddr;
909 	net_route* route = sDatalinkModule->get_route(sDomain,
910 		fill_sockaddr_in(&groupAddr, address ? address->s_addr : INADDR_ANY));
911 	if (route == NULL)
912 		return NULL;
913 
914 	return route->interface;
915 }
916 
917 
918 static status_t
919 ipv4_delta_membership(ipv4_protocol* protocol, int option,
920 	in_addr* interfaceAddr, in_addr* groupAddr, in_addr* sourceAddr)
921 {
922 	net_interface* interface = NULL;
923 
924 	if (interfaceAddr->s_addr == INADDR_ANY) {
925 		interface = get_multicast_interface(protocol, groupAddr);
926 	} else {
927 		sockaddr_in address;
928 		interface = sDatalinkModule->get_interface_with_address(sDomain,
929 			fill_sockaddr_in(&address, interfaceAddr->s_addr));
930 	}
931 
932 	if (interface == NULL)
933 		return ENODEV;
934 
935 	return ipv4_delta_membership(protocol, option, interface,
936 		groupAddr, sourceAddr);
937 }
938 
939 
940 static status_t
941 ipv4_generic_delta_membership(ipv4_protocol* protocol, int option,
942 	uint32 index, const sockaddr_storage* _groupAddr,
943 	const sockaddr_storage* _sourceAddr)
944 {
945 	if (_groupAddr->ss_family != AF_INET)
946 		return B_BAD_VALUE;
947 
948 	if (_sourceAddr && _sourceAddr->ss_family != AF_INET)
949 		return B_BAD_VALUE;
950 
951 	const in_addr* groupAddr = &((const sockaddr_in*)_groupAddr)->sin_addr;
952 
953 	net_interface* interface;
954 	if (index == 0)
955 		interface = get_multicast_interface(protocol, groupAddr);
956 	else
957 		interface = sDatalinkModule->get_interface(sDomain, index);
958 
959 	if (interface == NULL)
960 		return ENODEV;
961 
962 	const in_addr* sourceAddr = NULL;
963 	if (_sourceAddr)
964 		sourceAddr = &((const sockaddr_in*)_sourceAddr)->sin_addr;
965 
966 	return ipv4_delta_membership(protocol, generic_to_ipv4(option), interface,
967 		groupAddr, sourceAddr);
968 }
969 
970 
971 static status_t
972 get_int_option(void* target, size_t length, int value)
973 {
974 	if (length != sizeof(int))
975 		return B_BAD_VALUE;
976 
977 	return user_memcpy(target, &value, sizeof(int));
978 }
979 
980 
981 template<typename Type> static status_t
982 set_int_option(Type &target, const void* _value, size_t length)
983 {
984 	int value;
985 
986 	if (length != sizeof(int))
987 		return B_BAD_VALUE;
988 
989 	if (user_memcpy(&value, _value, sizeof(int)) != B_OK)
990 		return B_BAD_ADDRESS;
991 
992 	target = value;
993 	return B_OK;
994 }
995 
996 
997 //	#pragma mark -
998 
999 
1000 net_protocol*
1001 ipv4_init_protocol(net_socket* socket)
1002 {
1003 	ipv4_protocol* protocol = new (std::nothrow) ipv4_protocol();
1004 	if (protocol == NULL)
1005 		return NULL;
1006 
1007 	protocol->raw = NULL;
1008 	protocol->service_type = 0;
1009 	protocol->time_to_live = kDefaultTTL;
1010 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
1011 	protocol->flags = 0;
1012 	protocol->interface_address = NULL;
1013 	return protocol;
1014 }
1015 
1016 
1017 status_t
1018 ipv4_uninit_protocol(net_protocol* _protocol)
1019 {
1020 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1021 
1022 	delete protocol->raw;
1023 	delete protocol->interface_address;
1024 	delete protocol;
1025 	return B_OK;
1026 }
1027 
1028 
1029 /*!	Since open() is only called on the top level protocol, when we get here
1030 	it means we are on a SOCK_RAW socket.
1031 */
1032 status_t
1033 ipv4_open(net_protocol* _protocol)
1034 {
1035 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1036 
1037 	RawSocket* raw = new (std::nothrow) RawSocket(protocol->socket);
1038 	if (raw == NULL)
1039 		return B_NO_MEMORY;
1040 
1041 	status_t status = raw->InitCheck();
1042 	if (status != B_OK) {
1043 		delete raw;
1044 		return status;
1045 	}
1046 
1047 	TRACE_SK(protocol, "Open()");
1048 
1049 	protocol->raw = raw;
1050 
1051 	MutexLocker locker(sRawSocketsLock);
1052 	sRawSockets.Add(raw);
1053 	return B_OK;
1054 }
1055 
1056 
1057 status_t
1058 ipv4_close(net_protocol* _protocol)
1059 {
1060 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1061 	RawSocket* raw = protocol->raw;
1062 	if (raw == NULL)
1063 		return B_ERROR;
1064 
1065 	TRACE_SK(protocol, "Close()");
1066 
1067 	MutexLocker locker(sRawSocketsLock);
1068 	sRawSockets.Remove(raw);
1069 	delete raw;
1070 	protocol->raw = NULL;
1071 
1072 	return B_OK;
1073 }
1074 
1075 
1076 status_t
1077 ipv4_free(net_protocol* protocol)
1078 {
1079 	return B_OK;
1080 }
1081 
1082 
1083 status_t
1084 ipv4_connect(net_protocol* protocol, const struct sockaddr* address)
1085 {
1086 	return B_ERROR;
1087 }
1088 
1089 
1090 status_t
1091 ipv4_accept(net_protocol* protocol, struct net_socket** _acceptedSocket)
1092 {
1093 	return EOPNOTSUPP;
1094 }
1095 
1096 
1097 status_t
1098 ipv4_control(net_protocol* _protocol, int level, int option, void* value,
1099 	size_t* _length)
1100 {
1101 	if ((level & LEVEL_MASK) != IPPROTO_IP)
1102 		return sDatalinkModule->control(sDomain, option, value, _length);
1103 
1104 	return B_BAD_VALUE;
1105 }
1106 
1107 
1108 status_t
1109 ipv4_getsockopt(net_protocol* _protocol, int level, int option, void* value,
1110 	int* _length)
1111 {
1112 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1113 
1114 	if (level == IPPROTO_IP) {
1115 		if (option == IP_HDRINCL) {
1116 			return get_int_option(value, *_length,
1117 				(protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0);
1118 		}
1119 		if (option == IP_RECVDSTADDR) {
1120 			return get_int_option(value, *_length,
1121 				(protocol->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0);
1122 		}
1123 		if (option == IP_TTL)
1124 			return get_int_option(value, *_length, protocol->time_to_live);
1125 		if (option == IP_TOS)
1126 			return get_int_option(value, *_length, protocol->service_type);
1127 		if (option == IP_MULTICAST_TTL) {
1128 			return get_int_option(value, *_length,
1129 				protocol->multicast_time_to_live);
1130 		}
1131 		if (option == IP_ADD_MEMBERSHIP
1132 			|| option == IP_DROP_MEMBERSHIP
1133 			|| option == IP_BLOCK_SOURCE
1134 			|| option == IP_UNBLOCK_SOURCE
1135 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1136 			|| option == IP_DROP_SOURCE_MEMBERSHIP
1137 			|| option == MCAST_JOIN_GROUP
1138 			|| option == MCAST_LEAVE_GROUP
1139 			|| option == MCAST_BLOCK_SOURCE
1140 			|| option == MCAST_UNBLOCK_SOURCE
1141 			|| option == MCAST_JOIN_SOURCE_GROUP
1142 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1143 			// RFC 3678, Section 4.1:
1144 			// ``An error of EOPNOTSUPP is returned if these options are
1145 			// used with getsockopt().''
1146 			return EOPNOTSUPP;
1147 		}
1148 
1149 		dprintf("IPv4::getsockopt(): get unknown option: %d\n", option);
1150 		return ENOPROTOOPT;
1151 	}
1152 
1153 	return sSocketModule->get_option(protocol->socket, level, option, value,
1154 		_length);
1155 }
1156 
1157 
1158 status_t
1159 ipv4_setsockopt(net_protocol* _protocol, int level, int option,
1160 	const void* value, int length)
1161 {
1162 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1163 
1164 	if (level == IPPROTO_IP) {
1165 		if (option == IP_HDRINCL) {
1166 			int headerIncluded;
1167 			if (length != sizeof(int))
1168 				return B_BAD_VALUE;
1169 			if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded))
1170 					!= B_OK)
1171 				return B_BAD_ADDRESS;
1172 
1173 			if (headerIncluded)
1174 				protocol->flags |= IP_FLAG_HEADER_INCLUDED;
1175 			else
1176 				protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
1177 
1178 			return B_OK;
1179 		}
1180 		if (option == IP_RECVDSTADDR) {
1181 			int getAddress;
1182 			if (length != sizeof(int))
1183 				return B_BAD_VALUE;
1184 			if (user_memcpy(&getAddress, value, sizeof(int)) != B_OK)
1185 				return B_BAD_ADDRESS;
1186 
1187 			if (getAddress && (protocol->socket->type == SOCK_DGRAM
1188 					|| protocol->socket->type == SOCK_RAW))
1189 				protocol->flags |= IP_FLAG_RECEIVE_DEST_ADDR;
1190 			else
1191 				protocol->flags &= ~IP_FLAG_RECEIVE_DEST_ADDR;
1192 
1193 			return B_OK;
1194 		}
1195 		if (option == IP_TTL)
1196 			return set_int_option(protocol->time_to_live, value, length);
1197 		if (option == IP_TOS)
1198 			return set_int_option(protocol->service_type, value, length);
1199 		if (option == IP_MULTICAST_IF) {
1200 			if (length != sizeof(struct in_addr))
1201 				return B_BAD_VALUE;
1202 
1203 			struct sockaddr_in* address = new (std::nothrow) sockaddr_in;
1204 			if (address == NULL)
1205 				return B_NO_MEMORY;
1206 
1207 			if (user_memcpy(&address->sin_addr, value, sizeof(struct in_addr))
1208 					!= B_OK) {
1209 				delete address;
1210 				return B_BAD_ADDRESS;
1211 			}
1212 
1213 			// Using INADDR_ANY to remove the previous setting.
1214 			if (address->sin_addr.s_addr == htonl(INADDR_ANY)) {
1215 				delete address;
1216 				delete protocol->interface_address;
1217 				protocol->interface_address = NULL;
1218 				return B_OK;
1219 			}
1220 
1221 			struct net_interface* interface
1222 				= sDatalinkModule->get_interface_with_address(sDomain,
1223 					(struct sockaddr*)address);
1224 			if (interface == NULL) {
1225 				delete address;
1226 				return EADDRNOTAVAIL;
1227 			}
1228 
1229 			delete protocol->interface_address;
1230 			protocol->interface_address = (struct sockaddr*)address;
1231 			return B_OK;
1232 		}
1233 		if (option == IP_MULTICAST_TTL) {
1234 			return set_int_option(protocol->multicast_time_to_live, value,
1235 				length);
1236 		}
1237 		if (option == IP_ADD_MEMBERSHIP || option == IP_DROP_MEMBERSHIP) {
1238 			ip_mreq mreq;
1239 			if (length != sizeof(ip_mreq))
1240 				return B_BAD_VALUE;
1241 			if (user_memcpy(&mreq, value, sizeof(ip_mreq)) != B_OK)
1242 				return B_BAD_ADDRESS;
1243 
1244 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1245 				&mreq.imr_multiaddr, NULL);
1246 		}
1247 		if (option == IP_BLOCK_SOURCE
1248 			|| option == IP_UNBLOCK_SOURCE
1249 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1250 			|| option == IP_DROP_SOURCE_MEMBERSHIP) {
1251 			ip_mreq_source mreq;
1252 			if (length != sizeof(ip_mreq_source))
1253 				return B_BAD_VALUE;
1254 			if (user_memcpy(&mreq, value, sizeof(ip_mreq_source)) != B_OK)
1255 				return B_BAD_ADDRESS;
1256 
1257 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1258 				&mreq.imr_multiaddr, &mreq.imr_sourceaddr);
1259 		}
1260 		if (option == MCAST_LEAVE_GROUP || option == MCAST_JOIN_GROUP) {
1261 			group_req greq;
1262 			if (length != sizeof(group_req))
1263 				return B_BAD_VALUE;
1264 			if (user_memcpy(&greq, value, sizeof(group_req)) != B_OK)
1265 				return B_BAD_ADDRESS;
1266 
1267 			return ipv4_generic_delta_membership(protocol, option,
1268 				greq.gr_interface, &greq.gr_group, NULL);
1269 		}
1270 		if (option == MCAST_BLOCK_SOURCE
1271 			|| option == MCAST_UNBLOCK_SOURCE
1272 			|| option == MCAST_JOIN_SOURCE_GROUP
1273 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1274 			group_source_req greq;
1275 			if (length != sizeof(group_source_req))
1276 				return B_BAD_VALUE;
1277 			if (user_memcpy(&greq, value, sizeof(group_source_req)) != B_OK)
1278 				return B_BAD_ADDRESS;
1279 
1280 			return ipv4_generic_delta_membership(protocol, option,
1281 				greq.gsr_interface, &greq.gsr_group, &greq.gsr_source);
1282 		}
1283 
1284 		dprintf("IPv4::setsockopt(): set unknown option: %d\n", option);
1285 		return ENOPROTOOPT;
1286 	}
1287 
1288 	return sSocketModule->set_option(protocol->socket, level, option,
1289 		value, length);
1290 }
1291 
1292 
1293 status_t
1294 ipv4_bind(net_protocol* protocol, const struct sockaddr* address)
1295 {
1296 	if (address->sa_family != AF_INET)
1297 		return EAFNOSUPPORT;
1298 
1299 	// only INADDR_ANY and addresses of local interfaces are accepted:
1300 	if (((sockaddr_in*)address)->sin_addr.s_addr == INADDR_ANY
1301 		|| IN_MULTICAST(ntohl(((sockaddr_in*)address)->sin_addr.s_addr))
1302 		|| sDatalinkModule->is_local_address(sDomain, address, NULL, NULL)) {
1303 		memcpy(&protocol->socket->address, address, sizeof(struct sockaddr_in));
1304 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
1305 			// explicitly set length, as our callers can't be trusted to
1306 			// always provide the correct length!
1307 		return B_OK;
1308 	}
1309 
1310 	return B_ERROR;
1311 		// address is unknown on this host
1312 }
1313 
1314 
1315 status_t
1316 ipv4_unbind(net_protocol* protocol, struct sockaddr* address)
1317 {
1318 	// nothing to do here
1319 	return B_OK;
1320 }
1321 
1322 
1323 status_t
1324 ipv4_listen(net_protocol* protocol, int count)
1325 {
1326 	return EOPNOTSUPP;
1327 }
1328 
1329 
1330 status_t
1331 ipv4_shutdown(net_protocol* protocol, int direction)
1332 {
1333 	return EOPNOTSUPP;
1334 }
1335 
1336 
1337 status_t
1338 ipv4_send_routed_data(net_protocol* _protocol, struct net_route* route,
1339 	net_buffer* buffer)
1340 {
1341 	if (route == NULL)
1342 		return B_BAD_VALUE;
1343 
1344 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1345 	net_interface* interface = route->interface;
1346 
1347 	TRACE_SK(protocol, "SendRoutedData(%p, %p [%ld bytes])", route, buffer,
1348 		buffer->size);
1349 
1350 	sockaddr_in& source = *(sockaddr_in*)buffer->source;
1351 	sockaddr_in& destination = *(sockaddr_in*)buffer->destination;
1352 	sockaddr_in& broadcastAddress = *(sockaddr_in*)interface->destination;
1353 
1354 	bool headerIncluded = false, checksumNeeded = true;
1355 	if (protocol != NULL)
1356 		headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
1357 
1358 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1359 
1360 	if (destination.sin_addr.s_addr == INADDR_ANY)
1361 		return EDESTADDRREQ;
1362 
1363 	if ((interface->device->flags & IFF_BROADCAST) != 0
1364 		&& (destination.sin_addr.s_addr == INADDR_BROADCAST
1365 			|| destination.sin_addr.s_addr
1366 				== broadcastAddress.sin_addr.s_addr)) {
1367 		if (protocol && !(protocol->socket->options & SO_BROADCAST))
1368 			return B_BAD_VALUE;
1369 		buffer->flags |= MSG_BCAST;
1370 	} else if (IN_MULTICAST(ntohl(destination.sin_addr.s_addr)))
1371 		buffer->flags |= MSG_MCAST;
1372 
1373 	// Add IP header (if needed)
1374 
1375 	if (!headerIncluded) {
1376 		NetBufferPrepend<ipv4_header> header(buffer);
1377 		if (header.Status() != B_OK)
1378 			return header.Status();
1379 
1380 		header->version = IP_VERSION;
1381 		header->header_length = sizeof(ipv4_header) / 4;
1382 		header->service_type = protocol ? protocol->service_type : 0;
1383 		header->total_length = htons(buffer->size);
1384 		header->id = htons(atomic_add(&sPacketID, 1));
1385 		header->fragment_offset = 0;
1386 		if (protocol) {
1387 			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
1388 				? protocol->multicast_time_to_live : protocol->time_to_live;
1389 		} else {
1390 			header->time_to_live = (buffer->flags & MSG_MCAST) != 0
1391 				? kDefaultMulticastTTL : kDefaultTTL;
1392 		}
1393 		header->protocol = protocol
1394 			? protocol->socket->protocol : buffer->protocol;
1395 		header->checksum = 0;
1396 
1397 		header->source = source.sin_addr.s_addr;
1398 		header->destination = destination.sin_addr.s_addr;
1399 	} else {
1400 		// if IP_HDRINCL, check if the source address is set
1401 		NetBufferHeaderReader<ipv4_header> header(buffer);
1402 		if (header.Status() != B_OK)
1403 			return header.Status();
1404 
1405 		if (header->source == 0) {
1406 			header->source = source.sin_addr.s_addr;
1407 			header->checksum = 0;
1408 			header.Sync();
1409 		} else
1410 			checksumNeeded = false;
1411 	}
1412 
1413 	if (buffer->size > 0xffff)
1414 		return EMSGSIZE;
1415 
1416 	if (checksumNeeded) {
1417 		*IPChecksumField(buffer) = gBufferModule->checksum(buffer, 0,
1418 			sizeof(ipv4_header), true);
1419 	}
1420 
1421 	TRACE_SK(protocol, "  SendRoutedData(): header chksum: %ld, buffer "
1422 		"checksum: %ld",
1423 		gBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
1424 		gBufferModule->checksum(buffer, 0, buffer->size, true));
1425 
1426 	TRACE_SK(protocol, "  SendRoutedData(): destination: %08x",
1427 		ntohl(destination.sin_addr.s_addr));
1428 
1429 	uint32 mtu = route->mtu ? route->mtu : interface->mtu;
1430 	if (buffer->size > mtu) {
1431 		// we need to fragment the packet
1432 		return send_fragments(protocol, route, buffer, mtu);
1433 	}
1434 
1435 	return sDatalinkModule->send_data(route, buffer);
1436 }
1437 
1438 
1439 status_t
1440 ipv4_send_data(net_protocol* _protocol, net_buffer* buffer)
1441 {
1442 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1443 
1444 	TRACE_SK(protocol, "SendData(%p [%ld bytes])", buffer, buffer->size);
1445 
1446 	if (protocol && (protocol->flags & IP_FLAG_HEADER_INCLUDED)) {
1447 		if (buffer->size < sizeof(ipv4_header))
1448 			return B_BAD_VALUE;
1449 
1450 		sockaddr_in* source = (sockaddr_in*)buffer->source;
1451 		sockaddr_in* destination = (sockaddr_in*)buffer->destination;
1452 
1453 		fill_sockaddr_in(source, *NetBufferField<in_addr_t,
1454 			offsetof(ipv4_header, source)>(buffer));
1455 		fill_sockaddr_in(destination, *NetBufferField<in_addr_t,
1456 			offsetof(ipv4_header, destination)>(buffer));
1457 	}
1458 
1459 	// handle IP_MULTICAST_IF
1460 	if (IN_MULTICAST(ntohl(((sockaddr_in*)buffer->destination)->
1461 			sin_addr.s_addr)) && protocol->interface_address != NULL) {
1462 		net_interface* interface
1463 			= sDatalinkModule->get_interface_with_address(sDomain,
1464 				protocol->interface_address);
1465 		if (interface == NULL || (interface->flags & IFF_UP) == 0)
1466 			return EADDRNOTAVAIL;
1467 
1468 		buffer->interface = interface;
1469 
1470 		net_route* route = sDatalinkModule->get_route(sDomain,
1471 			interface->address);
1472 		if (route == NULL)
1473 			return ENETUNREACH;
1474 
1475 		return sDatalinkModule->send_data(route, buffer);
1476 	}
1477 
1478 	return sDatalinkModule->send_datagram(protocol, sDomain, buffer);
1479 }
1480 
1481 
1482 ssize_t
1483 ipv4_send_avail(net_protocol* protocol)
1484 {
1485 	return B_ERROR;
1486 }
1487 
1488 
1489 status_t
1490 ipv4_read_data(net_protocol* _protocol, size_t numBytes, uint32 flags,
1491 	net_buffer** _buffer)
1492 {
1493 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1494 	RawSocket* raw = protocol->raw;
1495 	if (raw == NULL)
1496 		return B_ERROR;
1497 
1498 	TRACE_SK(protocol, "ReadData(%lu, 0x%lx)", numBytes, flags);
1499 
1500 	return raw->SocketDequeue(flags, _buffer);
1501 }
1502 
1503 
1504 ssize_t
1505 ipv4_read_avail(net_protocol* _protocol)
1506 {
1507 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1508 	RawSocket* raw = protocol->raw;
1509 	if (raw == NULL)
1510 		return B_ERROR;
1511 
1512 	return raw->AvailableData();
1513 }
1514 
1515 
1516 struct net_domain*
1517 ipv4_get_domain(net_protocol* protocol)
1518 {
1519 	return sDomain;
1520 }
1521 
1522 
1523 size_t
1524 ipv4_get_mtu(net_protocol* protocol, const struct sockaddr* address)
1525 {
1526 	net_route* route = sDatalinkModule->get_route(sDomain, address);
1527 	if (route == NULL)
1528 		return 0;
1529 
1530 	size_t mtu;
1531 	if (route->mtu != 0)
1532 		mtu = route->mtu;
1533 	else
1534 		mtu = route->interface->mtu;
1535 
1536 	sDatalinkModule->put_route(sDomain, route);
1537 	return mtu - sizeof(ipv4_header);
1538 }
1539 
1540 
1541 status_t
1542 ipv4_receive_data(net_buffer* buffer)
1543 {
1544 	TRACE("ReceiveData(%p [%ld bytes])", buffer, buffer->size);
1545 
1546 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1547 	if (bufferHeader.Status() != B_OK)
1548 		return bufferHeader.Status();
1549 
1550 	ipv4_header &header = bufferHeader.Data();
1551 	//dump_ipv4_header(header);
1552 
1553 	if (header.version != IP_VERSION)
1554 		return B_BAD_TYPE;
1555 
1556 	uint16 packetLength = header.TotalLength();
1557 	uint16 headerLength = header.HeaderLength();
1558 	if (packetLength > buffer->size
1559 		|| headerLength < sizeof(ipv4_header))
1560 		return B_BAD_DATA;
1561 
1562 	// TODO: would be nice to have a direct checksum function somewhere
1563 	if (gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1564 		return B_BAD_DATA;
1565 
1566 	// lower layers notion of Broadcast or Multicast have no relevance to us
1567 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1568 
1569 	sockaddr_in destination;
1570 	fill_sockaddr_in(&destination, header.destination);
1571 
1572 	if (header.destination == INADDR_BROADCAST) {
1573 		buffer->flags |= MSG_BCAST;
1574 	} else if (IN_MULTICAST(ntohl(header.destination))) {
1575 		buffer->flags |= MSG_MCAST;
1576 	} else {
1577 		uint32 matchedAddressType = 0;
1578 
1579 		// test if the packet is really for us
1580 		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1581 				&buffer->interface, &matchedAddressType)
1582 			&& !sDatalinkModule->is_local_link_address(sDomain, true,
1583 				buffer->destination, &buffer->interface)) {
1584 			TRACE("  ReceiveData(): packet was not for us %x -> %x",
1585 				ntohl(header.source), ntohl(header.destination));
1586 			return B_ERROR;
1587 		}
1588 
1589 		// copy over special address types (MSG_BCAST or MSG_MCAST):
1590 		buffer->flags |= matchedAddressType;
1591 	}
1592 
1593 	// set net_buffer's source/destination address
1594 	fill_sockaddr_in((struct sockaddr_in*)buffer->source, header.source);
1595 	memcpy(buffer->destination, &destination, sizeof(sockaddr_in));
1596 
1597 	uint8 protocol = buffer->protocol = header.protocol;
1598 
1599 	// remove any trailing/padding data
1600 	status_t status = gBufferModule->trim(buffer, packetLength);
1601 	if (status != B_OK)
1602 		return status;
1603 
1604 	// check for fragmentation
1605 	uint16 fragmentOffset = ntohs(header.fragment_offset);
1606 	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
1607 		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
1608 		// this is a fragment
1609 		TRACE("  ReceiveData(): Found a Fragment!");
1610 		status = reassemble_fragments(header, &buffer);
1611 		TRACE("  ReceiveData():  -> %s", strerror(status));
1612 		if (status != B_OK)
1613 			return status;
1614 
1615 		if (buffer == NULL) {
1616 			// buffer was put into fragment packet
1617 			TRACE("  ReceiveData(): Not yet assembled.");
1618 			return B_OK;
1619 		}
1620 	}
1621 
1622 	// Since the buffer might have been changed (reassembled fragment)
1623 	// we must no longer access bufferHeader or header anymore after
1624 	// this point
1625 
1626 	raw_receive_data(buffer);
1627 
1628 	gBufferModule->remove_header(buffer, headerLength);
1629 		// the header is of variable size and may include IP options
1630 		// (that we ignore for now)
1631 
1632 	net_protocol_module_info* module = receiving_protocol(protocol);
1633 	if (module == NULL) {
1634 		// no handler for this packet
1635 		return EAFNOSUPPORT;
1636 	}
1637 
1638 	if ((buffer->flags & MSG_MCAST) != 0) {
1639 		// Unfortunely historical reasons dictate that the IP multicast
1640 		// model be a little different from the unicast one. We deliver
1641 		// this frame directly to all sockets registered with interest
1642 		// for this multicast group.
1643 		return deliver_multicast(module, buffer, false);
1644 	}
1645 
1646 	return module->receive_data(buffer);
1647 }
1648 
1649 
1650 status_t
1651 ipv4_deliver_data(net_protocol* _protocol, net_buffer* buffer)
1652 {
1653 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1654 
1655 	if (protocol->raw == NULL)
1656 		return B_ERROR;
1657 
1658 	return protocol->raw->SocketEnqueue(buffer);
1659 }
1660 
1661 
1662 status_t
1663 ipv4_error(uint32 code, net_buffer* data)
1664 {
1665 	return B_ERROR;
1666 }
1667 
1668 
1669 status_t
1670 ipv4_error_reply(net_protocol* protocol, net_buffer* causedError, uint32 code,
1671 	void* errorData)
1672 {
1673 	return B_ERROR;
1674 }
1675 
1676 
1677 ssize_t
1678 ipv4_process_ancillary_data_no_container(net_protocol* protocol,
1679 	net_buffer* buffer, void* msgControl, size_t msgControlLen)
1680 {
1681 	ssize_t bytesWritten = 0;
1682 
1683 	if ((((ipv4_protocol*)protocol)->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0) {
1684 		if (msgControlLen < CMSG_SPACE(sizeof(struct in_addr)))
1685 			return B_NO_MEMORY;
1686 
1687 		cmsghdr* messageHeader = (cmsghdr*)msgControl;
1688 		messageHeader->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
1689 		messageHeader->cmsg_level = IPPROTO_IP;
1690 		messageHeader->cmsg_type = IP_RECVDSTADDR;
1691 
1692 		memcpy(CMSG_DATA(messageHeader),
1693 		 	&((struct sockaddr_in*)buffer->destination)->sin_addr,
1694 		 	sizeof(struct in_addr));
1695 
1696 		bytesWritten += CMSG_SPACE(sizeof(struct in_addr));
1697 	}
1698 
1699 	return bytesWritten;
1700 }
1701 
1702 
1703 //	#pragma mark -
1704 
1705 
1706 status_t
1707 init_ipv4()
1708 {
1709 	sPacketID = (int32)system_time();
1710 
1711 	mutex_init(&sRawSocketsLock, "raw sockets");
1712 	mutex_init(&sFragmentLock, "IPv4 Fragments");
1713 	mutex_init(&sMulticastGroupsLock, "IPv4 multicast groups");
1714 	mutex_init(&sReceivingProtocolLock, "IPv4 receiving protocols");
1715 
1716 	status_t status;
1717 
1718 	sMulticastState = new MulticastState();
1719 	if (sMulticastState == NULL) {
1720 		status = B_NO_MEMORY;
1721 		goto err4;
1722 	}
1723 
1724 	status = sMulticastState->Init();
1725 	if (status != B_OK)
1726 		goto err5;
1727 
1728 	sFragmentHash = hash_init(MAX_HASH_FRAGMENTS, FragmentPacket::NextOffset(),
1729 		&FragmentPacket::Compare, &FragmentPacket::Hash);
1730 	if (sFragmentHash == NULL)
1731 		goto err5;
1732 
1733 	new (&sRawSockets) RawSocketList;
1734 		// static initializers do not work in the kernel,
1735 		// so we have to do it here, manually
1736 		// TODO: for modules, this shouldn't be required
1737 
1738 	status = gStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
1739 		"network/protocols/ipv4/v1", NULL);
1740 	if (status != B_OK)
1741 		goto err6;
1742 
1743 	status = gStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
1744 		&gIPv4AddressModule, &sDomain);
1745 	if (status != B_OK)
1746 		goto err6;
1747 
1748 	add_debugger_command("ipv4_multicast", dump_ipv4_multicast,
1749 		"list all current IPv4 multicast states");
1750 
1751 	return B_OK;
1752 
1753 err6:
1754 	hash_uninit(sFragmentHash);
1755 err5:
1756 	delete sMulticastState;
1757 err4:
1758 	mutex_destroy(&sReceivingProtocolLock);
1759 	mutex_destroy(&sMulticastGroupsLock);
1760 	mutex_destroy(&sFragmentLock);
1761 	mutex_destroy(&sRawSocketsLock);
1762 	return status;
1763 }
1764 
1765 
1766 status_t
1767 uninit_ipv4()
1768 {
1769 	mutex_lock(&sReceivingProtocolLock);
1770 
1771 	remove_debugger_command("ipv4_multicast", dump_ipv4_multicast);
1772 
1773 	// put all the domain receiving protocols we gathered so far
1774 	for (uint32 i = 0; i < 256; i++) {
1775 		if (sReceivingProtocol[i] != NULL)
1776 			gStackModule->put_domain_receiving_protocol(sDomain, i);
1777 	}
1778 
1779 	gStackModule->unregister_domain(sDomain);
1780 	mutex_unlock(&sReceivingProtocolLock);
1781 
1782 	delete sMulticastState;
1783 	hash_uninit(sFragmentHash);
1784 
1785 	mutex_destroy(&sMulticastGroupsLock);
1786 	mutex_destroy(&sFragmentLock);
1787 	mutex_destroy(&sRawSocketsLock);
1788 	mutex_destroy(&sReceivingProtocolLock);
1789 
1790 	return B_OK;
1791 }
1792 
1793 
1794 static status_t
1795 ipv4_std_ops(int32 op, ...)
1796 {
1797 	switch (op) {
1798 		case B_MODULE_INIT:
1799 			return init_ipv4();
1800 		case B_MODULE_UNINIT:
1801 			return uninit_ipv4();
1802 
1803 		default:
1804 			return B_ERROR;
1805 	}
1806 }
1807 
1808 
1809 net_protocol_module_info gIPv4Module = {
1810 	{
1811 		"network/protocols/ipv4/v1",
1812 		0,
1813 		ipv4_std_ops
1814 	},
1815 	NET_PROTOCOL_ATOMIC_MESSAGES,
1816 
1817 	ipv4_init_protocol,
1818 	ipv4_uninit_protocol,
1819 	ipv4_open,
1820 	ipv4_close,
1821 	ipv4_free,
1822 	ipv4_connect,
1823 	ipv4_accept,
1824 	ipv4_control,
1825 	ipv4_getsockopt,
1826 	ipv4_setsockopt,
1827 	ipv4_bind,
1828 	ipv4_unbind,
1829 	ipv4_listen,
1830 	ipv4_shutdown,
1831 	ipv4_send_data,
1832 	ipv4_send_routed_data,
1833 	ipv4_send_avail,
1834 	ipv4_read_data,
1835 	ipv4_read_avail,
1836 	ipv4_get_domain,
1837 	ipv4_get_mtu,
1838 	ipv4_receive_data,
1839 	ipv4_deliver_data,
1840 	ipv4_error,
1841 	ipv4_error_reply,
1842 	NULL,		// add_ancillary_data()
1843 	NULL,		// process_ancillary_data()
1844 	ipv4_process_ancillary_data_no_container,
1845 	NULL,		// send_data_no_buffer()
1846 	NULL		// read_data_no_buffer()
1847 };
1848 
1849 module_dependency module_dependencies[] = {
1850 	{NET_STACK_MODULE_NAME, (module_info**)&gStackModule},
1851 	{NET_BUFFER_MODULE_NAME, (module_info**)&gBufferModule},
1852 	{NET_DATALINK_MODULE_NAME, (module_info**)&sDatalinkModule},
1853 	{NET_SOCKET_MODULE_NAME, (module_info**)&sSocketModule},
1854 	{}
1855 };
1856 
1857 module_info* modules[] = {
1858 	(module_info*)&gIPv4Module,
1859 	NULL
1860 };
1861