xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision 079c69cbfd7cd3c97baae91332251c8388a8bb02)
1 /*
2  * Copyright 2006-2007, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4_address.h"
11 #include "multicast.h"
12 
13 #include <net_datalink.h>
14 #include <net_datalink_protocol.h>
15 #include <net_protocol.h>
16 #include <net_stack.h>
17 #include <NetBufferUtilities.h>
18 #include <ProtocolUtilities.h>
19 
20 #include <ByteOrder.h>
21 #include <KernelExport.h>
22 #include <util/AutoLock.h>
23 #include <util/list.h>
24 #include <util/khash.h>
25 #include <util/DoublyLinkedList.h>
26 #include <util/MultiHashTable.h>
27 
28 #include <netinet/in.h>
29 #include <netinet/ip.h>
30 #include <new>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <utility>
35 
36 
// Debug tracing. Uncomment the define below to have TRACE() and TRACE_SK()
// print through dprintf(), prefixed with the current system_time(); while it
// stays commented out both macros expand to an empty statement.
//#define TRACE_IPV4
#ifdef TRACE_IPV4
#	define TRACE(format, args...) \
		dprintf("IPv4 [%llu] " format "\n", system_time() , ##args)
#	define TRACE_SK(protocol, format, args...) \
		dprintf("IPv4 [%llu] %p " format "\n", system_time(), \
			protocol , ##args)
#else
#	define TRACE(args...)		do { } while (0)
#	define TRACE_SK(args...)	do { } while (0)
#endif
48 
// Layout of an IPv4 header as it is found in a packet. The 16 bit fields are
// accessed with ntohs()/htons() throughout this file; the accessors below
// return them converted for the host.
struct ipv4_header {
#if B_HOST_IS_LENDIAN == 1
	uint8		header_length : 4;	// header length in 32-bit words
	uint8		version : 4;
#else
	uint8		version : 4;
	uint8		header_length : 4;
#endif
	uint8		service_type;
	uint16		total_length;
	uint16		id;
	uint16		fragment_offset;
		// fragment flags (IP_*_FLAG/IP_DONT_FRAGMENT/IP_MORE_FRAGMENTS) plus
		// the offset selected by IP_FRAGMENT_OFFSET_MASK
	uint8		time_to_live;
	uint8		protocol;
	uint16		checksum;
	in_addr_t	source;
	in_addr_t	destination;

	// header length in bytes (header_length counts 32-bit words)
	uint16 HeaderLength() const { return header_length << 2; }
	// total_length converted with ntohs()
	uint16 TotalLength() const { return ntohs(total_length); }
	// fragment_offset converted with ntohs(); still contains the flag bits
	uint16 FragmentOffset() const { return ntohs(fragment_offset); }
} _PACKED;
71 
#define IP_VERSION				4

// fragment flags (bits of ipv4_header::FragmentOffset())
#define IP_RESERVED_FLAG		0x8000
#define IP_DONT_FRAGMENT		0x4000
#define IP_MORE_FRAGMENTS		0x2000
#define IP_FRAGMENT_OFFSET_MASK	0x1fff
	// the masked value is shifted left by 3 to get a byte offset

#define MAX_HASH_FRAGMENTS 		64
	// slots in the fragment packet's hash
#define FRAGMENT_TIMEOUT		60000000LL
	// discard fragment after 60 seconds

// List of the net_buffers that make up a partially received packet; linked
// through the buffers' own link member.
typedef DoublyLinkedList<struct net_buffer,
	DoublyLinkedListCLink<struct net_buffer> > FragmentList;

// Accessor for the checksum field of the IPv4 header at the start of a buffer.
typedef NetBufferField<uint16, offsetof(ipv4_header, checksum)> IPChecksumField;
89 
// Identifies the packet a fragment belongs to. The fields are copied from the
// IPv4 header as is (no byte order conversion) and are used for hashing and
// comparing FragmentPackets.
struct ipv4_packet_key {
	in_addr_t	source;
	in_addr_t	destination;
	uint16		id;
	uint8		protocol;
};
96 
// Collects the fragments of a single IPv4 packet until it can be reassembled.
// Instances live in sFragmentHash and delete themselves via StaleTimer() when
// no fragment arrived for FRAGMENT_TIMEOUT.
class FragmentPacket {
	public:
		FragmentPacket(const ipv4_packet_key &key);
		~FragmentPacket();

		// Adds the payload bytes [start, end) contained in \a buffer.
		status_t AddFragment(uint16 start, uint16 end, net_buffer *buffer,
					bool lastFragment);
		// Merges all collected fragments into \a to.
		status_t Reassemble(net_buffer *to);

		// true once the last fragment was seen and no bytes are missing
		bool IsComplete() const { return fReceivedLastFragment && fBytesLeft == 0; }

		// callbacks with the signatures used for sFragmentHash
		static uint32 Hash(void *_packet, const void *_key, uint32 range);
		static int Compare(void *_packet, const void *_key);
		static int32 NextOffset() { return offsetof(FragmentPacket, fNext); }
		static void StaleTimer(struct net_timer *timer, void *data);

	private:
		FragmentPacket	*fNext;
			// link located via NextOffset()
		struct ipv4_packet_key fKey;
		bool			fReceivedLastFragment;
		int32			fBytesLeft;
			// starts at IP_MAXPACKET; reduced by every byte received and, once
			// the last fragment is known, by the unused rest up to IP_MAXPACKET
		FragmentList	fFragments;
			// kept sorted by fragment.start (see AddFragment())
		net_timer		fTimer;
};
121 
122 
// Datagram socket created by ipv4_open() for every opened IPv4 socket; the
// list link allows it to be kept in sRawSockets.
class RawSocket : public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
	public:
		RawSocket(net_socket *socket);
};
127 
// all currently open raw sockets (see sRawSockets)
typedef DoublyLinkedList<RawSocket> RawSocketList;

// IPv4 instantiations of the generic multicast helpers from multicast.h
typedef MulticastGroupInterface<IPv4Multicast> IPv4GroupInterface;
typedef MulticastFilter<IPv4Multicast> IPv4MulticastFilter;
132 
133 struct MulticastStateHash {
134 	typedef std::pair<const in_addr *, uint32> KeyType;
135 	typedef IPv4GroupInterface ValueType;
136 
137 	size_t HashKey(const KeyType &key) const
138 		{ return key.first->s_addr ^ key.second; }
139 	size_t Hash(ValueType *value) const
140 		{ return HashKey(std::make_pair(&value->Address(),
141 			value->Interface()->index)); }
142 	bool Compare(const KeyType &key, ValueType *value) const
143 		{ return value->Interface()->index == key.second
144 			&& value->Address().s_addr == key.first->s_addr; }
145 	bool CompareValues(ValueType *value1, ValueType *value2) const
146 		{ return value1->Interface()->index == value2->Interface()->index
147 			&& value1->Address().s_addr == value2->Address().s_addr; }
148 	HashTableLink<ValueType> *GetLink(ValueType *value) const { return value; }
149 };
150 
151 
// Per socket state of the IPv4 protocol; created by ipv4_init_protocol().
struct ipv4_protocol : net_protocol {
	ipv4_protocol()
		: multicast_filter(this) {}

	RawSocket	*raw;
		// set by ipv4_open(), NULL otherwise
	uint8		service_type;
		// IP_TOS socket option
	uint8		time_to_live;
		// IP_TTL socket option
	uint8		multicast_time_to_live;
		// IP_MULTICAST_TTL socket option
	uint32		flags;
		// IP_FLAG_* bits

	IPv4MulticastFilter multicast_filter;
};
164 
// protocol flags (ipv4_protocol::flags)
#define IP_FLAG_HEADER_INCLUDED	0x01
	// toggled via the IP_HDRINCL socket option


// initial values assigned to new sockets in ipv4_init_protocol()
static const int kDefaultTTL = 254;
static const int kDefaultMulticastTTL = 1;


extern net_protocol_module_info gIPv4Module;
	// we need this in ipv4_std_ops() for registering the AF_INET domain

net_stack_module_info *gStackModule;
net_buffer_module_info *gBufferModule;

static struct net_domain *sDomain;
static net_datalink_module_info *sDatalinkModule;
static net_socket_module_info *sSocketModule;
static int32 sPacketID;
static RawSocketList sRawSockets;
static benaphore sRawSocketsLock;
	// held while sRawSockets is accessed
static benaphore sFragmentLock;
	// held while sFragmentHash and its FragmentPackets are accessed
static hash_table *sFragmentHash;
static benaphore sMulticastGroupsLock;
	// held while sMulticastState is accessed

typedef MultiHashTable<MulticastStateHash> MulticastState;
static MulticastState *sMulticastState;

// cache filled on demand by receiving_protocol(), indexed by protocol number
static net_protocol_module_info *sReceivingProtocol[256];
static benaphore sReceivingProtocolLock;
194 
195 
/*!
	Formats \a address (network byte order) in dotted decimal notation into
	\a buf, writing at most \a bufLen bytes including the terminating null.
	\return \a buf, for convenient use in an argument list.
*/
static const char *
print_address(const in_addr *address, char *buf, size_t bufLen)
{
	const unsigned int hostOrder = ntohl(address->s_addr);

	// most significant octet first
	const unsigned int first = (hostOrder >> 24) & 0xff;
	const unsigned int second = (hostOrder >> 16) & 0xff;
	const unsigned int third = (hostOrder >> 8) & 0xff;
	const unsigned int fourth = hostOrder & 0xff;

	snprintf(buf, bufLen, "%u.%u.%u.%u", first, second, third, fourth);
	return buf;
}
206 
207 
// Sets up the underlying DatagramSocket for \a socket under the name
// "ipv4 raw socket"; callers check the result with InitCheck().
RawSocket::RawSocket(net_socket *socket)
	: DatagramSocket<>("ipv4 raw socket", socket)
{
}
212 
213 
214 //	#pragma mark -
215 
216 
// Creates an empty packet for \a key: nothing has been received yet, so the
// whole maximum packet size counts as missing. The stale timer is only
// initialized here; it gets armed by AddFragment().
FragmentPacket::FragmentPacket(const ipv4_packet_key &key)
	:
	fKey(key),
	fReceivedLastFragment(false),
	fBytesLeft(IP_MAXPACKET)
{
	gStackModule->init_timer(&fTimer, StaleTimer, this);
}
225 
226 
227 FragmentPacket::~FragmentPacket()
228 {
229 	// cancel the kill timer
230 	gStackModule->set_timer(&fTimer, -1);
231 
232 	// delete all fragments
233 	net_buffer *buffer;
234 	while ((buffer = fFragments.RemoveHead()) != NULL) {
235 		gBufferModule->free(buffer);
236 	}
237 }
238 
239 
240 status_t
241 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer *buffer,
242 	bool lastFragment)
243 {
244 	// restart the timer
245 	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
246 
247 	if (start >= end) {
248 		// invalid fragment
249 		return B_BAD_DATA;
250 	}
251 
252 	// Search for a position in the list to insert the fragment
253 
254 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
255 	net_buffer *previous = NULL;
256 	net_buffer *next = NULL;
257 	while ((previous = iterator.Next()) != NULL) {
258 		if (previous->fragment.start <= start) {
259 			// The new fragment can be inserted after this one
260 			break;
261 		}
262 
263 		next = previous;
264 	}
265 
266 	// See if we already have the fragment's data
267 
268 	if (previous != NULL && previous->fragment.start <= start
269 		&& previous->fragment.end >= end) {
270 		// we do, so we can just drop this fragment
271 		gBufferModule->free(buffer);
272 		return B_OK;
273 	}
274 
275 	TRACE("    previous: %p, next: %p", previous, next);
276 
277 	// If we have parts of the data already, truncate as needed
278 
279 	if (previous != NULL && previous->fragment.end > start) {
280 		TRACE("    remove header %d bytes", previous->fragment.end - start);
281 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
282 		start = previous->fragment.end;
283 	}
284 	if (next != NULL && next->fragment.start < end) {
285 		TRACE("    remove trailer %d bytes", next->fragment.start - end);
286 		gBufferModule->remove_trailer(buffer, next->fragment.start - end);
287 		end = next->fragment.start;
288 	}
289 
290 	// Now try if we can already merge the fragments together
291 
292 	// We will always keep the last buffer received, so that we can still
293 	// report an error (in which case we're not responsible for freeing it)
294 
295 	if (previous != NULL && previous->fragment.end == start) {
296 		fFragments.Remove(previous);
297 
298 		buffer->fragment.start = previous->fragment.start;
299 		buffer->fragment.end = end;
300 
301 		status_t status = gBufferModule->merge(buffer, previous, false);
302 		TRACE("    merge previous: %s", strerror(status));
303 		if (status < B_OK) {
304 			fFragments.Insert(next, previous);
305 			return status;
306 		}
307 
308 		fFragments.Insert(next, buffer);
309 
310 		// cut down existing hole
311 		fBytesLeft -= end - start;
312 
313 		if (lastFragment && !fReceivedLastFragment) {
314 			fReceivedLastFragment = true;
315 			fBytesLeft -= IP_MAXPACKET - end;
316 		}
317 
318 		TRACE("    hole length: %d", (int)fBytesLeft);
319 
320 		return B_OK;
321 	} else if (next != NULL && next->fragment.start == end) {
322 		fFragments.Remove(next);
323 
324 		buffer->fragment.start = start;
325 		buffer->fragment.end = next->fragment.end;
326 
327 		status_t status = gBufferModule->merge(buffer, next, true);
328 		TRACE("    merge next: %s", strerror(status));
329 		if (status < B_OK) {
330 			fFragments.Insert((net_buffer *)previous->link.next, next);
331 			return status;
332 		}
333 
334 		fFragments.Insert((net_buffer *)previous->link.next, buffer);
335 
336 		// cut down existing hole
337 		fBytesLeft -= end - start;
338 
339 		if (lastFragment && !fReceivedLastFragment) {
340 			fReceivedLastFragment = true;
341 			fBytesLeft -= IP_MAXPACKET - end;
342 		}
343 
344 		TRACE("    hole length: %d", (int)fBytesLeft);
345 
346 		return B_OK;
347 	}
348 
349 	// We couldn't merge the fragments, so we need to add it as is
350 
351 	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);
352 
353 	buffer->fragment.start = start;
354 	buffer->fragment.end = end;
355 	fFragments.Insert(next, buffer);
356 
357 	// update length of the hole, if any
358 	fBytesLeft -= end - start;
359 
360 	if (lastFragment && !fReceivedLastFragment) {
361 		fReceivedLastFragment = true;
362 		fBytesLeft -= IP_MAXPACKET - end;
363 	}
364 
365 	TRACE("    hole length: %d", (int)fBytesLeft);
366 
367 	return B_OK;
368 }
369 
370 
371 /*!
372 	Reassembles the fragments to the specified buffer \a to.
373 	This buffer must have been added via AddFragment() before.
374 */
375 status_t
376 FragmentPacket::Reassemble(net_buffer *to)
377 {
378 	if (!IsComplete())
379 		return NULL;
380 
381 	net_buffer *buffer = NULL;
382 
383 	net_buffer *fragment;
384 	while ((fragment = fFragments.RemoveHead()) != NULL) {
385 		if (buffer != NULL) {
386 			status_t status;
387 			if (to == fragment) {
388 				status = gBufferModule->merge(fragment, buffer, false);
389 				buffer = fragment;
390 			} else
391 				status = gBufferModule->merge(buffer, fragment, true);
392 			if (status < B_OK)
393 				return status;
394 		} else
395 			buffer = fragment;
396 	}
397 
398 	if (buffer != to)
399 		panic("ipv4 packet reassembly did not work correctly.\n");
400 
401 	return B_OK;
402 }
403 
404 
405 int
406 FragmentPacket::Compare(void *_packet, const void *_key)
407 {
408 	const ipv4_packet_key *key = (ipv4_packet_key *)_key;
409 	ipv4_packet_key *packetKey = &((FragmentPacket *)_packet)->fKey;
410 
411 	if (packetKey->id == key->id
412 		&& packetKey->source == key->source
413 		&& packetKey->destination == key->destination
414 		&& packetKey->protocol == key->protocol)
415 		return 0;
416 
417 	return 1;
418 }
419 
420 
421 uint32
422 FragmentPacket::Hash(void *_packet, const void *_key, uint32 range)
423 {
424 	const struct ipv4_packet_key *key = (struct ipv4_packet_key *)_key;
425 	FragmentPacket *packet = (FragmentPacket *)_packet;
426 	if (packet != NULL)
427 		key = &packet->fKey;
428 
429 	return (key->source ^ key->destination ^ key->protocol ^ key->id) % range;
430 }
431 
432 
433 /*static*/ void
434 FragmentPacket::StaleTimer(struct net_timer *timer, void *data)
435 {
436 	FragmentPacket *packet = (FragmentPacket *)data;
437 	TRACE("Assembling FragmentPacket %p timed out!", packet);
438 
439 	BenaphoreLocker locker(&sFragmentLock);
440 
441 	hash_remove(sFragmentHash, packet);
442 	delete packet;
443 }
444 
445 
446 //	#pragma mark -
447 
448 
#if 0
// Debug helper, currently compiled out: prints all fields of \a header via
// dprintf(). The addresses are printed octet by octet through the
// pretty_ipv4 overlay declared below.
static void
dump_ipv4_header(ipv4_header &header)
{
	struct pretty_ipv4 {
	#if B_HOST_IS_LENDIAN == 1
		uint8 a;
		uint8 b;
		uint8 c;
		uint8 d;
	#else
		uint8 d;
		uint8 c;
		uint8 b;
		uint8 a;
	#endif
	};
	struct pretty_ipv4 *src = (struct pretty_ipv4 *)&header.source;
	struct pretty_ipv4 *dst = (struct pretty_ipv4 *)&header.destination;
	dprintf("  version: %d\n", header.version);
	dprintf("  header_length: 4 * %d\n", header.header_length);
	dprintf("  service_type: %d\n", header.service_type);
	dprintf("  total_length: %d\n", header.TotalLength());
	dprintf("  id: %d\n", ntohs(header.id));
	// the flag bits share the field with the offset, see the IP_* masks
	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
		header.FragmentOffset() & IP_FRAGMENT_OFFSET_MASK,
		(header.FragmentOffset() & IP_RESERVED_FLAG) ? 'r' : '-',
		(header.FragmentOffset() & IP_DONT_FRAGMENT) ? 'd' : '-',
		(header.FragmentOffset() & IP_MORE_FRAGMENTS) ? 'm' : '-');
	dprintf("  time_to_live: %d\n", header.time_to_live);
	dprintf("  protocol: %d\n", header.protocol);
	dprintf("  checksum: %d\n", ntohs(header.checksum));
	dprintf("  source: %d.%d.%d.%d\n", src->a, src->b, src->c, src->d);
	dprintf("  destination: %d.%d.%d.%d\n", dst->a, dst->b, dst->c, dst->d);
}
#endif
485 
486 
487 /*!
488 	Attempts to re-assemble fragmented packets.
489 	\return B_OK if everything went well; if it could reassemble the packet, \a _buffer
490 		will point to its buffer, otherwise, it will be \c NULL.
491 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
492 */
493 static status_t
494 reassemble_fragments(const ipv4_header &header, net_buffer **_buffer)
495 {
496 	net_buffer *buffer = *_buffer;
497 	status_t status;
498 
499 	struct ipv4_packet_key key;
500 	key.source = (in_addr_t)header.source;
501 	key.destination = (in_addr_t)header.destination;
502 	key.id = header.id;
503 	key.protocol = header.protocol;
504 
505 	// TODO: Make locking finer grained.
506 	BenaphoreLocker locker(&sFragmentLock);
507 
508 	FragmentPacket *packet = (FragmentPacket *)hash_lookup(sFragmentHash, &key);
509 	if (packet == NULL) {
510 		// New fragment packet
511 		packet = new (std::nothrow) FragmentPacket(key);
512 		if (packet == NULL)
513 			return B_NO_MEMORY;
514 
515 		// add packet to hash
516 		status = hash_insert(sFragmentHash, packet);
517 		if (status != B_OK) {
518 			delete packet;
519 			return status;
520 		}
521 	}
522 
523 	uint16 fragmentOffset = header.FragmentOffset();
524 	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
525 	uint16 end = start + header.TotalLength() - header.HeaderLength();
526 	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;
527 
528 	TRACE("   Received IPv4 %sfragment of size %d, offset %d.",
529 		lastFragment ? "last ": "", end - start, start);
530 
531 	// Remove header unless this is the first fragment
532 	if (start != 0)
533 		gBufferModule->remove_header(buffer, header.HeaderLength());
534 
535 	status = packet->AddFragment(start, end, buffer, lastFragment);
536 	if (status != B_OK)
537 		return status;
538 
539 	if (packet->IsComplete()) {
540 		hash_remove(sFragmentHash, packet);
541 			// no matter if reassembling succeeds, we won't need this packet anymore
542 
543 		status = packet->Reassemble(buffer);
544 		delete packet;
545 
546 		// _buffer does not change
547 		return status;
548 	}
549 
550 	// This indicates that the packet is not yet complete
551 	*_buffer = NULL;
552 	return B_OK;
553 }
554 
555 
556 /*!
557 	Fragments the incoming buffer and send all fragments via the specified
558 	\a route.
559 */
560 static status_t
561 send_fragments(ipv4_protocol *protocol, struct net_route *route,
562 	net_buffer *buffer, uint32 mtu)
563 {
564 	TRACE_SK(protocol, "SendFragments(%lu bytes, mtu %lu)", buffer->size, mtu);
565 
566 	NetBufferHeaderReader<ipv4_header> originalHeader(buffer);
567 	if (originalHeader.Status() < B_OK)
568 		return originalHeader.Status();
569 
570 	uint16 headerLength = originalHeader->HeaderLength();
571 	uint32 bytesLeft = buffer->size - headerLength;
572 	uint32 fragmentOffset = 0;
573 	status_t status = B_OK;
574 
575 	net_buffer *headerBuffer = gBufferModule->split(buffer, headerLength);
576 	if (headerBuffer == NULL)
577 		return B_NO_MEMORY;
578 
579 	// TODO we need to make sure ipv4_header is contiguous or
580 	//      use another construct.
581 	NetBufferHeaderReader<ipv4_header> bufferHeader(headerBuffer);
582 	ipv4_header *header = &bufferHeader.Data();
583 
584 	// adapt MTU to be a multiple of 8 (fragment offsets can only be specified this way)
585 	mtu -= headerLength;
586 	mtu &= ~7;
587 	dprintf("  adjusted MTU to %ld\n", mtu);
588 
589 	dprintf("  bytesLeft = %ld\n", bytesLeft);
590 	while (bytesLeft > 0) {
591 		uint32 fragmentLength = min_c(bytesLeft, mtu);
592 		bytesLeft -= fragmentLength;
593 		bool lastFragment = bytesLeft == 0;
594 
595 		header->total_length = htons(fragmentLength + headerLength);
596 		header->fragment_offset = htons((lastFragment ? 0 : IP_MORE_FRAGMENTS)
597 			| (fragmentOffset >> 3));
598 		header->checksum = 0;
599 		header->checksum = gStackModule->checksum((uint8 *)header, headerLength);
600 			// TODO: compute the checksum only for those parts that changed?
601 
602 		dprintf("  send fragment of %ld bytes (%ld bytes left)\n", fragmentLength, bytesLeft);
603 
604 		net_buffer *fragmentBuffer;
605 		if (!lastFragment) {
606 			fragmentBuffer = gBufferModule->split(buffer, fragmentLength);
607 			fragmentOffset += fragmentLength;
608 		} else
609 			fragmentBuffer = buffer;
610 
611 		if (fragmentBuffer == NULL) {
612 			status = B_NO_MEMORY;
613 			break;
614 		}
615 
616 		// copy header to fragment
617 		status = gBufferModule->prepend(fragmentBuffer, header, headerLength);
618 
619 		// send fragment
620 		if (status == B_OK)
621 			status = sDatalinkModule->send_data(route, fragmentBuffer);
622 
623 		if (lastFragment) {
624 			// we don't own the last buffer, so we don't have to free it
625 			break;
626 		}
627 
628 		if (status < B_OK) {
629 			gBufferModule->free(fragmentBuffer);
630 			break;
631 		}
632 	}
633 
634 	gBufferModule->free(headerBuffer);
635 	return status;
636 }
637 
638 
639 static status_t
640 deliver_multicast(net_protocol_module_info *module, net_buffer *buffer,
641 	bool deliverToRaw)
642 {
643 	if (module->deliver_data == NULL)
644 		return B_OK;
645 
646 	BenaphoreLocker _(sMulticastGroupsLock);
647 
648 	sockaddr_in *multicastAddr = (sockaddr_in *)buffer->destination;
649 
650 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
651 		&multicastAddr->sin_addr, buffer->interface->index));
652 
653 	while (it.HasNext()) {
654 		IPv4GroupInterface *state = it.Next();
655 
656 		if (deliverToRaw && state->Parent()->Socket()->raw == NULL)
657 			continue;
658 
659 		if (state->FilterAccepts(buffer)) {
660 			// as Multicast filters are installed with an IPv4 protocol
661 			// reference, we need to go and find the appropriate instance
662 			// related to the 'receiving protocol' with module 'module'.
663 			net_protocol *proto =
664 				state->Parent()->Socket()->socket->first_protocol;
665 
666 			while (proto && proto->module != module)
667 				proto = proto->next;
668 
669 			if (proto)
670 				module->deliver_data(proto, buffer);
671 		}
672 	}
673 
674 	return B_OK;
675 }
676 
677 
678 static void
679 raw_receive_data(net_buffer *buffer)
680 {
681 	BenaphoreLocker locker(sRawSocketsLock);
682 
683 	if (sRawSockets.IsEmpty())
684 		return;
685 
686 	TRACE("RawReceiveData(%i)", buffer->protocol);
687 
688 	if (buffer->flags & MSG_MCAST) {
689 		// we need to call deliver_multicast here separately as
690 		// buffer still has the IP header, and it won't in the
691 		// next call. This isn't very optimized but works for now.
692 		// A better solution would be to hold separate hash tables
693 		// and lists for RAW and non-RAW sockets.
694 		deliver_multicast(&gIPv4Module, buffer, true);
695 	} else {
696 		RawSocketList::Iterator iterator = sRawSockets.GetIterator();
697 
698 		while (iterator.HasNext()) {
699 			RawSocket *raw = iterator.Next();
700 
701 			if (raw->Socket()->protocol == buffer->protocol)
702 				raw->SocketEnqueue(buffer);
703 		}
704 	}
705 }
706 
707 
708 static sockaddr *
709 fill_sockaddr_in(sockaddr_in *destination, const in_addr &source)
710 {
711 	memset(destination, 0, sizeof(sockaddr_in));
712 	destination->sin_family = AF_INET;
713 	destination->sin_addr = source;
714 	return (sockaddr *)destination;
715 }
716 
717 
718 status_t
719 IPv4Multicast::JoinGroup(IPv4GroupInterface *state)
720 {
721 	BenaphoreLocker _(sMulticastGroupsLock);
722 
723 	sockaddr_in groupAddr;
724 	net_interface *intf = state->Interface();
725 
726 	status_t status = intf->first_info->join_multicast(intf->first_protocol,
727 		fill_sockaddr_in(&groupAddr, state->Address()));
728 	if (status < B_OK)
729 		return status;
730 
731 	sMulticastState->Insert(state);
732 	return B_OK;
733 }
734 
735 
736 status_t
737 IPv4Multicast::LeaveGroup(IPv4GroupInterface *state)
738 {
739 	BenaphoreLocker _(sMulticastGroupsLock);
740 
741 	sMulticastState->Remove(state);
742 
743 	sockaddr_in groupAddr;
744 	net_interface *intf = state->Interface();
745 
746 	return intf->first_protocol->module->join_multicast(intf->first_protocol,
747 		fill_sockaddr_in(&groupAddr, state->Address()));
748 }
749 
750 
751 static net_protocol_module_info *
752 receiving_protocol(uint8 protocol)
753 {
754 	net_protocol_module_info *module = sReceivingProtocol[protocol];
755 	if (module != NULL)
756 		return module;
757 
758 	BenaphoreLocker locker(sReceivingProtocolLock);
759 
760 	module = sReceivingProtocol[protocol];
761 	if (module != NULL)
762 		return module;
763 
764 	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol, &module) == B_OK)
765 		sReceivingProtocol[protocol] = module;
766 
767 	return module;
768 }
769 
770 
771 static inline sockaddr *
772 fill_sockaddr_in(sockaddr_in *target, in_addr_t address)
773 {
774 	memset(target, 0, sizeof(sockaddr_in));
775 	target->sin_family = AF_INET;
776 	target->sin_len = sizeof(sockaddr_in);
777 	target->sin_addr.s_addr = address;
778 	return (sockaddr *)target;
779 }
780 
781 
782 static status_t
783 ipv4_delta_group(IPv4GroupInterface *group, int option,
784 	net_interface *interface, const in_addr *sourceAddr)
785 {
786 	switch (option) {
787 		case IP_ADD_MEMBERSHIP:
788 			return group->Add();
789 		case IP_DROP_MEMBERSHIP:
790 			return group->Drop();
791 		case IP_BLOCK_SOURCE:
792 			return group->BlockSource(*sourceAddr);
793 		case IP_UNBLOCK_SOURCE:
794 			return group->UnblockSource(*sourceAddr);
795 		case IP_ADD_SOURCE_MEMBERSHIP:
796 			return group->AddSSM(*sourceAddr);
797 		case IP_DROP_SOURCE_MEMBERSHIP:
798 			return group->DropSSM(*sourceAddr);
799 	}
800 
801 	return B_ERROR;
802 }
803 
804 
805 static status_t
806 ipv4_delta_membership(ipv4_protocol *protocol, int option,
807 	net_interface *interface, const in_addr *groupAddr,
808 	const in_addr *sourceAddr)
809 {
810 	IPv4MulticastFilter &filter = protocol->multicast_filter;
811 	IPv4GroupInterface *state = NULL;
812 	status_t status = B_OK;
813 
814 	switch (option) {
815 		case IP_ADD_MEMBERSHIP:
816 		case IP_ADD_SOURCE_MEMBERSHIP:
817 			status = filter.GetState(*groupAddr, interface, state, true);
818 			break;
819 
820 		case IP_DROP_MEMBERSHIP:
821 		case IP_BLOCK_SOURCE:
822 		case IP_UNBLOCK_SOURCE:
823 		case IP_DROP_SOURCE_MEMBERSHIP:
824 			filter.GetState(*groupAddr, interface, state, false);
825 			if (state == NULL) {
826 				if (option == IP_DROP_MEMBERSHIP
827 					|| option == IP_DROP_SOURCE_MEMBERSHIP)
828 					return EADDRNOTAVAIL;
829 				else
830 					return EINVAL;
831 			}
832 			break;
833 	}
834 
835 	if (status < B_OK)
836 		return status;
837 
838 	status = ipv4_delta_group(state, option, interface, sourceAddr);
839 	filter.ReturnState(state);
840 	return status;
841 }
842 
843 
/*!
	Translates one of the protocol independent MCAST_* socket options into
	its IPv4 specific IP_* counterpart.
	\return the IPv4 option, or -1 if \a option is not one of the MCAST_*
		membership options.
*/
static int
generic_to_ipv4(int option)
{
	static const struct {
		int	generic;
		int	ipv4;
	} kOptionMap[] = {
		{ MCAST_JOIN_GROUP,			IP_ADD_MEMBERSHIP },
		{ MCAST_JOIN_SOURCE_GROUP,	IP_ADD_SOURCE_MEMBERSHIP },
		{ MCAST_LEAVE_GROUP,		IP_DROP_MEMBERSHIP },
		{ MCAST_BLOCK_SOURCE,		IP_BLOCK_SOURCE },
		{ MCAST_UNBLOCK_SOURCE,		IP_UNBLOCK_SOURCE },
		{ MCAST_LEAVE_SOURCE_GROUP,	IP_DROP_SOURCE_MEMBERSHIP }
	};
	const size_t count = sizeof(kOptionMap) / sizeof(kOptionMap[0]);

	for (size_t i = 0; i < count; i++) {
		if (kOptionMap[i].generic == option)
			return kOptionMap[i].ipv4;
	}

	return -1;
}
864 
865 
866 static net_interface *
867 get_multicast_interface(ipv4_protocol *protocol, const in_addr *address)
868 {
869 	sockaddr_in groupAddr;
870 	net_route *route = sDatalinkModule->get_route(sDomain,
871 		fill_sockaddr_in(&groupAddr, address ? address->s_addr : INADDR_ANY));
872 	if (route == NULL)
873 		return NULL;
874 
875 	return route->interface;
876 }
877 
878 
879 static status_t
880 ipv4_delta_membership(ipv4_protocol *protocol, int option,
881 	in_addr *interfaceAddr, in_addr *groupAddr, in_addr *sourceAddr)
882 {
883 	net_interface *interface = NULL;
884 
885 	if (interfaceAddr->s_addr == INADDR_ANY) {
886 		interface = get_multicast_interface(protocol, groupAddr);
887 	} else {
888 		sockaddr_in address;
889 		interface = sDatalinkModule->get_interface_with_address(sDomain,
890 			fill_sockaddr_in(&address, interfaceAddr->s_addr));
891 	}
892 
893 	if (interface == NULL)
894 		return ENODEV;
895 
896 	return ipv4_delta_membership(protocol, option, interface,
897 		groupAddr, sourceAddr);
898 }
899 
900 
901 static status_t
902 ipv4_generic_delta_membership(ipv4_protocol *protocol, int option,
903 	uint32 index, const sockaddr_storage *_groupAddr,
904 	const sockaddr_storage *_sourceAddr)
905 {
906 	if (_groupAddr->ss_family != AF_INET)
907 		return EINVAL;
908 
909 	if (_sourceAddr && _sourceAddr->ss_family != AF_INET)
910 		return EINVAL;
911 
912 	net_interface *interface;
913 	const in_addr *groupAddr, *sourceAddr = NULL;
914 
915 	groupAddr = &((const sockaddr_in *)_groupAddr)->sin_addr;
916 
917 	if (index == 0)
918 		interface = get_multicast_interface(protocol, groupAddr);
919 	else
920 		interface = sDatalinkModule->get_interface(sDomain, index);
921 
922 	if (interface == NULL)
923 		return ENODEV;
924 
925 	if (_sourceAddr)
926 		sourceAddr = &((const sockaddr_in *)_sourceAddr)->sin_addr;
927 
928 	return ipv4_delta_membership(protocol, generic_to_ipv4(option), interface,
929 		groupAddr, sourceAddr);
930 }
931 
932 
933 //	#pragma mark -
934 
935 
936 net_protocol *
937 ipv4_init_protocol(net_socket *socket)
938 {
939 	ipv4_protocol *protocol = new (std::nothrow) ipv4_protocol();
940 	if (protocol == NULL)
941 		return NULL;
942 
943 	protocol->raw = NULL;
944 	protocol->service_type = 0;
945 	protocol->time_to_live = kDefaultTTL;
946 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
947 	protocol->flags = 0;
948 	return protocol;
949 }
950 
951 
952 status_t
953 ipv4_uninit_protocol(net_protocol *_protocol)
954 {
955 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
956 
957 	delete protocol->raw;
958 	delete protocol;
959 	return B_OK;
960 }
961 
962 
963 /*!
964 	Since open() is only called on the top level protocol, when we get here
965 	it means we are on a SOCK_RAW socket.
966 */
967 status_t
968 ipv4_open(net_protocol *_protocol)
969 {
970 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
971 
972 	RawSocket *raw = new (std::nothrow) RawSocket(protocol->socket);
973 	if (raw == NULL)
974 		return B_NO_MEMORY;
975 
976 	status_t status = raw->InitCheck();
977 	if (status < B_OK) {
978 		delete raw;
979 		return status;
980 	}
981 
982 	TRACE_SK(protocol, "Open()");
983 
984 	protocol->raw = raw;
985 
986 	BenaphoreLocker locker(sRawSocketsLock);
987 	sRawSockets.Add(raw);
988 	return B_OK;
989 }
990 
991 
992 status_t
993 ipv4_close(net_protocol *_protocol)
994 {
995 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
996 	RawSocket *raw = protocol->raw;
997 	if (raw == NULL)
998 		return B_ERROR;
999 
1000 	TRACE_SK(protocol, "Close()");
1001 
1002 	BenaphoreLocker locker(sRawSocketsLock);
1003 	sRawSockets.Remove(raw);
1004 	delete raw;
1005 	protocol->raw = NULL;
1006 
1007 	return B_OK;
1008 }
1009 
1010 
// Nothing to release here; always succeeds.
status_t
ipv4_free(net_protocol *protocol)
{
	return B_OK;
}
1016 
1017 
// Not implemented at this level; always fails with B_ERROR.
status_t
ipv4_connect(net_protocol *protocol, const struct sockaddr *address)
{
	return B_ERROR;
}
1023 
1024 
// Accepting connections is not supported; always returns EOPNOTSUPP.
status_t
ipv4_accept(net_protocol *protocol, struct net_socket **_acceptedSocket)
{
	return EOPNOTSUPP;
}
1030 
1031 
1032 static status_t
1033 get_int_option(void *target, size_t length, int value)
1034 {
1035 	if (length != sizeof(int))
1036 		return B_BAD_VALUE;
1037 
1038 	return user_memcpy(target, &value, sizeof(int));
1039 }
1040 
1041 
1042 template<typename Type> static status_t
1043 set_int_option(Type &target, const void *_value, size_t length)
1044 {
1045 	int value;
1046 
1047 	if (length != sizeof(int))
1048 		return B_BAD_VALUE;
1049 
1050 	if (user_memcpy(&value, _value, sizeof(int)) < B_OK)
1051 		return B_BAD_ADDRESS;
1052 
1053 	target = value;
1054 	return B_OK;
1055 }
1056 
1057 
1058 status_t
1059 ipv4_control(net_protocol *_protocol, int level, int option, void *value,
1060 	size_t *_length)
1061 {
1062 	if ((level & LEVEL_MASK) != IPPROTO_IP)
1063 		return sDatalinkModule->control(sDomain, option, value, _length);
1064 
1065 	return B_BAD_VALUE;
1066 }
1067 
1068 
1069 status_t
1070 ipv4_getsockopt(net_protocol *_protocol, int level, int option, void *value,
1071 	int *_length)
1072 {
1073 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1074 
1075 	if (level == IPPROTO_IP) {
1076 		if (option == IP_HDRINCL)
1077 			return get_int_option(value, *_length,
1078 				(protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0);
1079 		else if (option == IP_TTL)
1080 			return get_int_option(value, *_length, protocol->time_to_live);
1081 		else if (option == IP_TOS)
1082 			return get_int_option(value, *_length, protocol->service_type);
1083 		else if (IP_MULTICAST_TTL)
1084 			return get_int_option(value, *_length,
1085 				protocol->multicast_time_to_live);
1086 		else if (option == IP_ADD_MEMBERSHIP
1087 			|| option == IP_DROP_MEMBERSHIP
1088 			|| option == IP_BLOCK_SOURCE
1089 			|| option == IP_UNBLOCK_SOURCE
1090 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1091 			|| option == IP_DROP_SOURCE_MEMBERSHIP
1092 			|| option == MCAST_JOIN_GROUP
1093 			|| option == MCAST_LEAVE_GROUP
1094 			|| option == MCAST_BLOCK_SOURCE
1095 			|| option == MCAST_UNBLOCK_SOURCE
1096 			|| option == MCAST_JOIN_SOURCE_GROUP
1097 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1098 				// RFC 3678, Section 4.1:
1099 				// ``An error of EOPNOTSUPP is returned if these options are
1100 				// used with getsockopt().''
1101 				return EOPNOTSUPP;
1102 		} else {
1103 			dprintf("IPv4::getsockopt(): get unknown option: %d\n", option);
1104 			return ENOPROTOOPT;
1105 		}
1106 	}
1107 
1108 	return sSocketModule->get_option(protocol->socket, level, option, value,
1109 		_length);
1110 }
1111 
1112 
1113 status_t
1114 ipv4_setsockopt(net_protocol *_protocol, int level, int option,
1115 	const void *value, int length)
1116 {
1117 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1118 
1119 	if (level == IPPROTO_IP) {
1120 		if (option == IP_HDRINCL) {
1121 			int headerIncluded;
1122 			if (length != sizeof(int))
1123 				return B_BAD_VALUE;
1124 			if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded)) < B_OK)
1125 				return B_BAD_ADDRESS;
1126 
1127 			if (headerIncluded)
1128 				protocol->flags |= IP_FLAG_HEADER_INCLUDED;
1129 			else
1130 				protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
1131 			return B_OK;
1132 		} else if (option == IP_TTL) {
1133 			return set_int_option(protocol->time_to_live, value, length);
1134 		} else if (option == IP_TOS) {
1135 			return set_int_option(protocol->service_type, value, length);
1136 		} else if (option == IP_MULTICAST_TTL) {
1137 			return set_int_option(protocol->multicast_time_to_live, value,
1138 				length);
1139 		} else if (option == IP_ADD_MEMBERSHIP
1140 			|| option == IP_DROP_MEMBERSHIP) {
1141 			ip_mreq mreq;
1142 			if (length != sizeof(ip_mreq))
1143 				return B_BAD_VALUE;
1144 			if (user_memcpy(&mreq, value, sizeof(ip_mreq)) < B_OK)
1145 				return B_BAD_ADDRESS;
1146 
1147 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1148 				&mreq.imr_multiaddr, NULL);
1149 		} else if (option == IP_BLOCK_SOURCE
1150 			|| option == IP_UNBLOCK_SOURCE
1151 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1152 			|| option == IP_DROP_SOURCE_MEMBERSHIP) {
1153 			ip_mreq_source mreq;
1154 			if (length != sizeof(ip_mreq_source))
1155 				return B_BAD_VALUE;
1156 			if (user_memcpy(&mreq, value, sizeof(ip_mreq_source)) < B_OK)
1157 				return B_BAD_ADDRESS;
1158 
1159 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1160 				&mreq.imr_multiaddr, &mreq.imr_sourceaddr);
1161 		} else if (option == MCAST_LEAVE_GROUP
1162 			|| option == MCAST_JOIN_GROUP) {
1163 			group_req greq;
1164 			if (length != sizeof(group_req))
1165 				return B_BAD_VALUE;
1166 			if (user_memcpy(&greq, value, sizeof(group_req)) < B_OK)
1167 				return B_BAD_ADDRESS;
1168 
1169 			return ipv4_generic_delta_membership(protocol, option,
1170 				greq.gr_interface, &greq.gr_group, NULL);
1171 		} else if (option == MCAST_BLOCK_SOURCE
1172 			|| option == MCAST_UNBLOCK_SOURCE
1173 			|| option == MCAST_JOIN_SOURCE_GROUP
1174 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1175 			group_source_req greq;
1176 			if (length != sizeof(group_source_req))
1177 				return B_BAD_VALUE;
1178 			if (user_memcpy(&greq, value, sizeof(group_source_req)) < B_OK)
1179 				return B_BAD_ADDRESS;
1180 
1181 			return ipv4_generic_delta_membership(protocol, option,
1182 				greq.gsr_interface, &greq.gsr_group, &greq.gsr_source);
1183 		} else {
1184 			dprintf("IPv4::setsockopt(): set unknown option: %d\n", option);
1185 			return ENOPROTOOPT;
1186 		}
1187 	}
1188 
1189 	return sSocketModule->set_option(protocol->socket, level, option,
1190 		value, length);
1191 }
1192 
1193 
1194 status_t
1195 ipv4_bind(net_protocol *protocol, const struct sockaddr *address)
1196 {
1197 	if (address->sa_family != AF_INET)
1198 		return EAFNOSUPPORT;
1199 
1200 	// only INADDR_ANY and addresses of local interfaces are accepted:
1201 	if (((sockaddr_in *)address)->sin_addr.s_addr == INADDR_ANY
1202 		|| IN_MULTICAST(((sockaddr_in *)address)->sin_addr.s_addr)
1203 		|| sDatalinkModule->is_local_address(sDomain, address, NULL, NULL)) {
1204 		memcpy(&protocol->socket->address, address, sizeof(struct sockaddr_in));
1205 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
1206 			// explicitly set length, as our callers can't be trusted to
1207 			// always provide the correct length!
1208 		return B_OK;
1209 	}
1210 
1211 	return B_ERROR;
1212 		// address is unknown on this host
1213 }
1214 
1215 
1216 status_t
1217 ipv4_unbind(net_protocol *protocol, struct sockaddr *address)
1218 {
1219 	// nothing to do here
1220 	return B_OK;
1221 }
1222 
1223 
1224 status_t
1225 ipv4_listen(net_protocol *protocol, int count)
1226 {
1227 	return EOPNOTSUPP;
1228 }
1229 
1230 
1231 status_t
1232 ipv4_shutdown(net_protocol *protocol, int direction)
1233 {
1234 	return EOPNOTSUPP;
1235 }
1236 
1237 
1238 status_t
1239 ipv4_send_routed_data(net_protocol *_protocol, struct net_route *route,
1240 	net_buffer *buffer)
1241 {
1242 	if (route == NULL)
1243 		return B_BAD_VALUE;
1244 
1245 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1246 	net_interface *interface = route->interface;
1247 
1248 	TRACE_SK(protocol, "SendRoutedData(%p, %p [%ld bytes])", route, buffer,
1249 		buffer->size);
1250 
1251 	sockaddr_in &source = *(sockaddr_in *)buffer->source;
1252 	sockaddr_in &destination = *(sockaddr_in *)buffer->destination;
1253 
1254 	bool headerIncluded = false, checksumNeeded = true;
1255 	if (protocol != NULL)
1256 		headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
1257 
1258 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1259 
1260 	if (destination.sin_addr.s_addr == INADDR_ANY)
1261 		return EDESTADDRREQ;
1262 	else if (destination.sin_addr.s_addr == INADDR_BROADCAST) {
1263 		// TODO check for local broadcast addresses as well?
1264 		if (protocol && !(protocol->socket->options & SO_BROADCAST))
1265 			return B_BAD_VALUE;
1266 		buffer->flags |= MSG_BCAST;
1267 	} else if (IN_MULTICAST(destination.sin_addr.s_addr)) {
1268 		buffer->flags |= MSG_MCAST;
1269 	}
1270 
1271 	// Add IP header (if needed)
1272 
1273 	if (!headerIncluded) {
1274 		NetBufferPrepend<ipv4_header> header(buffer);
1275 		if (header.Status() < B_OK)
1276 			return header.Status();
1277 
1278 		header->version = IP_VERSION;
1279 		header->header_length = sizeof(ipv4_header) / 4;
1280 		header->service_type = protocol ? protocol->service_type : 0;
1281 		header->total_length = htons(buffer->size);
1282 		header->id = htons(atomic_add(&sPacketID, 1));
1283 		header->fragment_offset = 0;
1284 		if (protocol)
1285 			header->time_to_live = (buffer->flags & MSG_MCAST) ?
1286 				protocol->multicast_time_to_live : protocol->time_to_live;
1287 		else
1288 			header->time_to_live = (buffer->flags & MSG_MCAST) ?
1289 				kDefaultMulticastTTL : kDefaultTTL;
1290 		header->protocol = protocol ? protocol->socket->protocol : buffer->protocol;
1291 		header->checksum = 0;
1292 
1293 		header->source = source.sin_addr.s_addr;
1294 		header->destination = destination.sin_addr.s_addr;
1295 	} else {
1296 		// if IP_HDRINCL, check if the source address is set
1297 		NetBufferHeaderReader<ipv4_header> header(buffer);
1298 		if (header.Status() < B_OK)
1299 			return header.Status();
1300 
1301 		if (header->source == 0) {
1302 			header->source = source.sin_addr.s_addr;
1303 			header->checksum = 0;
1304 			header.Sync();
1305 		} else
1306 			checksumNeeded = false;
1307 	}
1308 
1309 	if (buffer->size > 0xffff)
1310 		return EMSGSIZE;
1311 
1312 	if (checksumNeeded)
1313 		*IPChecksumField(buffer) = gBufferModule->checksum(buffer, 0,
1314 			sizeof(ipv4_header), true);
1315 
1316 	TRACE_SK(protocol, "  SendRoutedData(): header chksum: %ld, buffer checksum: %ld",
1317 		gBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
1318 		gBufferModule->checksum(buffer, 0, buffer->size, true));
1319 
1320 	TRACE_SK(protocol, "  SendRoutedData(): destination: %08lx",
1321 		ntohl(destination.sin_addr.s_addr));
1322 
1323 	uint32 mtu = route->mtu ? route->mtu : interface->mtu;
1324 	if (buffer->size > mtu) {
1325 		// we need to fragment the packet
1326 		return send_fragments(protocol, route, buffer, mtu);
1327 	}
1328 
1329 	return sDatalinkModule->send_data(route, buffer);
1330 }
1331 
1332 
1333 status_t
1334 ipv4_send_data(net_protocol *_protocol, net_buffer *buffer)
1335 {
1336 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1337 
1338 	TRACE_SK(protocol, "SendData(%p [%ld bytes])", buffer, buffer->size);
1339 
1340 	if (protocol && (protocol->flags & IP_FLAG_HEADER_INCLUDED)) {
1341 		if (buffer->size < sizeof(ipv4_header))
1342 			return EINVAL;
1343 
1344 		sockaddr_in *source = (sockaddr_in *)buffer->source;
1345 		sockaddr_in *destination = (sockaddr_in *)buffer->destination;
1346 
1347 		fill_sockaddr_in(source, *NetBufferField<in_addr_t,
1348 			offsetof(ipv4_header, source)>(buffer));
1349 		fill_sockaddr_in(destination, *NetBufferField<in_addr_t,
1350 			offsetof(ipv4_header, destination)>(buffer));
1351 	}
1352 
1353 	return sDatalinkModule->send_datagram(protocol, sDomain, buffer);
1354 }
1355 
1356 
1357 ssize_t
1358 ipv4_send_avail(net_protocol *protocol)
1359 {
1360 	return B_ERROR;
1361 }
1362 
1363 
1364 status_t
1365 ipv4_read_data(net_protocol *_protocol, size_t numBytes, uint32 flags,
1366 	net_buffer **_buffer)
1367 {
1368 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1369 	RawSocket *raw = protocol->raw;
1370 	if (raw == NULL)
1371 		return B_ERROR;
1372 
1373 	TRACE_SK(protocol, "ReadData(%lu, 0x%lx)", numBytes, flags);
1374 
1375 	return raw->SocketDequeue(flags, _buffer);
1376 }
1377 
1378 
1379 ssize_t
1380 ipv4_read_avail(net_protocol *_protocol)
1381 {
1382 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1383 	RawSocket *raw = protocol->raw;
1384 	if (raw == NULL)
1385 		return B_ERROR;
1386 
1387 	return raw->AvailableData();
1388 }
1389 
1390 
1391 struct net_domain *
1392 ipv4_get_domain(net_protocol *protocol)
1393 {
1394 	return sDomain;
1395 }
1396 
1397 
1398 size_t
1399 ipv4_get_mtu(net_protocol *protocol, const struct sockaddr *address)
1400 {
1401 	net_route *route = sDatalinkModule->get_route(sDomain, address);
1402 	if (route == NULL)
1403 		return 0;
1404 
1405 	size_t mtu;
1406 	if (route->mtu != 0)
1407 		mtu = route->mtu;
1408 	else
1409 		mtu = route->interface->mtu;
1410 
1411 	sDatalinkModule->put_route(sDomain, route);
1412 	return mtu - sizeof(ipv4_header);
1413 }
1414 
1415 
1416 status_t
1417 ipv4_receive_data(net_buffer *buffer)
1418 {
1419 	TRACE("ReceiveData(%p [%ld bytes])", buffer, buffer->size);
1420 
1421 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1422 	if (bufferHeader.Status() < B_OK)
1423 		return bufferHeader.Status();
1424 
1425 	ipv4_header &header = bufferHeader.Data();
1426 	//dump_ipv4_header(header);
1427 
1428 	if (header.version != IP_VERSION)
1429 		return B_BAD_TYPE;
1430 
1431 	uint16 packetLength = header.TotalLength();
1432 	uint16 headerLength = header.HeaderLength();
1433 	if (packetLength > buffer->size
1434 		|| headerLength < sizeof(ipv4_header))
1435 		return B_BAD_DATA;
1436 
1437 	// TODO: would be nice to have a direct checksum function somewhere
1438 	if (gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1439 		return B_BAD_DATA;
1440 
1441 	struct sockaddr_in &source = *(struct sockaddr_in *)buffer->source;
1442 	struct sockaddr_in &destination = *(struct sockaddr_in *)buffer->destination;
1443 
1444 	fill_sockaddr_in(&source, header.source);
1445 	fill_sockaddr_in(&destination, header.destination);
1446 
1447 	// lower layers notion of Broadcast or Multicast have no relevance to us
1448 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1449 
1450 	if (header.destination == INADDR_BROADCAST) {
1451 		buffer->flags |= MSG_BCAST;
1452 	} else if (IN_MULTICAST(header.destination)) {
1453 		buffer->flags |= MSG_MCAST;
1454 	} else {
1455 		uint32 matchedAddressType = 0;
1456 		// test if the packet is really for us
1457 		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1458 			&buffer->interface, &matchedAddressType)) {
1459 			TRACE("  ReceiveData(): packet was not for us %lx -> %lx",
1460 				ntohl(header.source), ntohl(header.destination));
1461 			return B_ERROR;
1462 		}
1463 
1464 		// copy over special address types (MSG_BCAST or MSG_MCAST):
1465 		buffer->flags |= matchedAddressType;
1466 	}
1467 
1468 	uint8 protocol = buffer->protocol = header.protocol;
1469 
1470 	// remove any trailing/padding data
1471 	status_t status = gBufferModule->trim(buffer, packetLength);
1472 	if (status < B_OK)
1473 		return status;
1474 
1475 	// check for fragmentation
1476 	uint16 fragmentOffset = ntohs(header.fragment_offset);
1477 	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
1478 		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
1479 		// this is a fragment
1480 		TRACE("  ReceiveData(): Found a Fragment!");
1481 		status = reassemble_fragments(header, &buffer);
1482 		TRACE("  ReceiveData():  -> %s", strerror(status));
1483 		if (status != B_OK)
1484 			return status;
1485 
1486 		if (buffer == NULL) {
1487 			// buffer was put into fragment packet
1488 			TRACE("  ReceiveData(): Not yet assembled.");
1489 			return B_OK;
1490 		}
1491 	}
1492 
1493 	// Since the buffer might have been changed (reassembled fragment)
1494 	// we must no longer access bufferHeader or header anymore after
1495 	// this point
1496 
1497 	raw_receive_data(buffer);
1498 
1499 	gBufferModule->remove_header(buffer, headerLength);
1500 		// the header is of variable size and may include IP options
1501 		// (that we ignore for now)
1502 
1503 	net_protocol_module_info *module = receiving_protocol(protocol);
1504 	if (module == NULL) {
1505 		// no handler for this packet
1506 		return EAFNOSUPPORT;
1507 	}
1508 
1509 	if (buffer->flags & MSG_MCAST) {
1510 		// Unfortunely historical reasons dictate that the IP multicast
1511 		// model be a little different from the unicast one. We deliver
1512 		// this frame directly to all sockets registered with interest
1513 		// for this multicast group.
1514 		return deliver_multicast(module, buffer, false);
1515 	}
1516 
1517 	return module->receive_data(buffer);
1518 }
1519 
1520 
1521 status_t
1522 ipv4_deliver_data(net_protocol *_protocol, net_buffer *buffer)
1523 {
1524 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1525 
1526 	if (protocol->raw == NULL)
1527 		return B_ERROR;
1528 
1529 	return protocol->raw->SocketEnqueue(buffer);
1530 }
1531 
1532 
1533 status_t
1534 ipv4_error(uint32 code, net_buffer *data)
1535 {
1536 	return B_ERROR;
1537 }
1538 
1539 
1540 status_t
1541 ipv4_error_reply(net_protocol *protocol, net_buffer *causedError, uint32 code,
1542 	void *errorData)
1543 {
1544 	return B_ERROR;
1545 }
1546 
1547 
1548 static int
1549 dump_ipv4_multicast(int argc, char *argv[])
1550 {
1551 	MulticastState::Iterator it = sMulticastState->GetIterator();
1552 
1553 	while (it.HasNext()) {
1554 		IPv4GroupInterface *state = it.Next();
1555 
1556 		char addrBuf[64];
1557 
1558 		kprintf("%p: group <%s, %s, %s {", state, state->Interface()->name,
1559 			print_address(&state->Address(), addrBuf, sizeof(addrBuf)),
1560 			state->Mode() == IPv4GroupInterface::kExclude ?  "Exclude" :
1561 			"Include");
1562 
1563 		int count = 0;
1564 		IPv4GroupInterface::AddressSet::Iterator it =
1565 			state->Sources().GetIterator();
1566 		while (it.HasNext()) {
1567 			kprintf("%s%s", count > 0 ? ", " : "", print_address(&it.Next(),
1568 				addrBuf, sizeof(addrBuf)));
1569 			count++;
1570 		}
1571 
1572 		kprintf("}> sock %p\n", state->Parent()->Socket());
1573 	}
1574 
1575 	return 0;
1576 }
1577 
1578 
1579 //	#pragma mark -
1580 
1581 
1582 status_t
1583 init_ipv4()
1584 {
1585 	sPacketID = (int32)system_time();
1586 
1587 	status_t status = benaphore_init(&sRawSocketsLock, "raw sockets");
1588 	if (status < B_OK)
1589 		return status;
1590 
1591 	status = benaphore_init(&sFragmentLock, "IPv4 Fragments");
1592 	if (status < B_OK)
1593 		goto err1;
1594 
1595 	status = benaphore_init(&sMulticastGroupsLock, "IPv4 multicast groups");
1596 	if (status < B_OK)
1597 		goto err2;
1598 
1599 	status = benaphore_init(&sReceivingProtocolLock, "IPv4 receiving protocols");
1600 	if (status < B_OK)
1601 		goto err3;
1602 
1603 	sMulticastState = new MulticastState();
1604 	if (sMulticastState == NULL)
1605 		goto err4;
1606 
1607 	status = sMulticastState->InitCheck();
1608 	if (status < B_OK)
1609 		goto err5;
1610 
1611 	sFragmentHash = hash_init(MAX_HASH_FRAGMENTS, FragmentPacket::NextOffset(),
1612 		&FragmentPacket::Compare, &FragmentPacket::Hash);
1613 	if (sFragmentHash == NULL)
1614 		goto err5;
1615 
1616 	new (&sRawSockets) RawSocketList;
1617 		// static initializers do not work in the kernel,
1618 		// so we have to do it here, manually
1619 		// TODO: for modules, this shouldn't be required
1620 
1621 	status = gStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
1622 		"network/protocols/ipv4/v1", NULL);
1623 	if (status < B_OK)
1624 		goto err6;
1625 
1626 	status = gStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
1627 		&gIPv4AddressModule, &sDomain);
1628 	if (status < B_OK)
1629 		goto err6;
1630 
1631 	add_debugger_command("ipv4_multicast", dump_ipv4_multicast,
1632 		"list all current IPv4 multicast states");
1633 
1634 	return B_OK;
1635 
1636 err6:
1637 	hash_uninit(sFragmentHash);
1638 err5:
1639 	delete sMulticastState;
1640 err4:
1641 	benaphore_destroy(&sReceivingProtocolLock);
1642 err3:
1643 	benaphore_destroy(&sMulticastGroupsLock);
1644 err2:
1645 	benaphore_destroy(&sFragmentLock);
1646 err1:
1647 	benaphore_destroy(&sRawSocketsLock);
1648 	return status;
1649 }
1650 
1651 
1652 status_t
1653 uninit_ipv4()
1654 {
1655 	benaphore_lock(&sReceivingProtocolLock);
1656 
1657 	remove_debugger_command("ipv4_multicast", dump_ipv4_multicast);
1658 
1659 	// put all the domain receiving protocols we gathered so far
1660 	for (uint32 i = 0; i < 256; i++) {
1661 		if (sReceivingProtocol[i] != NULL)
1662 			gStackModule->put_domain_receiving_protocol(sDomain, i);
1663 	}
1664 
1665 	gStackModule->unregister_domain(sDomain);
1666 	benaphore_unlock(&sReceivingProtocolLock);
1667 
1668 	delete sMulticastState;
1669 	hash_uninit(sFragmentHash);
1670 
1671 	benaphore_destroy(&sMulticastGroupsLock);
1672 	benaphore_destroy(&sFragmentLock);
1673 	benaphore_destroy(&sRawSocketsLock);
1674 	benaphore_destroy(&sReceivingProtocolLock);
1675 
1676 	return B_OK;
1677 }
1678 
1679 
1680 static status_t
1681 ipv4_std_ops(int32 op, ...)
1682 {
1683 	switch (op) {
1684 		case B_MODULE_INIT:
1685 			return init_ipv4();
1686 		case B_MODULE_UNINIT:
1687 			return uninit_ipv4();
1688 
1689 		default:
1690 			return B_ERROR;
1691 	}
1692 }
1693 
1694 
1695 net_protocol_module_info gIPv4Module = {
1696 	{
1697 		"network/protocols/ipv4/v1",
1698 		0,
1699 		ipv4_std_ops
1700 	},
1701 	ipv4_init_protocol,
1702 	ipv4_uninit_protocol,
1703 	ipv4_open,
1704 	ipv4_close,
1705 	ipv4_free,
1706 	ipv4_connect,
1707 	ipv4_accept,
1708 	ipv4_control,
1709 	ipv4_getsockopt,
1710 	ipv4_setsockopt,
1711 	ipv4_bind,
1712 	ipv4_unbind,
1713 	ipv4_listen,
1714 	ipv4_shutdown,
1715 	ipv4_send_data,
1716 	ipv4_send_routed_data,
1717 	ipv4_send_avail,
1718 	ipv4_read_data,
1719 	ipv4_read_avail,
1720 	ipv4_get_domain,
1721 	ipv4_get_mtu,
1722 	ipv4_receive_data,
1723 	ipv4_deliver_data,
1724 	ipv4_error,
1725 	ipv4_error_reply,
1726 };
1727 
1728 module_dependency module_dependencies[] = {
1729 	{NET_STACK_MODULE_NAME, (module_info **)&gStackModule},
1730 	{NET_BUFFER_MODULE_NAME, (module_info **)&gBufferModule},
1731 	{NET_DATALINK_MODULE_NAME, (module_info **)&sDatalinkModule},
1732 	{NET_SOCKET_MODULE_NAME, (module_info **)&sSocketModule},
1733 	{}
1734 };
1735 
1736 module_info *modules[] = {
1737 	(module_info *)&gIPv4Module,
1738 	NULL
1739 };
1740