xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision 746cac055adc6ac3308c7bc2d29040fb95689cc9)
1 /*
2  * Copyright 2006-2008, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4_address.h"
11 #include "multicast.h"
12 
13 #include <net_datalink.h>
14 #include <net_datalink_protocol.h>
15 #include <net_protocol.h>
16 #include <net_stack.h>
17 #include <NetBufferUtilities.h>
18 #include <ProtocolUtilities.h>
19 
20 #include <ByteOrder.h>
21 #include <KernelExport.h>
22 #include <util/AutoLock.h>
23 #include <util/list.h>
24 #include <util/khash.h>
25 #include <util/DoublyLinkedList.h>
26 #include <util/MultiHashTable.h>
27 
28 #include <netinet/in.h>
29 #include <netinet/ip.h>
30 #include <new>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <utility>
35 
36 
// Debug tracing: when TRACE_IPV4 is defined, TRACE() and TRACE_SK() print
// their message through dprintf(), prefixed with "IPv4" and the current
// system_time() (TRACE_SK() additionally prints the given protocol pointer).
// Without it, both macros expand to an empty statement.
//#define TRACE_IPV4
#ifdef TRACE_IPV4
#	define TRACE(format, args...) \
		dprintf("IPv4 [%llu] " format "\n", system_time() , ##args)
#	define TRACE_SK(protocol, format, args...) \
		dprintf("IPv4 [%llu] %p " format "\n", system_time(), \
			protocol , ##args)
#else
#	define TRACE(args...)		do { } while (0)
#	define TRACE_SK(args...)	do { } while (0)
#endif
48 
/*!	Fixed part of an IPv4 header, laid out as a packed structure.
	The 16 bit length and fragment fields are stored unconverted; the accessor
	methods at the end apply ntohs() for code that needs host byte order.
*/
struct ipv4_header {
	// the two 4 bit fields share one byte; their declaration order depends
	// on the host's endianness
#if B_HOST_IS_LENDIAN == 1
	uint8		header_length : 4;	// header length in 32-bit words
	uint8		version : 4;
#else
	uint8		version : 4;
	uint8		header_length : 4;
#endif
	uint8		service_type;
	uint16		total_length;
	uint16		id;
	uint16		fragment_offset;
		// fragment flags and offset, see IP_FRAGMENT_OFFSET_MASK
	uint8		time_to_live;
	uint8		protocol;
	uint16		checksum;
	in_addr_t	source;
	in_addr_t	destination;

	// header length in bytes (the field counts 32-bit words)
	uint16 HeaderLength() const { return header_length << 2; }
	// total_length converted with ntohs()
	uint16 TotalLength() const { return ntohs(total_length); }
	// fragment_offset converted with ntohs(); still includes the flag bits
	uint16 FragmentOffset() const { return ntohs(fragment_offset); }
} _PACKED;
71 
#define IP_VERSION				4

// fragment flags
// These share the 16 bit value returned by ipv4_header::FragmentOffset()
// with the fragment offset itself; the masked offset is shifted left by 3
// (i.e. multiplied by 8) to obtain a byte position.
#define IP_RESERVED_FLAG		0x8000
#define IP_DONT_FRAGMENT		0x4000
#define IP_MORE_FRAGMENTS		0x2000
#define IP_FRAGMENT_OFFSET_MASK	0x1fff

#define MAX_HASH_FRAGMENTS 		64
	// slots in the fragment packet's hash
#define FRAGMENT_TIMEOUT		60000000LL
	// discard fragment after 60 seconds

// List of net_buffers; FragmentPacket keeps the fragments it has received
// so far in one of these.
typedef DoublyLinkedList<struct net_buffer,
	DoublyLinkedListCLink<struct net_buffer> > FragmentList;

// NetBufferField instantiated for the 16 bit field at the offset of
// ipv4_header::checksum.
typedef NetBufferField<uint16, offsetof(ipv4_header, checksum)> IPChecksumField;
89 
/*!	Key under which a FragmentPacket is stored in the fragment hash.
	reassemble_fragments() fills it with the source, destination, id and
	protocol fields copied unconverted from the fragment's IPv4 header.
*/
struct ipv4_packet_key {
	in_addr_t	source;
	in_addr_t	destination;
	uint16		id;
	uint8		protocol;
};
96 
/*!	Collects the fragments of one IPv4 datagram until it can be reassembled.
	Instances are stored in a hash table keyed by ipv4_packet_key and carry a
	timer (StaleTimer()) that removes and deletes them when it fires.
*/
class FragmentPacket {
	public:
		FragmentPacket(const ipv4_packet_key &key);
		~FragmentPacket();

		// adds the payload bytes [start, end) carried by "buffer"
		status_t AddFragment(uint16 start, uint16 end, net_buffer *buffer,
					bool lastFragment);
		// merges all collected fragments into "to"
		status_t Reassemble(net_buffer *to);

		// true once the last fragment was seen and no bytes are missing
		bool IsComplete() const { return fReceivedLastFragment && fBytesLeft == 0; }

		// callbacks and link offset for the hash table holding the packets
		static uint32 Hash(void *_packet, const void *_key, uint32 range);
		static int Compare(void *_packet, const void *_key);
		static int32 NextOffset() { return offsetof(FragmentPacket, fNext); }
		static void StaleTimer(struct net_timer *timer, void *data);

	private:
		FragmentPacket	*fNext;
			// hash table link, see NextOffset()
		struct ipv4_packet_key fKey;
		bool			fReceivedLastFragment;
		int32			fBytesLeft;
			// starts at IP_MAXPACKET and is reduced by every byte received,
			// and by the unused remainder once the last fragment is known
		FragmentList	fFragments;
		net_timer		fTimer;
};
121 
122 
/*!	A DatagramSocket that can additionally be linked into a RawSocketList.
	ipv4_open() creates one per opened socket and adds it to sRawSockets.
*/
class RawSocket : public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
	public:
		RawSocket(net_socket *socket);
};

typedef DoublyLinkedList<RawSocket> RawSocketList;

// IPv4 instantiations of the generic multicast helpers from multicast.h
typedef MulticastGroupInterface<IPv4Multicast> IPv4GroupInterface;
typedef MulticastFilter<IPv4Multicast> IPv4MulticastFilter;
132 
133 struct MulticastStateHash {
134 	typedef std::pair<const in_addr *, uint32> KeyType;
135 	typedef IPv4GroupInterface ValueType;
136 
137 	size_t HashKey(const KeyType &key) const
138 		{ return key.first->s_addr ^ key.second; }
139 	size_t Hash(ValueType *value) const
140 		{ return HashKey(std::make_pair(&value->Address(),
141 			value->Interface()->index)); }
142 	bool Compare(const KeyType &key, ValueType *value) const
143 		{ return value->Interface()->index == key.second
144 			&& value->Address().s_addr == key.first->s_addr; }
145 	bool CompareValues(ValueType *value1, ValueType *value2) const
146 		{ return value1->Interface()->index == value2->Interface()->index
147 			&& value1->Address().s_addr == value2->Address().s_addr; }
148 	HashTableLink<ValueType> *GetLink(ValueType *value) const { return value; }
149 };
150 
151 
/*!	Per-socket state of the IPv4 protocol.
	The constructor only sets up the multicast filter; ipv4_init_protocol()
	assigns the initial values of the remaining fields.
*/
struct ipv4_protocol : net_protocol {
	ipv4_protocol()
		: multicast_filter(this) {}

	RawSocket	*raw;
		// set by ipv4_open(), cleared again by ipv4_close()
	uint8		service_type;
	uint8		time_to_live;
	uint8		multicast_time_to_live;
	uint32		flags;
		// IP_FLAG_* bits

	IPv4MulticastFilter multicast_filter;
};
164 
// protocol flags
// (kept in ipv4_protocol::flags; this one is toggled via IP_HDRINCL)
#define IP_FLAG_HEADER_INCLUDED	0x01


// initial values ipv4_init_protocol() assigns to a new protocol
static const int kDefaultTTL = 254;
static const int kDefaultMulticastTTL = 1;


extern net_protocol_module_info gIPv4Module;
	// we need this in ipv4_std_ops() for registering the AF_INET domain

net_stack_module_info *gStackModule;
net_buffer_module_info *gBufferModule;

static struct net_domain *sDomain;
static net_datalink_module_info *sDatalinkModule;
static net_socket_module_info *sSocketModule;
static int32 sPacketID;
static RawSocketList sRawSockets;
	// all sockets created by ipv4_open(), guarded by sRawSocketsLock
static mutex sRawSocketsLock;
static mutex sFragmentLock;
	// guards sFragmentHash and the FragmentPackets stored in it
static hash_table *sFragmentHash;
static mutex sMulticastGroupsLock;
	// guards sMulticastState

typedef MultiHashTable<MulticastStateHash> MulticastState;
static MulticastState *sMulticastState;

// cache filled by receiving_protocol(), indexed by IP protocol number
static net_protocol_module_info *sReceivingProtocol[256];
static mutex sReceivingProtocolLock;
194 
195 
/*!	Formats \a address in dotted decimal notation into \a buf (at most
	\a bufLen bytes, as limited by snprintf()) and returns \a buf.
*/
static const char *
print_address(const in_addr *address, char *buf, size_t bufLen)
{
	// convert to host order, so that the most significant byte is the
	// first octet to be printed
	const unsigned int hostOrder = ntohl(address->s_addr);

	const unsigned int first = (hostOrder >> 24) & 0xff;
	const unsigned int second = (hostOrder >> 16) & 0xff;
	const unsigned int third = (hostOrder >> 8) & 0xff;
	const unsigned int fourth = hostOrder & 0xff;

	snprintf(buf, bufLen, "%u.%u.%u.%u", first, second, third, fourth);
	return buf;
}
206 
207 
208 RawSocket::RawSocket(net_socket *socket)
209 	: DatagramSocket<>("ipv4 raw socket", socket)
210 {
211 }
212 
213 
214 //	#pragma mark -
215 
216 
217 FragmentPacket::FragmentPacket(const ipv4_packet_key &key)
218 	:
219 	fKey(key),
220 	fReceivedLastFragment(false),
221 	fBytesLeft(IP_MAXPACKET)
222 {
223 	gStackModule->init_timer(&fTimer, StaleTimer, this);
224 }
225 
226 
227 FragmentPacket::~FragmentPacket()
228 {
229 	// cancel the kill timer
230 	gStackModule->set_timer(&fTimer, -1);
231 
232 	// delete all fragments
233 	net_buffer *buffer;
234 	while ((buffer = fFragments.RemoveHead()) != NULL) {
235 		gBufferModule->free(buffer);
236 	}
237 }
238 
239 
240 status_t
241 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer *buffer,
242 	bool lastFragment)
243 {
244 	// restart the timer
245 	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
246 
247 	if (start >= end) {
248 		// invalid fragment
249 		return B_BAD_DATA;
250 	}
251 
252 	// Search for a position in the list to insert the fragment
253 
254 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
255 	net_buffer *previous = NULL;
256 	net_buffer *next = NULL;
257 	while ((previous = iterator.Next()) != NULL) {
258 		if (previous->fragment.start <= start) {
259 			// The new fragment can be inserted after this one
260 			break;
261 		}
262 
263 		next = previous;
264 	}
265 
266 	// See if we already have the fragment's data
267 
268 	if (previous != NULL && previous->fragment.start <= start
269 		&& previous->fragment.end >= end) {
270 		// we do, so we can just drop this fragment
271 		gBufferModule->free(buffer);
272 		return B_OK;
273 	}
274 
275 	TRACE("    previous: %p, next: %p", previous, next);
276 
277 	// If we have parts of the data already, truncate as needed
278 
279 	if (previous != NULL && previous->fragment.end > start) {
280 		TRACE("    remove header %d bytes", previous->fragment.end - start);
281 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
282 		start = previous->fragment.end;
283 	}
284 	if (next != NULL && next->fragment.start < end) {
285 		TRACE("    remove trailer %d bytes", next->fragment.start - end);
286 		gBufferModule->remove_trailer(buffer, next->fragment.start - end);
287 		end = next->fragment.start;
288 	}
289 
290 	// Now try if we can already merge the fragments together
291 
292 	// We will always keep the last buffer received, so that we can still
293 	// report an error (in which case we're not responsible for freeing it)
294 
295 	if (previous != NULL && previous->fragment.end == start) {
296 		fFragments.Remove(previous);
297 
298 		buffer->fragment.start = previous->fragment.start;
299 		buffer->fragment.end = end;
300 
301 		status_t status = gBufferModule->merge(buffer, previous, false);
302 		TRACE("    merge previous: %s", strerror(status));
303 		if (status < B_OK) {
304 			fFragments.Insert(next, previous);
305 			return status;
306 		}
307 
308 		fFragments.Insert(next, buffer);
309 
310 		// cut down existing hole
311 		fBytesLeft -= end - start;
312 
313 		if (lastFragment && !fReceivedLastFragment) {
314 			fReceivedLastFragment = true;
315 			fBytesLeft -= IP_MAXPACKET - end;
316 		}
317 
318 		TRACE("    hole length: %d", (int)fBytesLeft);
319 
320 		return B_OK;
321 	} else if (next != NULL && next->fragment.start == end) {
322 		fFragments.Remove(next);
323 
324 		buffer->fragment.start = start;
325 		buffer->fragment.end = next->fragment.end;
326 
327 		status_t status = gBufferModule->merge(buffer, next, true);
328 		TRACE("    merge next: %s", strerror(status));
329 		if (status < B_OK) {
330 			fFragments.Insert((net_buffer *)previous->link.next, next);
331 			return status;
332 		}
333 
334 		fFragments.Insert((net_buffer *)previous->link.next, buffer);
335 
336 		// cut down existing hole
337 		fBytesLeft -= end - start;
338 
339 		if (lastFragment && !fReceivedLastFragment) {
340 			fReceivedLastFragment = true;
341 			fBytesLeft -= IP_MAXPACKET - end;
342 		}
343 
344 		TRACE("    hole length: %d", (int)fBytesLeft);
345 
346 		return B_OK;
347 	}
348 
349 	// We couldn't merge the fragments, so we need to add it as is
350 
351 	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);
352 
353 	buffer->fragment.start = start;
354 	buffer->fragment.end = end;
355 	fFragments.Insert(next, buffer);
356 
357 	// update length of the hole, if any
358 	fBytesLeft -= end - start;
359 
360 	if (lastFragment && !fReceivedLastFragment) {
361 		fReceivedLastFragment = true;
362 		fBytesLeft -= IP_MAXPACKET - end;
363 	}
364 
365 	TRACE("    hole length: %d", (int)fBytesLeft);
366 
367 	return B_OK;
368 }
369 
370 
371 /*!	Reassembles the fragments to the specified buffer \a to.
372 	This buffer must have been added via AddFragment() before.
373 */
374 status_t
375 FragmentPacket::Reassemble(net_buffer *to)
376 {
377 	if (!IsComplete())
378 		return B_ERROR;
379 
380 	net_buffer *buffer = NULL;
381 
382 	net_buffer *fragment;
383 	while ((fragment = fFragments.RemoveHead()) != NULL) {
384 		if (buffer != NULL) {
385 			status_t status;
386 			if (to == fragment) {
387 				status = gBufferModule->merge(fragment, buffer, false);
388 				buffer = fragment;
389 			} else
390 				status = gBufferModule->merge(buffer, fragment, true);
391 			if (status < B_OK)
392 				return status;
393 		} else
394 			buffer = fragment;
395 	}
396 
397 	if (buffer != to)
398 		panic("ipv4 packet reassembly did not work correctly.\n");
399 
400 	return B_OK;
401 }
402 
403 
404 int
405 FragmentPacket::Compare(void *_packet, const void *_key)
406 {
407 	const ipv4_packet_key *key = (ipv4_packet_key *)_key;
408 	ipv4_packet_key *packetKey = &((FragmentPacket *)_packet)->fKey;
409 
410 	if (packetKey->id == key->id
411 		&& packetKey->source == key->source
412 		&& packetKey->destination == key->destination
413 		&& packetKey->protocol == key->protocol)
414 		return 0;
415 
416 	return 1;
417 }
418 
419 
420 uint32
421 FragmentPacket::Hash(void *_packet, const void *_key, uint32 range)
422 {
423 	const struct ipv4_packet_key *key = (struct ipv4_packet_key *)_key;
424 	FragmentPacket *packet = (FragmentPacket *)_packet;
425 	if (packet != NULL)
426 		key = &packet->fKey;
427 
428 	return (key->source ^ key->destination ^ key->protocol ^ key->id) % range;
429 }
430 
431 
432 /*static*/ void
433 FragmentPacket::StaleTimer(struct net_timer *timer, void *data)
434 {
435 	FragmentPacket *packet = (FragmentPacket *)data;
436 	TRACE("Assembling FragmentPacket %p timed out!", packet);
437 
438 	MutexLocker locker(&sFragmentLock);
439 
440 	hash_remove(sFragmentHash, packet);
441 	delete packet;
442 }
443 
444 
445 //	#pragma mark -
446 
447 
#if 0
/*!	Debug helper (currently compiled out): prints all fields of \a header
	via dprintf().
*/
static void
dump_ipv4_header(ipv4_header &header)
{
	// The addresses are stored in network byte order, so their octets appear
	// in memory in the order they are printed in - independent of the
	// host's endianness.
	const uint8 *src = (const uint8 *)&header.source;
	const uint8 *dst = (const uint8 *)&header.destination;

	dprintf("  version: %d\n", header.version);
	dprintf("  header_length: 4 * %d\n", header.header_length);
	dprintf("  service_type: %d\n", header.service_type);
	dprintf("  total_length: %d\n", header.TotalLength());
	dprintf("  id: %d\n", ntohs(header.id));
	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
		header.FragmentOffset() & IP_FRAGMENT_OFFSET_MASK,
		(header.FragmentOffset() & IP_RESERVED_FLAG) ? 'r' : '-',
		(header.FragmentOffset() & IP_DONT_FRAGMENT) ? 'd' : '-',
		(header.FragmentOffset() & IP_MORE_FRAGMENTS) ? 'm' : '-');
	dprintf("  time_to_live: %d\n", header.time_to_live);
	dprintf("  protocol: %d\n", header.protocol);
	dprintf("  checksum: %d\n", ntohs(header.checksum));
	dprintf("  source: %d.%d.%d.%d\n", src[0], src[1], src[2], src[3]);
	dprintf("  destination: %d.%d.%d.%d\n", dst[0], dst[1], dst[2], dst[3]);
}
#endif
484 
485 
486 /*!
487 	Attempts to re-assemble fragmented packets.
488 	\return B_OK if everything went well; if it could reassemble the packet, \a _buffer
489 		will point to its buffer, otherwise, it will be \c NULL.
490 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
491 */
492 static status_t
493 reassemble_fragments(const ipv4_header &header, net_buffer **_buffer)
494 {
495 	net_buffer *buffer = *_buffer;
496 	status_t status;
497 
498 	struct ipv4_packet_key key;
499 	key.source = (in_addr_t)header.source;
500 	key.destination = (in_addr_t)header.destination;
501 	key.id = header.id;
502 	key.protocol = header.protocol;
503 
504 	// TODO: Make locking finer grained.
505 	MutexLocker locker(&sFragmentLock);
506 
507 	FragmentPacket *packet = (FragmentPacket *)hash_lookup(sFragmentHash, &key);
508 	if (packet == NULL) {
509 		// New fragment packet
510 		packet = new (std::nothrow) FragmentPacket(key);
511 		if (packet == NULL)
512 			return B_NO_MEMORY;
513 
514 		// add packet to hash
515 		status = hash_insert(sFragmentHash, packet);
516 		if (status != B_OK) {
517 			delete packet;
518 			return status;
519 		}
520 	}
521 
522 	uint16 fragmentOffset = header.FragmentOffset();
523 	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
524 	uint16 end = start + header.TotalLength() - header.HeaderLength();
525 	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;
526 
527 	TRACE("   Received IPv4 %sfragment of size %d, offset %d.",
528 		lastFragment ? "last ": "", end - start, start);
529 
530 	// Remove header unless this is the first fragment
531 	if (start != 0)
532 		gBufferModule->remove_header(buffer, header.HeaderLength());
533 
534 	status = packet->AddFragment(start, end, buffer, lastFragment);
535 	if (status != B_OK)
536 		return status;
537 
538 	if (packet->IsComplete()) {
539 		hash_remove(sFragmentHash, packet);
540 			// no matter if reassembling succeeds, we won't need this packet anymore
541 
542 		status = packet->Reassemble(buffer);
543 		delete packet;
544 
545 		// _buffer does not change
546 		return status;
547 	}
548 
549 	// This indicates that the packet is not yet complete
550 	*_buffer = NULL;
551 	return B_OK;
552 }
553 
554 
555 /*!
556 	Fragments the incoming buffer and send all fragments via the specified
557 	\a route.
558 */
559 static status_t
560 send_fragments(ipv4_protocol *protocol, struct net_route *route,
561 	net_buffer *buffer, uint32 mtu)
562 {
563 	TRACE_SK(protocol, "SendFragments(%lu bytes, mtu %lu)", buffer->size, mtu);
564 
565 	NetBufferHeaderReader<ipv4_header> originalHeader(buffer);
566 	if (originalHeader.Status() < B_OK)
567 		return originalHeader.Status();
568 
569 	uint16 headerLength = originalHeader->HeaderLength();
570 	uint32 bytesLeft = buffer->size - headerLength;
571 	uint32 fragmentOffset = 0;
572 	status_t status = B_OK;
573 
574 	net_buffer *headerBuffer = gBufferModule->split(buffer, headerLength);
575 	if (headerBuffer == NULL)
576 		return B_NO_MEMORY;
577 
578 	// TODO we need to make sure ipv4_header is contiguous or
579 	//      use another construct.
580 	NetBufferHeaderReader<ipv4_header> bufferHeader(headerBuffer);
581 	ipv4_header *header = &bufferHeader.Data();
582 
583 	// adapt MTU to be a multiple of 8 (fragment offsets can only be specified this way)
584 	mtu -= headerLength;
585 	mtu &= ~7;
586 	TRACE("  adjusted MTU to %ld\n", mtu);
587 
588 	TRACE("  bytesLeft = %ld\n", bytesLeft);
589 	while (bytesLeft > 0) {
590 		uint32 fragmentLength = min_c(bytesLeft, mtu);
591 		bytesLeft -= fragmentLength;
592 		bool lastFragment = bytesLeft == 0;
593 
594 		header->total_length = htons(fragmentLength + headerLength);
595 		header->fragment_offset = htons((lastFragment ? 0 : IP_MORE_FRAGMENTS)
596 			| (fragmentOffset >> 3));
597 		header->checksum = 0;
598 		header->checksum = gStackModule->checksum((uint8 *)header, headerLength);
599 			// TODO: compute the checksum only for those parts that changed?
600 
601 		TRACE("  send fragment of %ld bytes (%ld bytes left)\n", fragmentLength, bytesLeft);
602 
603 		net_buffer *fragmentBuffer;
604 		if (!lastFragment) {
605 			fragmentBuffer = gBufferModule->split(buffer, fragmentLength);
606 			fragmentOffset += fragmentLength;
607 		} else
608 			fragmentBuffer = buffer;
609 
610 		if (fragmentBuffer == NULL) {
611 			status = B_NO_MEMORY;
612 			break;
613 		}
614 
615 		// copy header to fragment
616 		status = gBufferModule->prepend(fragmentBuffer, header, headerLength);
617 
618 		// send fragment
619 		if (status == B_OK)
620 			status = sDatalinkModule->send_data(route, fragmentBuffer);
621 
622 		if (lastFragment) {
623 			// we don't own the last buffer, so we don't have to free it
624 			break;
625 		}
626 
627 		if (status < B_OK) {
628 			gBufferModule->free(fragmentBuffer);
629 			break;
630 		}
631 	}
632 
633 	gBufferModule->free(headerBuffer);
634 	return status;
635 }
636 
637 
638 static status_t
639 deliver_multicast(net_protocol_module_info *module, net_buffer *buffer,
640 	bool deliverToRaw)
641 {
642 	if (module->deliver_data == NULL)
643 		return B_OK;
644 
645 	MutexLocker _(sMulticastGroupsLock);
646 
647 	sockaddr_in *multicastAddr = (sockaddr_in *)buffer->destination;
648 
649 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
650 		&multicastAddr->sin_addr, buffer->interface->index));
651 
652 	while (it.HasNext()) {
653 		IPv4GroupInterface *state = it.Next();
654 
655 		if (deliverToRaw && state->Parent()->Socket()->raw == NULL)
656 			continue;
657 
658 		if (state->FilterAccepts(buffer)) {
659 			// as Multicast filters are installed with an IPv4 protocol
660 			// reference, we need to go and find the appropriate instance
661 			// related to the 'receiving protocol' with module 'module'.
662 			net_protocol *proto =
663 				state->Parent()->Socket()->socket->first_protocol;
664 
665 			while (proto && proto->module != module)
666 				proto = proto->next;
667 
668 			if (proto)
669 				module->deliver_data(proto, buffer);
670 		}
671 	}
672 
673 	return B_OK;
674 }
675 
676 
677 static void
678 raw_receive_data(net_buffer *buffer)
679 {
680 	MutexLocker locker(sRawSocketsLock);
681 
682 	if (sRawSockets.IsEmpty())
683 		return;
684 
685 	TRACE("RawReceiveData(%i)", buffer->protocol);
686 
687 	if (buffer->flags & MSG_MCAST) {
688 		// we need to call deliver_multicast here separately as
689 		// buffer still has the IP header, and it won't in the
690 		// next call. This isn't very optimized but works for now.
691 		// A better solution would be to hold separate hash tables
692 		// and lists for RAW and non-RAW sockets.
693 		deliver_multicast(&gIPv4Module, buffer, true);
694 	} else {
695 		RawSocketList::Iterator iterator = sRawSockets.GetIterator();
696 
697 		while (iterator.HasNext()) {
698 			RawSocket *raw = iterator.Next();
699 
700 			if (raw->Socket()->protocol == buffer->protocol)
701 				raw->SocketEnqueue(buffer);
702 		}
703 	}
704 }
705 
706 
707 static sockaddr *
708 fill_sockaddr_in(sockaddr_in *destination, const in_addr &source)
709 {
710 	memset(destination, 0, sizeof(sockaddr_in));
711 	destination->sin_family = AF_INET;
712 	destination->sin_addr = source;
713 	return (sockaddr *)destination;
714 }
715 
716 
717 status_t
718 IPv4Multicast::JoinGroup(IPv4GroupInterface *state)
719 {
720 	MutexLocker _(sMulticastGroupsLock);
721 
722 	sockaddr_in groupAddr;
723 	net_interface *intf = state->Interface();
724 
725 	status_t status = intf->first_info->join_multicast(intf->first_protocol,
726 		fill_sockaddr_in(&groupAddr, state->Address()));
727 	if (status < B_OK)
728 		return status;
729 
730 	sMulticastState->Insert(state);
731 	return B_OK;
732 }
733 
734 
735 status_t
736 IPv4Multicast::LeaveGroup(IPv4GroupInterface *state)
737 {
738 	MutexLocker _(sMulticastGroupsLock);
739 
740 	sMulticastState->Remove(state);
741 
742 	sockaddr_in groupAddr;
743 	net_interface *intf = state->Interface();
744 
745 	return intf->first_protocol->module->join_multicast(intf->first_protocol,
746 		fill_sockaddr_in(&groupAddr, state->Address()));
747 }
748 
749 
750 static net_protocol_module_info *
751 receiving_protocol(uint8 protocol)
752 {
753 	net_protocol_module_info *module = sReceivingProtocol[protocol];
754 	if (module != NULL)
755 		return module;
756 
757 	MutexLocker locker(sReceivingProtocolLock);
758 
759 	module = sReceivingProtocol[protocol];
760 	if (module != NULL)
761 		return module;
762 
763 	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol, &module) == B_OK)
764 		sReceivingProtocol[protocol] = module;
765 
766 	return module;
767 }
768 
769 
770 static inline sockaddr *
771 fill_sockaddr_in(sockaddr_in *target, in_addr_t address)
772 {
773 	memset(target, 0, sizeof(sockaddr_in));
774 	target->sin_family = AF_INET;
775 	target->sin_len = sizeof(sockaddr_in);
776 	target->sin_addr.s_addr = address;
777 	return (sockaddr *)target;
778 }
779 
780 
781 static status_t
782 ipv4_delta_group(IPv4GroupInterface *group, int option,
783 	net_interface *interface, const in_addr *sourceAddr)
784 {
785 	switch (option) {
786 		case IP_ADD_MEMBERSHIP:
787 			return group->Add();
788 		case IP_DROP_MEMBERSHIP:
789 			return group->Drop();
790 		case IP_BLOCK_SOURCE:
791 			return group->BlockSource(*sourceAddr);
792 		case IP_UNBLOCK_SOURCE:
793 			return group->UnblockSource(*sourceAddr);
794 		case IP_ADD_SOURCE_MEMBERSHIP:
795 			return group->AddSSM(*sourceAddr);
796 		case IP_DROP_SOURCE_MEMBERSHIP:
797 			return group->DropSSM(*sourceAddr);
798 	}
799 
800 	return B_ERROR;
801 }
802 
803 
804 static status_t
805 ipv4_delta_membership(ipv4_protocol *protocol, int option,
806 	net_interface *interface, const in_addr *groupAddr,
807 	const in_addr *sourceAddr)
808 {
809 	IPv4MulticastFilter &filter = protocol->multicast_filter;
810 	IPv4GroupInterface *state = NULL;
811 	status_t status = B_OK;
812 
813 	switch (option) {
814 		case IP_ADD_MEMBERSHIP:
815 		case IP_ADD_SOURCE_MEMBERSHIP:
816 			status = filter.GetState(*groupAddr, interface, state, true);
817 			break;
818 
819 		case IP_DROP_MEMBERSHIP:
820 		case IP_BLOCK_SOURCE:
821 		case IP_UNBLOCK_SOURCE:
822 		case IP_DROP_SOURCE_MEMBERSHIP:
823 			filter.GetState(*groupAddr, interface, state, false);
824 			if (state == NULL) {
825 				if (option == IP_DROP_MEMBERSHIP
826 					|| option == IP_DROP_SOURCE_MEMBERSHIP)
827 					return EADDRNOTAVAIL;
828 				else
829 					return EINVAL;
830 			}
831 			break;
832 	}
833 
834 	if (status < B_OK)
835 		return status;
836 
837 	status = ipv4_delta_group(state, option, interface, sourceAddr);
838 	filter.ReturnState(state);
839 	return status;
840 }
841 
842 
/*!	Translates a protocol independent MCAST_* socket option into its IPv4
	specific IP_* counterpart.
	\return the IP_* option, or -1 if \a option is none of the known
		MCAST_* options.
*/
static int
generic_to_ipv4(int option)
{
	static const struct {
		int mcast;
		int ipv4;
	} kOptionMap[] = {
		{ MCAST_JOIN_GROUP,			IP_ADD_MEMBERSHIP },
		{ MCAST_JOIN_SOURCE_GROUP,	IP_ADD_SOURCE_MEMBERSHIP },
		{ MCAST_LEAVE_GROUP,		IP_DROP_MEMBERSHIP },
		{ MCAST_BLOCK_SOURCE,		IP_BLOCK_SOURCE },
		{ MCAST_UNBLOCK_SOURCE,		IP_UNBLOCK_SOURCE },
		{ MCAST_LEAVE_SOURCE_GROUP,	IP_DROP_SOURCE_MEMBERSHIP }
	};

	for (size_t i = 0; i < sizeof(kOptionMap) / sizeof(kOptionMap[0]); i++) {
		if (kOptionMap[i].mcast == option)
			return kOptionMap[i].ipv4;
	}

	return -1;
}
863 
864 
865 static net_interface *
866 get_multicast_interface(ipv4_protocol *protocol, const in_addr *address)
867 {
868 	sockaddr_in groupAddr;
869 	net_route *route = sDatalinkModule->get_route(sDomain,
870 		fill_sockaddr_in(&groupAddr, address ? address->s_addr : INADDR_ANY));
871 	if (route == NULL)
872 		return NULL;
873 
874 	return route->interface;
875 }
876 
877 
878 static status_t
879 ipv4_delta_membership(ipv4_protocol *protocol, int option,
880 	in_addr *interfaceAddr, in_addr *groupAddr, in_addr *sourceAddr)
881 {
882 	net_interface *interface = NULL;
883 
884 	if (interfaceAddr->s_addr == INADDR_ANY) {
885 		interface = get_multicast_interface(protocol, groupAddr);
886 	} else {
887 		sockaddr_in address;
888 		interface = sDatalinkModule->get_interface_with_address(sDomain,
889 			fill_sockaddr_in(&address, interfaceAddr->s_addr));
890 	}
891 
892 	if (interface == NULL)
893 		return ENODEV;
894 
895 	return ipv4_delta_membership(protocol, option, interface,
896 		groupAddr, sourceAddr);
897 }
898 
899 
900 static status_t
901 ipv4_generic_delta_membership(ipv4_protocol *protocol, int option,
902 	uint32 index, const sockaddr_storage *_groupAddr,
903 	const sockaddr_storage *_sourceAddr)
904 {
905 	if (_groupAddr->ss_family != AF_INET)
906 		return EINVAL;
907 
908 	if (_sourceAddr && _sourceAddr->ss_family != AF_INET)
909 		return EINVAL;
910 
911 	net_interface *interface;
912 	const in_addr *groupAddr, *sourceAddr = NULL;
913 
914 	groupAddr = &((const sockaddr_in *)_groupAddr)->sin_addr;
915 
916 	if (index == 0)
917 		interface = get_multicast_interface(protocol, groupAddr);
918 	else
919 		interface = sDatalinkModule->get_interface(sDomain, index);
920 
921 	if (interface == NULL)
922 		return ENODEV;
923 
924 	if (_sourceAddr)
925 		sourceAddr = &((const sockaddr_in *)_sourceAddr)->sin_addr;
926 
927 	return ipv4_delta_membership(protocol, generic_to_ipv4(option), interface,
928 		groupAddr, sourceAddr);
929 }
930 
931 
932 //	#pragma mark -
933 
934 
935 net_protocol *
936 ipv4_init_protocol(net_socket *socket)
937 {
938 	ipv4_protocol *protocol = new (std::nothrow) ipv4_protocol();
939 	if (protocol == NULL)
940 		return NULL;
941 
942 	protocol->raw = NULL;
943 	protocol->service_type = 0;
944 	protocol->time_to_live = kDefaultTTL;
945 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
946 	protocol->flags = 0;
947 	return protocol;
948 }
949 
950 
951 status_t
952 ipv4_uninit_protocol(net_protocol *_protocol)
953 {
954 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
955 
956 	delete protocol->raw;
957 	delete protocol;
958 	return B_OK;
959 }
960 
961 
962 /*!
963 	Since open() is only called on the top level protocol, when we get here
964 	it means we are on a SOCK_RAW socket.
965 */
966 status_t
967 ipv4_open(net_protocol *_protocol)
968 {
969 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
970 
971 	RawSocket *raw = new (std::nothrow) RawSocket(protocol->socket);
972 	if (raw == NULL)
973 		return B_NO_MEMORY;
974 
975 	status_t status = raw->InitCheck();
976 	if (status < B_OK) {
977 		delete raw;
978 		return status;
979 	}
980 
981 	TRACE_SK(protocol, "Open()");
982 
983 	protocol->raw = raw;
984 
985 	MutexLocker locker(sRawSocketsLock);
986 	sRawSockets.Add(raw);
987 	return B_OK;
988 }
989 
990 
991 status_t
992 ipv4_close(net_protocol *_protocol)
993 {
994 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
995 	RawSocket *raw = protocol->raw;
996 	if (raw == NULL)
997 		return B_ERROR;
998 
999 	TRACE_SK(protocol, "Close()");
1000 
1001 	MutexLocker locker(sRawSocketsLock);
1002 	sRawSockets.Remove(raw);
1003 	delete raw;
1004 	protocol->raw = NULL;
1005 
1006 	return B_OK;
1007 }
1008 
1009 
1010 status_t
1011 ipv4_free(net_protocol *protocol)
1012 {
1013 	return B_OK;
1014 }
1015 
1016 
1017 status_t
1018 ipv4_connect(net_protocol *protocol, const struct sockaddr *address)
1019 {
1020 	return B_ERROR;
1021 }
1022 
1023 
1024 status_t
1025 ipv4_accept(net_protocol *protocol, struct net_socket **_acceptedSocket)
1026 {
1027 	return EOPNOTSUPP;
1028 }
1029 
1030 
1031 static status_t
1032 get_int_option(void *target, size_t length, int value)
1033 {
1034 	if (length != sizeof(int))
1035 		return B_BAD_VALUE;
1036 
1037 	return user_memcpy(target, &value, sizeof(int));
1038 }
1039 
1040 
1041 template<typename Type> static status_t
1042 set_int_option(Type &target, const void *_value, size_t length)
1043 {
1044 	int value;
1045 
1046 	if (length != sizeof(int))
1047 		return B_BAD_VALUE;
1048 
1049 	if (user_memcpy(&value, _value, sizeof(int)) < B_OK)
1050 		return B_BAD_ADDRESS;
1051 
1052 	target = value;
1053 	return B_OK;
1054 }
1055 
1056 
1057 status_t
1058 ipv4_control(net_protocol *_protocol, int level, int option, void *value,
1059 	size_t *_length)
1060 {
1061 	if ((level & LEVEL_MASK) != IPPROTO_IP)
1062 		return sDatalinkModule->control(sDomain, option, value, _length);
1063 
1064 	return B_BAD_VALUE;
1065 }
1066 
1067 
1068 status_t
1069 ipv4_getsockopt(net_protocol *_protocol, int level, int option, void *value,
1070 	int *_length)
1071 {
1072 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1073 
1074 	if (level == IPPROTO_IP) {
1075 		if (option == IP_HDRINCL)
1076 			return get_int_option(value, *_length,
1077 				(protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0);
1078 		else if (option == IP_TTL)
1079 			return get_int_option(value, *_length, protocol->time_to_live);
1080 		else if (option == IP_TOS)
1081 			return get_int_option(value, *_length, protocol->service_type);
1082 		else if (IP_MULTICAST_TTL)
1083 			return get_int_option(value, *_length,
1084 				protocol->multicast_time_to_live);
1085 		else if (option == IP_ADD_MEMBERSHIP
1086 			|| option == IP_DROP_MEMBERSHIP
1087 			|| option == IP_BLOCK_SOURCE
1088 			|| option == IP_UNBLOCK_SOURCE
1089 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1090 			|| option == IP_DROP_SOURCE_MEMBERSHIP
1091 			|| option == MCAST_JOIN_GROUP
1092 			|| option == MCAST_LEAVE_GROUP
1093 			|| option == MCAST_BLOCK_SOURCE
1094 			|| option == MCAST_UNBLOCK_SOURCE
1095 			|| option == MCAST_JOIN_SOURCE_GROUP
1096 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1097 				// RFC 3678, Section 4.1:
1098 				// ``An error of EOPNOTSUPP is returned if these options are
1099 				// used with getsockopt().''
1100 				return EOPNOTSUPP;
1101 		} else {
1102 			dprintf("IPv4::getsockopt(): get unknown option: %d\n", option);
1103 			return ENOPROTOOPT;
1104 		}
1105 	}
1106 
1107 	return sSocketModule->get_option(protocol->socket, level, option, value,
1108 		_length);
1109 }
1110 
1111 
1112 status_t
1113 ipv4_setsockopt(net_protocol *_protocol, int level, int option,
1114 	const void *value, int length)
1115 {
1116 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1117 
1118 	if (level == IPPROTO_IP) {
1119 		if (option == IP_HDRINCL) {
1120 			int headerIncluded;
1121 			if (length != sizeof(int))
1122 				return B_BAD_VALUE;
1123 			if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded)) < B_OK)
1124 				return B_BAD_ADDRESS;
1125 
1126 			if (headerIncluded)
1127 				protocol->flags |= IP_FLAG_HEADER_INCLUDED;
1128 			else
1129 				protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
1130 			return B_OK;
1131 		} else if (option == IP_TTL) {
1132 			return set_int_option(protocol->time_to_live, value, length);
1133 		} else if (option == IP_TOS) {
1134 			return set_int_option(protocol->service_type, value, length);
1135 		} else if (option == IP_MULTICAST_TTL) {
1136 			return set_int_option(protocol->multicast_time_to_live, value,
1137 				length);
1138 		} else if (option == IP_ADD_MEMBERSHIP
1139 			|| option == IP_DROP_MEMBERSHIP) {
1140 			ip_mreq mreq;
1141 			if (length != sizeof(ip_mreq))
1142 				return B_BAD_VALUE;
1143 			if (user_memcpy(&mreq, value, sizeof(ip_mreq)) < B_OK)
1144 				return B_BAD_ADDRESS;
1145 
1146 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1147 				&mreq.imr_multiaddr, NULL);
1148 		} else if (option == IP_BLOCK_SOURCE
1149 			|| option == IP_UNBLOCK_SOURCE
1150 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1151 			|| option == IP_DROP_SOURCE_MEMBERSHIP) {
1152 			ip_mreq_source mreq;
1153 			if (length != sizeof(ip_mreq_source))
1154 				return B_BAD_VALUE;
1155 			if (user_memcpy(&mreq, value, sizeof(ip_mreq_source)) < B_OK)
1156 				return B_BAD_ADDRESS;
1157 
1158 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1159 				&mreq.imr_multiaddr, &mreq.imr_sourceaddr);
1160 		} else if (option == MCAST_LEAVE_GROUP
1161 			|| option == MCAST_JOIN_GROUP) {
1162 			group_req greq;
1163 			if (length != sizeof(group_req))
1164 				return B_BAD_VALUE;
1165 			if (user_memcpy(&greq, value, sizeof(group_req)) < B_OK)
1166 				return B_BAD_ADDRESS;
1167 
1168 			return ipv4_generic_delta_membership(protocol, option,
1169 				greq.gr_interface, &greq.gr_group, NULL);
1170 		} else if (option == MCAST_BLOCK_SOURCE
1171 			|| option == MCAST_UNBLOCK_SOURCE
1172 			|| option == MCAST_JOIN_SOURCE_GROUP
1173 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1174 			group_source_req greq;
1175 			if (length != sizeof(group_source_req))
1176 				return B_BAD_VALUE;
1177 			if (user_memcpy(&greq, value, sizeof(group_source_req)) < B_OK)
1178 				return B_BAD_ADDRESS;
1179 
1180 			return ipv4_generic_delta_membership(protocol, option,
1181 				greq.gsr_interface, &greq.gsr_group, &greq.gsr_source);
1182 		} else {
1183 			dprintf("IPv4::setsockopt(): set unknown option: %d\n", option);
1184 			return ENOPROTOOPT;
1185 		}
1186 	}
1187 
1188 	return sSocketModule->set_option(protocol->socket, level, option,
1189 		value, length);
1190 }
1191 
1192 
1193 status_t
1194 ipv4_bind(net_protocol *protocol, const struct sockaddr *address)
1195 {
1196 	if (address->sa_family != AF_INET)
1197 		return EAFNOSUPPORT;
1198 
1199 	// only INADDR_ANY and addresses of local interfaces are accepted:
1200 	if (((sockaddr_in *)address)->sin_addr.s_addr == INADDR_ANY
1201 		|| IN_MULTICAST(((sockaddr_in *)address)->sin_addr.s_addr)
1202 		|| sDatalinkModule->is_local_address(sDomain, address, NULL, NULL)) {
1203 		memcpy(&protocol->socket->address, address, sizeof(struct sockaddr_in));
1204 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
1205 			// explicitly set length, as our callers can't be trusted to
1206 			// always provide the correct length!
1207 		return B_OK;
1208 	}
1209 
1210 	return B_ERROR;
1211 		// address is unknown on this host
1212 }
1213 
1214 
1215 status_t
1216 ipv4_unbind(net_protocol *protocol, struct sockaddr *address)
1217 {
1218 	// nothing to do here
1219 	return B_OK;
1220 }
1221 
1222 
1223 status_t
1224 ipv4_listen(net_protocol *protocol, int count)
1225 {
1226 	return EOPNOTSUPP;
1227 }
1228 
1229 
1230 status_t
1231 ipv4_shutdown(net_protocol *protocol, int direction)
1232 {
1233 	return EOPNOTSUPP;
1234 }
1235 
1236 
1237 status_t
1238 ipv4_send_routed_data(net_protocol *_protocol, struct net_route *route,
1239 	net_buffer *buffer)
1240 {
1241 	if (route == NULL)
1242 		return B_BAD_VALUE;
1243 
1244 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1245 	net_interface *interface = route->interface;
1246 
1247 	TRACE_SK(protocol, "SendRoutedData(%p, %p [%ld bytes])", route, buffer,
1248 		buffer->size);
1249 
1250 	sockaddr_in &source = *(sockaddr_in *)buffer->source;
1251 	sockaddr_in &destination = *(sockaddr_in *)buffer->destination;
1252 
1253 	bool headerIncluded = false, checksumNeeded = true;
1254 	if (protocol != NULL)
1255 		headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
1256 
1257 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1258 
1259 	if (destination.sin_addr.s_addr == INADDR_ANY)
1260 		return EDESTADDRREQ;
1261 	else if (destination.sin_addr.s_addr == INADDR_BROADCAST) {
1262 		// TODO check for local broadcast addresses as well?
1263 		if (protocol && !(protocol->socket->options & SO_BROADCAST))
1264 			return B_BAD_VALUE;
1265 		buffer->flags |= MSG_BCAST;
1266 	} else if (IN_MULTICAST(destination.sin_addr.s_addr)) {
1267 		buffer->flags |= MSG_MCAST;
1268 	}
1269 
1270 	// Add IP header (if needed)
1271 
1272 	if (!headerIncluded) {
1273 		NetBufferPrepend<ipv4_header> header(buffer);
1274 		if (header.Status() < B_OK)
1275 			return header.Status();
1276 
1277 		header->version = IP_VERSION;
1278 		header->header_length = sizeof(ipv4_header) / 4;
1279 		header->service_type = protocol ? protocol->service_type : 0;
1280 		header->total_length = htons(buffer->size);
1281 		header->id = htons(atomic_add(&sPacketID, 1));
1282 		header->fragment_offset = 0;
1283 		if (protocol)
1284 			header->time_to_live = (buffer->flags & MSG_MCAST) ?
1285 				protocol->multicast_time_to_live : protocol->time_to_live;
1286 		else
1287 			header->time_to_live = (buffer->flags & MSG_MCAST) ?
1288 				kDefaultMulticastTTL : kDefaultTTL;
1289 		header->protocol = protocol ? protocol->socket->protocol : buffer->protocol;
1290 		header->checksum = 0;
1291 
1292 		header->source = source.sin_addr.s_addr;
1293 		header->destination = destination.sin_addr.s_addr;
1294 	} else {
1295 		// if IP_HDRINCL, check if the source address is set
1296 		NetBufferHeaderReader<ipv4_header> header(buffer);
1297 		if (header.Status() < B_OK)
1298 			return header.Status();
1299 
1300 		if (header->source == 0) {
1301 			header->source = source.sin_addr.s_addr;
1302 			header->checksum = 0;
1303 			header.Sync();
1304 		} else
1305 			checksumNeeded = false;
1306 	}
1307 
1308 	if (buffer->size > 0xffff)
1309 		return EMSGSIZE;
1310 
1311 	if (checksumNeeded)
1312 		*IPChecksumField(buffer) = gBufferModule->checksum(buffer, 0,
1313 			sizeof(ipv4_header), true);
1314 
1315 	TRACE_SK(protocol, "  SendRoutedData(): header chksum: %ld, buffer checksum: %ld",
1316 		gBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
1317 		gBufferModule->checksum(buffer, 0, buffer->size, true));
1318 
1319 	TRACE_SK(protocol, "  SendRoutedData(): destination: %08lx",
1320 		ntohl(destination.sin_addr.s_addr));
1321 
1322 	uint32 mtu = route->mtu ? route->mtu : interface->mtu;
1323 	if (buffer->size > mtu) {
1324 		// we need to fragment the packet
1325 		return send_fragments(protocol, route, buffer, mtu);
1326 	}
1327 
1328 	return sDatalinkModule->send_data(route, buffer);
1329 }
1330 
1331 
1332 status_t
1333 ipv4_send_data(net_protocol *_protocol, net_buffer *buffer)
1334 {
1335 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1336 
1337 	TRACE_SK(protocol, "SendData(%p [%ld bytes])", buffer, buffer->size);
1338 
1339 	if (protocol && (protocol->flags & IP_FLAG_HEADER_INCLUDED)) {
1340 		if (buffer->size < sizeof(ipv4_header))
1341 			return EINVAL;
1342 
1343 		sockaddr_in *source = (sockaddr_in *)buffer->source;
1344 		sockaddr_in *destination = (sockaddr_in *)buffer->destination;
1345 
1346 		fill_sockaddr_in(source, *NetBufferField<in_addr_t,
1347 			offsetof(ipv4_header, source)>(buffer));
1348 		fill_sockaddr_in(destination, *NetBufferField<in_addr_t,
1349 			offsetof(ipv4_header, destination)>(buffer));
1350 	}
1351 
1352 	return sDatalinkModule->send_datagram(protocol, sDomain, buffer);
1353 }
1354 
1355 
1356 ssize_t
1357 ipv4_send_avail(net_protocol *protocol)
1358 {
1359 	return B_ERROR;
1360 }
1361 
1362 
1363 status_t
1364 ipv4_read_data(net_protocol *_protocol, size_t numBytes, uint32 flags,
1365 	net_buffer **_buffer)
1366 {
1367 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1368 	RawSocket *raw = protocol->raw;
1369 	if (raw == NULL)
1370 		return B_ERROR;
1371 
1372 	TRACE_SK(protocol, "ReadData(%lu, 0x%lx)", numBytes, flags);
1373 
1374 	return raw->SocketDequeue(flags, _buffer);
1375 }
1376 
1377 
1378 ssize_t
1379 ipv4_read_avail(net_protocol *_protocol)
1380 {
1381 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1382 	RawSocket *raw = protocol->raw;
1383 	if (raw == NULL)
1384 		return B_ERROR;
1385 
1386 	return raw->AvailableData();
1387 }
1388 
1389 
1390 struct net_domain *
1391 ipv4_get_domain(net_protocol *protocol)
1392 {
1393 	return sDomain;
1394 }
1395 
1396 
1397 size_t
1398 ipv4_get_mtu(net_protocol *protocol, const struct sockaddr *address)
1399 {
1400 	net_route *route = sDatalinkModule->get_route(sDomain, address);
1401 	if (route == NULL)
1402 		return 0;
1403 
1404 	size_t mtu;
1405 	if (route->mtu != 0)
1406 		mtu = route->mtu;
1407 	else
1408 		mtu = route->interface->mtu;
1409 
1410 	sDatalinkModule->put_route(sDomain, route);
1411 	return mtu - sizeof(ipv4_header);
1412 }
1413 
1414 
1415 status_t
1416 ipv4_receive_data(net_buffer *buffer)
1417 {
1418 	TRACE("ReceiveData(%p [%ld bytes])", buffer, buffer->size);
1419 
1420 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1421 	if (bufferHeader.Status() < B_OK)
1422 		return bufferHeader.Status();
1423 
1424 	ipv4_header &header = bufferHeader.Data();
1425 	//dump_ipv4_header(header);
1426 
1427 	if (header.version != IP_VERSION)
1428 		return B_BAD_TYPE;
1429 
1430 	uint16 packetLength = header.TotalLength();
1431 	uint16 headerLength = header.HeaderLength();
1432 	if (packetLength > buffer->size
1433 		|| headerLength < sizeof(ipv4_header))
1434 		return B_BAD_DATA;
1435 
1436 	// TODO: would be nice to have a direct checksum function somewhere
1437 	if (gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1438 		return B_BAD_DATA;
1439 
1440 	struct sockaddr_in &source = *(struct sockaddr_in *)buffer->source;
1441 	struct sockaddr_in &destination = *(struct sockaddr_in *)buffer->destination;
1442 
1443 	fill_sockaddr_in(&source, header.source);
1444 	fill_sockaddr_in(&destination, header.destination);
1445 
1446 	// lower layers notion of Broadcast or Multicast have no relevance to us
1447 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1448 
1449 	if (header.destination == INADDR_BROADCAST) {
1450 		buffer->flags |= MSG_BCAST;
1451 	} else if (IN_MULTICAST(header.destination)) {
1452 		buffer->flags |= MSG_MCAST;
1453 	} else {
1454 		uint32 matchedAddressType = 0;
1455 		// test if the packet is really for us
1456 		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1457 			&buffer->interface, &matchedAddressType)) {
1458 			TRACE("  ReceiveData(): packet was not for us %lx -> %lx",
1459 				ntohl(header.source), ntohl(header.destination));
1460 			return B_ERROR;
1461 		}
1462 
1463 		// copy over special address types (MSG_BCAST or MSG_MCAST):
1464 		buffer->flags |= matchedAddressType;
1465 	}
1466 
1467 	uint8 protocol = buffer->protocol = header.protocol;
1468 
1469 	// remove any trailing/padding data
1470 	status_t status = gBufferModule->trim(buffer, packetLength);
1471 	if (status < B_OK)
1472 		return status;
1473 
1474 	// check for fragmentation
1475 	uint16 fragmentOffset = ntohs(header.fragment_offset);
1476 	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
1477 		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
1478 		// this is a fragment
1479 		TRACE("  ReceiveData(): Found a Fragment!");
1480 		status = reassemble_fragments(header, &buffer);
1481 		TRACE("  ReceiveData():  -> %s", strerror(status));
1482 		if (status != B_OK)
1483 			return status;
1484 
1485 		if (buffer == NULL) {
1486 			// buffer was put into fragment packet
1487 			TRACE("  ReceiveData(): Not yet assembled.");
1488 			return B_OK;
1489 		}
1490 	}
1491 
1492 	// Since the buffer might have been changed (reassembled fragment)
1493 	// we must no longer access bufferHeader or header anymore after
1494 	// this point
1495 
1496 	raw_receive_data(buffer);
1497 
1498 	gBufferModule->remove_header(buffer, headerLength);
1499 		// the header is of variable size and may include IP options
1500 		// (that we ignore for now)
1501 
1502 	net_protocol_module_info *module = receiving_protocol(protocol);
1503 	if (module == NULL) {
1504 		// no handler for this packet
1505 		return EAFNOSUPPORT;
1506 	}
1507 
1508 	if (buffer->flags & MSG_MCAST) {
1509 		// Unfortunely historical reasons dictate that the IP multicast
1510 		// model be a little different from the unicast one. We deliver
1511 		// this frame directly to all sockets registered with interest
1512 		// for this multicast group.
1513 		return deliver_multicast(module, buffer, false);
1514 	}
1515 
1516 	return module->receive_data(buffer);
1517 }
1518 
1519 
1520 status_t
1521 ipv4_deliver_data(net_protocol *_protocol, net_buffer *buffer)
1522 {
1523 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1524 
1525 	if (protocol->raw == NULL)
1526 		return B_ERROR;
1527 
1528 	return protocol->raw->SocketEnqueue(buffer);
1529 }
1530 
1531 
1532 status_t
1533 ipv4_error(uint32 code, net_buffer *data)
1534 {
1535 	return B_ERROR;
1536 }
1537 
1538 
1539 status_t
1540 ipv4_error_reply(net_protocol *protocol, net_buffer *causedError, uint32 code,
1541 	void *errorData)
1542 {
1543 	return B_ERROR;
1544 }
1545 
1546 
1547 static int
1548 dump_ipv4_multicast(int argc, char *argv[])
1549 {
1550 	MulticastState::Iterator it = sMulticastState->GetIterator();
1551 
1552 	while (it.HasNext()) {
1553 		IPv4GroupInterface *state = it.Next();
1554 
1555 		char addrBuf[64];
1556 
1557 		kprintf("%p: group <%s, %s, %s {", state, state->Interface()->name,
1558 			print_address(&state->Address(), addrBuf, sizeof(addrBuf)),
1559 			state->Mode() == IPv4GroupInterface::kExclude ?  "Exclude" :
1560 			"Include");
1561 
1562 		int count = 0;
1563 		IPv4GroupInterface::AddressSet::Iterator it =
1564 			state->Sources().GetIterator();
1565 		while (it.HasNext()) {
1566 			kprintf("%s%s", count > 0 ? ", " : "", print_address(&it.Next(),
1567 				addrBuf, sizeof(addrBuf)));
1568 			count++;
1569 		}
1570 
1571 		kprintf("}> sock %p\n", state->Parent()->Socket());
1572 	}
1573 
1574 	return 0;
1575 }
1576 
1577 
1578 //	#pragma mark -
1579 
1580 
1581 status_t
1582 init_ipv4()
1583 {
1584 	sPacketID = (int32)system_time();
1585 
1586 	mutex_init(&sRawSocketsLock, "raw sockets");
1587 	mutex_init(&sFragmentLock, "IPv4 Fragments");
1588 	mutex_init(&sMulticastGroupsLock, "IPv4 multicast groups");
1589 	mutex_init(&sReceivingProtocolLock, "IPv4 receiving protocols");
1590 
1591 	status_t status;
1592 
1593 	sMulticastState = new MulticastState();
1594 	if (sMulticastState == NULL) {
1595 		status = B_NO_MEMORY;
1596 		goto err4;
1597 	}
1598 
1599 	status = sMulticastState->Init();
1600 	if (status < B_OK)
1601 		goto err5;
1602 
1603 	sFragmentHash = hash_init(MAX_HASH_FRAGMENTS, FragmentPacket::NextOffset(),
1604 		&FragmentPacket::Compare, &FragmentPacket::Hash);
1605 	if (sFragmentHash == NULL)
1606 		goto err5;
1607 
1608 	new (&sRawSockets) RawSocketList;
1609 		// static initializers do not work in the kernel,
1610 		// so we have to do it here, manually
1611 		// TODO: for modules, this shouldn't be required
1612 
1613 	status = gStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
1614 		"network/protocols/ipv4/v1", NULL);
1615 	if (status < B_OK)
1616 		goto err6;
1617 
1618 	status = gStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
1619 		&gIPv4AddressModule, &sDomain);
1620 	if (status < B_OK)
1621 		goto err6;
1622 
1623 	add_debugger_command("ipv4_multicast", dump_ipv4_multicast,
1624 		"list all current IPv4 multicast states");
1625 
1626 	return B_OK;
1627 
1628 err6:
1629 	hash_uninit(sFragmentHash);
1630 err5:
1631 	delete sMulticastState;
1632 err4:
1633 	mutex_destroy(&sReceivingProtocolLock);
1634 	mutex_destroy(&sMulticastGroupsLock);
1635 	mutex_destroy(&sFragmentLock);
1636 	mutex_destroy(&sRawSocketsLock);
1637 	return status;
1638 }
1639 
1640 
1641 status_t
1642 uninit_ipv4()
1643 {
1644 	mutex_lock(&sReceivingProtocolLock);
1645 
1646 	remove_debugger_command("ipv4_multicast", dump_ipv4_multicast);
1647 
1648 	// put all the domain receiving protocols we gathered so far
1649 	for (uint32 i = 0; i < 256; i++) {
1650 		if (sReceivingProtocol[i] != NULL)
1651 			gStackModule->put_domain_receiving_protocol(sDomain, i);
1652 	}
1653 
1654 	gStackModule->unregister_domain(sDomain);
1655 	mutex_unlock(&sReceivingProtocolLock);
1656 
1657 	delete sMulticastState;
1658 	hash_uninit(sFragmentHash);
1659 
1660 	mutex_destroy(&sMulticastGroupsLock);
1661 	mutex_destroy(&sFragmentLock);
1662 	mutex_destroy(&sRawSocketsLock);
1663 	mutex_destroy(&sReceivingProtocolLock);
1664 
1665 	return B_OK;
1666 }
1667 
1668 
1669 static status_t
1670 ipv4_std_ops(int32 op, ...)
1671 {
1672 	switch (op) {
1673 		case B_MODULE_INIT:
1674 			return init_ipv4();
1675 		case B_MODULE_UNINIT:
1676 			return uninit_ipv4();
1677 
1678 		default:
1679 			return B_ERROR;
1680 	}
1681 }
1682 
1683 
1684 net_protocol_module_info gIPv4Module = {
1685 	{
1686 		"network/protocols/ipv4/v1",
1687 		0,
1688 		ipv4_std_ops
1689 	},
1690 	NET_PROTOCOL_ATOMIC_MESSAGES,
1691 
1692 	ipv4_init_protocol,
1693 	ipv4_uninit_protocol,
1694 	ipv4_open,
1695 	ipv4_close,
1696 	ipv4_free,
1697 	ipv4_connect,
1698 	ipv4_accept,
1699 	ipv4_control,
1700 	ipv4_getsockopt,
1701 	ipv4_setsockopt,
1702 	ipv4_bind,
1703 	ipv4_unbind,
1704 	ipv4_listen,
1705 	ipv4_shutdown,
1706 	ipv4_send_data,
1707 	ipv4_send_routed_data,
1708 	ipv4_send_avail,
1709 	ipv4_read_data,
1710 	ipv4_read_avail,
1711 	ipv4_get_domain,
1712 	ipv4_get_mtu,
1713 	ipv4_receive_data,
1714 	ipv4_deliver_data,
1715 	ipv4_error,
1716 	ipv4_error_reply,
1717 };
1718 
1719 module_dependency module_dependencies[] = {
1720 	{NET_STACK_MODULE_NAME, (module_info **)&gStackModule},
1721 	{NET_BUFFER_MODULE_NAME, (module_info **)&gBufferModule},
1722 	{NET_DATALINK_MODULE_NAME, (module_info **)&sDatalinkModule},
1723 	{NET_SOCKET_MODULE_NAME, (module_info **)&sSocketModule},
1724 	{}
1725 };
1726 
1727 module_info *modules[] = {
1728 	(module_info *)&gIPv4Module,
1729 	NULL
1730 };
1731