xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision a1163de83ea633463a79de234b8742ee106531b2)
1 /*
2  * Copyright 2006-2008, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4_address.h"
11 #include "multicast.h"
12 
13 #include <net_datalink.h>
14 #include <net_datalink_protocol.h>
15 #include <net_device.h>
16 #include <net_protocol.h>
17 #include <net_stack.h>
18 #include <NetBufferUtilities.h>
19 #include <ProtocolUtilities.h>
20 
21 #include <ByteOrder.h>
22 #include <KernelExport.h>
23 #include <util/AutoLock.h>
24 #include <util/list.h>
25 #include <util/khash.h>
26 #include <util/DoublyLinkedList.h>
27 #include <util/MultiHashTable.h>
28 
29 #include <netinet/in.h>
30 #include <netinet/ip.h>
31 #include <new>
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <utility>
36 
37 
// Uncomment the following define to enable debug tracing for this module.
// With TRACE_IPV4 defined, TRACE() and TRACE_SK() print via dprintf(),
// prefixing each message with "IPv4" and the current system_time();
// TRACE_SK() additionally prints the protocol pointer it is given.
// Without it, both macros expand to an empty statement.
//#define TRACE_IPV4
#ifdef TRACE_IPV4
#	define TRACE(format, args...) \
		dprintf("IPv4 [%llu] " format "\n", system_time() , ##args)
#	define TRACE_SK(protocol, format, args...) \
		dprintf("IPv4 [%llu] %p " format "\n", system_time(), \
			protocol , ##args)
#else
#	define TRACE(args...)		do { } while (0)
#	define TRACE_SK(args...)	do { } while (0)
#endif
49 
// In-memory layout of an IPv4 header. The struct is packed, and the order of
// the two 4-bit fields in the first byte depends on the host's endianness.
// total_length and fragment_offset are converted with ntohs() by the
// accessors below; the other fields are used as stored.
struct ipv4_header {
#if B_HOST_IS_LENDIAN == 1
	uint8		header_length : 4;	// header length in 32-bit words
	uint8		version : 4;
#else
	uint8		version : 4;
	uint8		header_length : 4;
#endif
	uint8		service_type;
	uint16		total_length;
	uint16		id;
	uint16		fragment_offset;
		// holds both the fragment flags and the offset, see IP_*_FLAG/MASK
	uint8		time_to_live;
	uint8		protocol;
	uint16		checksum;
	in_addr_t	source;
	in_addr_t	destination;

	// header length in bytes (header_length counts 32-bit words)
	uint16 HeaderLength() const { return header_length << 2; }
	// total_length converted to host byte order
	uint16 TotalLength() const { return ntohs(total_length); }
	// fragment_offset (flags and offset) converted to host byte order
	uint16 FragmentOffset() const { return ntohs(fragment_offset); }
} _PACKED;
72 
#define IP_VERSION				4

// fragment flags
// These masks apply to the host byte order value returned by
// ipv4_header::FragmentOffset(): the upper three bits are flags, the lower
// 13 bits are the fragment offset (which callers in this file shift left by
// 3 to obtain a byte offset).
#define IP_RESERVED_FLAG		0x8000
#define IP_DONT_FRAGMENT		0x4000
#define IP_MORE_FRAGMENTS		0x2000
#define IP_FRAGMENT_OFFSET_MASK	0x1fff

#define MAX_HASH_FRAGMENTS 		64
	// slots in the fragment packet's hash
#define FRAGMENT_TIMEOUT		60000000LL
	// discard fragment after 60 seconds
85 
// List of net_buffers that are chained through the buffer's own "link" member
typedef DoublyLinkedList<struct net_buffer,
	DoublyLinkedListCLink<struct net_buffer> > FragmentList;

// Accessor for the checksum field of an ipv4_header stored in a net_buffer
typedef NetBufferField<uint16, offsetof(ipv4_header, checksum)> IPChecksumField;
90 
// Identifies the datagram a fragment belongs to; used as the key of the
// fragment hash. The values are copied from the IPv4 header as-is (no byte
// order conversion is done when the key is filled in).
struct ipv4_packet_key {
	in_addr_t	source;
	in_addr_t	destination;
	uint16		id;
	uint8		protocol;
};
97 
/*!	Collects the fragments of a single IPv4 datagram (identified by an
	ipv4_packet_key) until all of its data has arrived.
	Objects of this class are stored in the fragment hash table; Hash(),
	Compare(), and NextOffset() are the callbacks/offset that table needs.
	Each object owns a timer that is restarted whenever a fragment is added
	and whose hook (StaleTimer()) removes and deletes the object.
*/
class FragmentPacket {
	public:
		FragmentPacket(const ipv4_packet_key &key);
		~FragmentPacket();

		// Adds the data range [start, end) contained in \a buffer
		status_t AddFragment(uint16 start, uint16 end, net_buffer *buffer,
					bool lastFragment);
		// Merges all collected fragments into \a to
		status_t Reassemble(net_buffer *to);

		// true once the last fragment was seen and no hole is left
		bool IsComplete() const { return fReceivedLastFragment && fBytesLeft == 0; }

		static uint32 Hash(void *_packet, const void *_key, uint32 range);
		static int Compare(void *_packet, const void *_key);
		static int32 NextOffset() { return offsetof(FragmentPacket, fNext); }
		static void StaleTimer(struct net_timer *timer, void *data);

	private:
		FragmentPacket	*fNext;
			// hash table link, see NextOffset()
		struct ipv4_packet_key fKey;
		bool			fReceivedLastFragment;
		int32			fBytesLeft;
			// starts at IP_MAXPACKET and is reduced as data arrives and
			// once the end of the datagram is known
		FragmentList	fFragments;
			// fragments received so far, ordered by their start offset
		net_timer		fTimer;
};
122 
123 
// A datagram socket that can additionally be linked into a RawSocketList;
// one is created per protocol instance in ipv4_open().
class RawSocket : public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
	public:
		RawSocket(net_socket *socket);
};
128 
// List type used for the global list of open raw sockets
typedef DoublyLinkedList<RawSocket> RawSocketList;

// IPv4 instantiations of the generic multicast templates from "multicast.h"
typedef MulticastGroupInterface<IPv4Multicast> IPv4GroupInterface;
typedef MulticastFilter<IPv4Multicast> IPv4MulticastFilter;
133 
134 struct MulticastStateHash {
135 	typedef std::pair<const in_addr *, uint32> KeyType;
136 	typedef IPv4GroupInterface ValueType;
137 
138 	size_t HashKey(const KeyType &key) const
139 		{ return key.first->s_addr ^ key.second; }
140 	size_t Hash(ValueType *value) const
141 		{ return HashKey(std::make_pair(&value->Address(),
142 			value->Interface()->index)); }
143 	bool Compare(const KeyType &key, ValueType *value) const
144 		{ return value->Interface()->index == key.second
145 			&& value->Address().s_addr == key.first->s_addr; }
146 	bool CompareValues(ValueType *value1, ValueType *value2) const
147 		{ return value1->Interface()->index == value2->Interface()->index
148 			&& value1->Address().s_addr == value2->Address().s_addr; }
149 	HashTableLink<ValueType> *GetLink(ValueType *value) const { return value; }
150 };
151 
152 
// Per-socket state of the IPv4 protocol. The constructor only sets up the
// multicast filter; the remaining members are assigned by
// ipv4_init_protocol().
struct ipv4_protocol : net_protocol {
	ipv4_protocol()
		: multicast_filter(this) {}

	RawSocket	*raw;
		// set by ipv4_open(), NULL otherwise
	uint8		service_type;
	uint8		time_to_live;
	uint8		multicast_time_to_live;
	uint32		flags;
		// see the IP_FLAG_* constants

	IPv4MulticastFilter multicast_filter;
};
165 
// protocol flags
// (stored in ipv4_protocol::flags; toggled via the IP_HDRINCL socket option)
#define IP_FLAG_HEADER_INCLUDED	0x01


// Initial values for ipv4_protocol::time_to_live and
// ipv4_protocol::multicast_time_to_live, see ipv4_init_protocol()
static const int kDefaultTTL = 254;
static const int kDefaultMulticastTTL = 1;


extern net_protocol_module_info gIPv4Module;
	// we need this in ipv4_std_ops() for registering the AF_INET domain

net_stack_module_info *gStackModule;
net_buffer_module_info *gBufferModule;

static struct net_domain *sDomain;
static net_datalink_module_info *sDatalinkModule;
static net_socket_module_info *sSocketModule;
static int32 sPacketID;
static RawSocketList sRawSockets;
	// all open raw sockets; accessed with sRawSocketsLock held
static mutex sRawSocketsLock;
static mutex sFragmentLock;
	// held while sFragmentHash is looked up or modified
static hash_table *sFragmentHash;
	// FragmentPacket objects of datagrams that are still being reassembled
static mutex sMulticastGroupsLock;
	// held while sMulticastState is looked up or modified

typedef MultiHashTable<MulticastStateHash> MulticastState;
static MulticastState *sMulticastState;

// Cache of the receiving protocol modules, indexed by IP protocol number;
// filled on demand by receiving_protocol() with sReceivingProtocolLock held
static net_protocol_module_info *sReceivingProtocol[256];
static mutex sReceivingProtocolLock;
195 
196 
/*!	Formats \a address in dotted decimal notation into \a buf, writing at
	most \a bufLen bytes (including the terminating null).
	\return \a buf, for convenient use as a printf() argument.
*/
static const char *
print_address(const in_addr *address, char *buf, size_t bufLen)
{
	// work on the host byte order value, most significant octet first
	const unsigned int hostOrder = ntohl(address->s_addr);
	const unsigned int octets[4] = {
		(hostOrder >> 24) & 0xff,
		(hostOrder >> 16) & 0xff,
		(hostOrder >> 8) & 0xff,
		hostOrder & 0xff
	};

	snprintf(buf, bufLen, "%u.%u.%u.%u", octets[0], octets[1], octets[2],
		octets[3]);

	return buf;
}
207 
208 
// Creates the raw socket for \a socket; the string is passed on to the
// DatagramSocket base class as its name.
RawSocket::RawSocket(net_socket *socket)
	: DatagramSocket<>("ipv4 raw socket", socket)
{
}
213 
214 
215 //	#pragma mark -
216 
217 
// Creates an empty fragment packet for \a key. The packet starts out with
// IP_MAXPACKET bytes missing and without having seen the last fragment.
// The timer is only initialized here; it is started by AddFragment().
FragmentPacket::FragmentPacket(const ipv4_packet_key &key)
	:
	fKey(key),
	fReceivedLastFragment(false),
	fBytesLeft(IP_MAXPACKET)
{
	gStackModule->init_timer(&fTimer, StaleTimer, this);
}
226 
227 
228 FragmentPacket::~FragmentPacket()
229 {
230 	// cancel the kill timer
231 	gStackModule->set_timer(&fTimer, -1);
232 
233 	// delete all fragments
234 	net_buffer *buffer;
235 	while ((buffer = fFragments.RemoveHead()) != NULL) {
236 		gBufferModule->free(buffer);
237 	}
238 }
239 
240 
241 status_t
242 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer *buffer,
243 	bool lastFragment)
244 {
245 	// restart the timer
246 	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
247 
248 	if (start >= end) {
249 		// invalid fragment
250 		return B_BAD_DATA;
251 	}
252 
253 	// Search for a position in the list to insert the fragment
254 
255 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
256 	net_buffer *previous = NULL;
257 	net_buffer *next = NULL;
258 	while ((previous = iterator.Next()) != NULL) {
259 		if (previous->fragment.start <= start) {
260 			// The new fragment can be inserted after this one
261 			break;
262 		}
263 
264 		next = previous;
265 	}
266 
267 	// See if we already have the fragment's data
268 
269 	if (previous != NULL && previous->fragment.start <= start
270 		&& previous->fragment.end >= end) {
271 		// we do, so we can just drop this fragment
272 		gBufferModule->free(buffer);
273 		return B_OK;
274 	}
275 
276 	TRACE("    previous: %p, next: %p", previous, next);
277 
278 	// If we have parts of the data already, truncate as needed
279 
280 	if (previous != NULL && previous->fragment.end > start) {
281 		TRACE("    remove header %d bytes", previous->fragment.end - start);
282 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
283 		start = previous->fragment.end;
284 	}
285 	if (next != NULL && next->fragment.start < end) {
286 		TRACE("    remove trailer %d bytes", next->fragment.start - end);
287 		gBufferModule->remove_trailer(buffer, next->fragment.start - end);
288 		end = next->fragment.start;
289 	}
290 
291 	// Now try if we can already merge the fragments together
292 
293 	// We will always keep the last buffer received, so that we can still
294 	// report an error (in which case we're not responsible for freeing it)
295 
296 	if (previous != NULL && previous->fragment.end == start) {
297 		fFragments.Remove(previous);
298 
299 		buffer->fragment.start = previous->fragment.start;
300 		buffer->fragment.end = end;
301 
302 		status_t status = gBufferModule->merge(buffer, previous, false);
303 		TRACE("    merge previous: %s", strerror(status));
304 		if (status < B_OK) {
305 			fFragments.Insert(next, previous);
306 			return status;
307 		}
308 
309 		fFragments.Insert(next, buffer);
310 
311 		// cut down existing hole
312 		fBytesLeft -= end - start;
313 
314 		if (lastFragment && !fReceivedLastFragment) {
315 			fReceivedLastFragment = true;
316 			fBytesLeft -= IP_MAXPACKET - end;
317 		}
318 
319 		TRACE("    hole length: %d", (int)fBytesLeft);
320 
321 		return B_OK;
322 	} else if (next != NULL && next->fragment.start == end) {
323 		fFragments.Remove(next);
324 
325 		buffer->fragment.start = start;
326 		buffer->fragment.end = next->fragment.end;
327 
328 		status_t status = gBufferModule->merge(buffer, next, true);
329 		TRACE("    merge next: %s", strerror(status));
330 		if (status < B_OK) {
331 			fFragments.Insert((net_buffer *)previous->link.next, next);
332 			return status;
333 		}
334 
335 		fFragments.Insert((net_buffer *)previous->link.next, buffer);
336 
337 		// cut down existing hole
338 		fBytesLeft -= end - start;
339 
340 		if (lastFragment && !fReceivedLastFragment) {
341 			fReceivedLastFragment = true;
342 			fBytesLeft -= IP_MAXPACKET - end;
343 		}
344 
345 		TRACE("    hole length: %d", (int)fBytesLeft);
346 
347 		return B_OK;
348 	}
349 
350 	// We couldn't merge the fragments, so we need to add it as is
351 
352 	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);
353 
354 	buffer->fragment.start = start;
355 	buffer->fragment.end = end;
356 	fFragments.Insert(next, buffer);
357 
358 	// update length of the hole, if any
359 	fBytesLeft -= end - start;
360 
361 	if (lastFragment && !fReceivedLastFragment) {
362 		fReceivedLastFragment = true;
363 		fBytesLeft -= IP_MAXPACKET - end;
364 	}
365 
366 	TRACE("    hole length: %d", (int)fBytesLeft);
367 
368 	return B_OK;
369 }
370 
371 
372 /*!	Reassembles the fragments to the specified buffer \a to.
373 	This buffer must have been added via AddFragment() before.
374 */
375 status_t
376 FragmentPacket::Reassemble(net_buffer *to)
377 {
378 	if (!IsComplete())
379 		return B_ERROR;
380 
381 	net_buffer *buffer = NULL;
382 
383 	net_buffer *fragment;
384 	while ((fragment = fFragments.RemoveHead()) != NULL) {
385 		if (buffer != NULL) {
386 			status_t status;
387 			if (to == fragment) {
388 				status = gBufferModule->merge(fragment, buffer, false);
389 				buffer = fragment;
390 			} else
391 				status = gBufferModule->merge(buffer, fragment, true);
392 			if (status < B_OK)
393 				return status;
394 		} else
395 			buffer = fragment;
396 	}
397 
398 	if (buffer != to)
399 		panic("ipv4 packet reassembly did not work correctly.\n");
400 
401 	return B_OK;
402 }
403 
404 
405 int
406 FragmentPacket::Compare(void *_packet, const void *_key)
407 {
408 	const ipv4_packet_key *key = (ipv4_packet_key *)_key;
409 	ipv4_packet_key *packetKey = &((FragmentPacket *)_packet)->fKey;
410 
411 	if (packetKey->id == key->id
412 		&& packetKey->source == key->source
413 		&& packetKey->destination == key->destination
414 		&& packetKey->protocol == key->protocol)
415 		return 0;
416 
417 	return 1;
418 }
419 
420 
421 uint32
422 FragmentPacket::Hash(void *_packet, const void *_key, uint32 range)
423 {
424 	const struct ipv4_packet_key *key = (struct ipv4_packet_key *)_key;
425 	FragmentPacket *packet = (FragmentPacket *)_packet;
426 	if (packet != NULL)
427 		key = &packet->fKey;
428 
429 	return (key->source ^ key->destination ^ key->protocol ^ key->id) % range;
430 }
431 
432 
433 /*static*/ void
434 FragmentPacket::StaleTimer(struct net_timer *timer, void *data)
435 {
436 	FragmentPacket *packet = (FragmentPacket *)data;
437 	TRACE("Assembling FragmentPacket %p timed out!", packet);
438 
439 	MutexLocker locker(&sFragmentLock);
440 
441 	hash_remove(sFragmentHash, packet);
442 	delete packet;
443 }
444 
445 
446 //	#pragma mark -
447 
448 
// Debug helper that prints all fields of an IPv4 header via dprintf().
// It is currently compiled out.
#if 0
static void
dump_ipv4_header(ipv4_header &header)
{
	// overlay to access the four bytes of an address individually
	struct pretty_ipv4 {
	#if B_HOST_IS_LENDIAN == 1
		uint8 a;
		uint8 b;
		uint8 c;
		uint8 d;
	#else
		uint8 d;
		uint8 c;
		uint8 b;
		uint8 a;
	#endif
	};
	struct pretty_ipv4 *src = (struct pretty_ipv4 *)&header.source;
	struct pretty_ipv4 *dst = (struct pretty_ipv4 *)&header.destination;
	dprintf("  version: %d\n", header.version);
	dprintf("  header_length: 4 * %d\n", header.header_length);
	dprintf("  service_type: %d\n", header.service_type);
	dprintf("  total_length: %d\n", header.TotalLength());
	dprintf("  id: %d\n", ntohs(header.id));
	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
		header.FragmentOffset() & IP_FRAGMENT_OFFSET_MASK,
		(header.FragmentOffset() & IP_RESERVED_FLAG) ? 'r' : '-',
		(header.FragmentOffset() & IP_DONT_FRAGMENT) ? 'd' : '-',
		(header.FragmentOffset() & IP_MORE_FRAGMENTS) ? 'm' : '-');
	dprintf("  time_to_live: %d\n", header.time_to_live);
	dprintf("  protocol: %d\n", header.protocol);
	dprintf("  checksum: %d\n", ntohs(header.checksum));
	dprintf("  source: %d.%d.%d.%d\n", src->a, src->b, src->c, src->d);
	dprintf("  destination: %d.%d.%d.%d\n", dst->a, dst->b, dst->c, dst->d);
}
#endif
485 
486 
487 /*!
488 	Attempts to re-assemble fragmented packets.
489 	\return B_OK if everything went well; if it could reassemble the packet, \a _buffer
490 		will point to its buffer, otherwise, it will be \c NULL.
491 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
492 */
493 static status_t
494 reassemble_fragments(const ipv4_header &header, net_buffer **_buffer)
495 {
496 	net_buffer *buffer = *_buffer;
497 	status_t status;
498 
499 	struct ipv4_packet_key key;
500 	key.source = (in_addr_t)header.source;
501 	key.destination = (in_addr_t)header.destination;
502 	key.id = header.id;
503 	key.protocol = header.protocol;
504 
505 	// TODO: Make locking finer grained.
506 	MutexLocker locker(&sFragmentLock);
507 
508 	FragmentPacket *packet = (FragmentPacket *)hash_lookup(sFragmentHash, &key);
509 	if (packet == NULL) {
510 		// New fragment packet
511 		packet = new (std::nothrow) FragmentPacket(key);
512 		if (packet == NULL)
513 			return B_NO_MEMORY;
514 
515 		// add packet to hash
516 		status = hash_insert(sFragmentHash, packet);
517 		if (status != B_OK) {
518 			delete packet;
519 			return status;
520 		}
521 	}
522 
523 	uint16 fragmentOffset = header.FragmentOffset();
524 	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
525 	uint16 end = start + header.TotalLength() - header.HeaderLength();
526 	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;
527 
528 	TRACE("   Received IPv4 %sfragment of size %d, offset %d.",
529 		lastFragment ? "last ": "", end - start, start);
530 
531 	// Remove header unless this is the first fragment
532 	if (start != 0)
533 		gBufferModule->remove_header(buffer, header.HeaderLength());
534 
535 	status = packet->AddFragment(start, end, buffer, lastFragment);
536 	if (status != B_OK)
537 		return status;
538 
539 	if (packet->IsComplete()) {
540 		hash_remove(sFragmentHash, packet);
541 			// no matter if reassembling succeeds, we won't need this packet anymore
542 
543 		status = packet->Reassemble(buffer);
544 		delete packet;
545 
546 		// _buffer does not change
547 		return status;
548 	}
549 
550 	// This indicates that the packet is not yet complete
551 	*_buffer = NULL;
552 	return B_OK;
553 }
554 
555 
556 /*!
557 	Fragments the incoming buffer and send all fragments via the specified
558 	\a route.
559 */
560 static status_t
561 send_fragments(ipv4_protocol *protocol, struct net_route *route,
562 	net_buffer *buffer, uint32 mtu)
563 {
564 	TRACE_SK(protocol, "SendFragments(%lu bytes, mtu %lu)", buffer->size, mtu);
565 
566 	NetBufferHeaderReader<ipv4_header> originalHeader(buffer);
567 	if (originalHeader.Status() < B_OK)
568 		return originalHeader.Status();
569 
570 	uint16 headerLength = originalHeader->HeaderLength();
571 	uint32 bytesLeft = buffer->size - headerLength;
572 	uint32 fragmentOffset = 0;
573 	status_t status = B_OK;
574 
575 	net_buffer *headerBuffer = gBufferModule->split(buffer, headerLength);
576 	if (headerBuffer == NULL)
577 		return B_NO_MEMORY;
578 
579 	// TODO we need to make sure ipv4_header is contiguous or
580 	//      use another construct.
581 	NetBufferHeaderReader<ipv4_header> bufferHeader(headerBuffer);
582 	ipv4_header *header = &bufferHeader.Data();
583 
584 	// adapt MTU to be a multiple of 8 (fragment offsets can only be specified this way)
585 	mtu -= headerLength;
586 	mtu &= ~7;
587 	TRACE("  adjusted MTU to %ld\n", mtu);
588 
589 	TRACE("  bytesLeft = %ld\n", bytesLeft);
590 	while (bytesLeft > 0) {
591 		uint32 fragmentLength = min_c(bytesLeft, mtu);
592 		bytesLeft -= fragmentLength;
593 		bool lastFragment = bytesLeft == 0;
594 
595 		header->total_length = htons(fragmentLength + headerLength);
596 		header->fragment_offset = htons((lastFragment ? 0 : IP_MORE_FRAGMENTS)
597 			| (fragmentOffset >> 3));
598 		header->checksum = 0;
599 		header->checksum = gStackModule->checksum((uint8 *)header, headerLength);
600 			// TODO: compute the checksum only for those parts that changed?
601 
602 		TRACE("  send fragment of %ld bytes (%ld bytes left)\n", fragmentLength, bytesLeft);
603 
604 		net_buffer *fragmentBuffer;
605 		if (!lastFragment) {
606 			fragmentBuffer = gBufferModule->split(buffer, fragmentLength);
607 			fragmentOffset += fragmentLength;
608 		} else
609 			fragmentBuffer = buffer;
610 
611 		if (fragmentBuffer == NULL) {
612 			status = B_NO_MEMORY;
613 			break;
614 		}
615 
616 		// copy header to fragment
617 		status = gBufferModule->prepend(fragmentBuffer, header, headerLength);
618 
619 		// send fragment
620 		if (status == B_OK)
621 			status = sDatalinkModule->send_data(route, fragmentBuffer);
622 
623 		if (lastFragment) {
624 			// we don't own the last buffer, so we don't have to free it
625 			break;
626 		}
627 
628 		if (status < B_OK) {
629 			gBufferModule->free(fragmentBuffer);
630 			break;
631 		}
632 	}
633 
634 	gBufferModule->free(headerBuffer);
635 	return status;
636 }
637 
638 
639 static status_t
640 deliver_multicast(net_protocol_module_info *module, net_buffer *buffer,
641 	bool deliverToRaw)
642 {
643 	if (module->deliver_data == NULL)
644 		return B_OK;
645 
646 	MutexLocker _(sMulticastGroupsLock);
647 
648 	sockaddr_in *multicastAddr = (sockaddr_in *)buffer->destination;
649 
650 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
651 		&multicastAddr->sin_addr, buffer->interface->index));
652 
653 	while (it.HasNext()) {
654 		IPv4GroupInterface *state = it.Next();
655 
656 		if (deliverToRaw && state->Parent()->Socket()->raw == NULL)
657 			continue;
658 
659 		if (state->FilterAccepts(buffer)) {
660 			// as Multicast filters are installed with an IPv4 protocol
661 			// reference, we need to go and find the appropriate instance
662 			// related to the 'receiving protocol' with module 'module'.
663 			net_protocol *proto =
664 				state->Parent()->Socket()->socket->first_protocol;
665 
666 			while (proto && proto->module != module)
667 				proto = proto->next;
668 
669 			if (proto)
670 				module->deliver_data(proto, buffer);
671 		}
672 	}
673 
674 	return B_OK;
675 }
676 
677 
678 static void
679 raw_receive_data(net_buffer *buffer)
680 {
681 	MutexLocker locker(sRawSocketsLock);
682 
683 	if (sRawSockets.IsEmpty())
684 		return;
685 
686 	TRACE("RawReceiveData(%i)", buffer->protocol);
687 
688 	if (buffer->flags & MSG_MCAST) {
689 		// we need to call deliver_multicast here separately as
690 		// buffer still has the IP header, and it won't in the
691 		// next call. This isn't very optimized but works for now.
692 		// A better solution would be to hold separate hash tables
693 		// and lists for RAW and non-RAW sockets.
694 		deliver_multicast(&gIPv4Module, buffer, true);
695 	} else {
696 		RawSocketList::Iterator iterator = sRawSockets.GetIterator();
697 
698 		while (iterator.HasNext()) {
699 			RawSocket *raw = iterator.Next();
700 
701 			if (raw->Socket()->protocol == buffer->protocol)
702 				raw->SocketEnqueue(buffer);
703 		}
704 	}
705 }
706 
707 
708 static sockaddr *
709 fill_sockaddr_in(sockaddr_in *destination, const in_addr &source)
710 {
711 	memset(destination, 0, sizeof(sockaddr_in));
712 	destination->sin_family = AF_INET;
713 	destination->sin_addr = source;
714 	return (sockaddr *)destination;
715 }
716 
717 
718 status_t
719 IPv4Multicast::JoinGroup(IPv4GroupInterface *state)
720 {
721 	MutexLocker _(sMulticastGroupsLock);
722 
723 	sockaddr_in groupAddr;
724 	net_interface *intf = state->Interface();
725 
726 	status_t status = intf->first_info->join_multicast(intf->first_protocol,
727 		fill_sockaddr_in(&groupAddr, state->Address()));
728 	if (status < B_OK)
729 		return status;
730 
731 	sMulticastState->Insert(state);
732 	return B_OK;
733 }
734 
735 
736 status_t
737 IPv4Multicast::LeaveGroup(IPv4GroupInterface *state)
738 {
739 	MutexLocker _(sMulticastGroupsLock);
740 
741 	sMulticastState->Remove(state);
742 
743 	sockaddr_in groupAddr;
744 	net_interface *intf = state->Interface();
745 
746 	return intf->first_protocol->module->join_multicast(intf->first_protocol,
747 		fill_sockaddr_in(&groupAddr, state->Address()));
748 }
749 
750 
751 static net_protocol_module_info *
752 receiving_protocol(uint8 protocol)
753 {
754 	net_protocol_module_info *module = sReceivingProtocol[protocol];
755 	if (module != NULL)
756 		return module;
757 
758 	MutexLocker locker(sReceivingProtocolLock);
759 
760 	module = sReceivingProtocol[protocol];
761 	if (module != NULL)
762 		return module;
763 
764 	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol, &module) == B_OK)
765 		sReceivingProtocol[protocol] = module;
766 
767 	return module;
768 }
769 
770 
771 static inline sockaddr *
772 fill_sockaddr_in(sockaddr_in *target, in_addr_t address)
773 {
774 	memset(target, 0, sizeof(sockaddr_in));
775 	target->sin_family = AF_INET;
776 	target->sin_len = sizeof(sockaddr_in);
777 	target->sin_addr.s_addr = address;
778 	return (sockaddr *)target;
779 }
780 
781 
782 static status_t
783 ipv4_delta_group(IPv4GroupInterface *group, int option,
784 	net_interface *interface, const in_addr *sourceAddr)
785 {
786 	switch (option) {
787 		case IP_ADD_MEMBERSHIP:
788 			return group->Add();
789 		case IP_DROP_MEMBERSHIP:
790 			return group->Drop();
791 		case IP_BLOCK_SOURCE:
792 			return group->BlockSource(*sourceAddr);
793 		case IP_UNBLOCK_SOURCE:
794 			return group->UnblockSource(*sourceAddr);
795 		case IP_ADD_SOURCE_MEMBERSHIP:
796 			return group->AddSSM(*sourceAddr);
797 		case IP_DROP_SOURCE_MEMBERSHIP:
798 			return group->DropSSM(*sourceAddr);
799 	}
800 
801 	return B_ERROR;
802 }
803 
804 
805 static status_t
806 ipv4_delta_membership(ipv4_protocol *protocol, int option,
807 	net_interface *interface, const in_addr *groupAddr,
808 	const in_addr *sourceAddr)
809 {
810 	IPv4MulticastFilter &filter = protocol->multicast_filter;
811 	IPv4GroupInterface *state = NULL;
812 	status_t status = B_OK;
813 
814 	switch (option) {
815 		case IP_ADD_MEMBERSHIP:
816 		case IP_ADD_SOURCE_MEMBERSHIP:
817 			status = filter.GetState(*groupAddr, interface, state, true);
818 			break;
819 
820 		case IP_DROP_MEMBERSHIP:
821 		case IP_BLOCK_SOURCE:
822 		case IP_UNBLOCK_SOURCE:
823 		case IP_DROP_SOURCE_MEMBERSHIP:
824 			filter.GetState(*groupAddr, interface, state, false);
825 			if (state == NULL) {
826 				if (option == IP_DROP_MEMBERSHIP
827 					|| option == IP_DROP_SOURCE_MEMBERSHIP)
828 					return EADDRNOTAVAIL;
829 				else
830 					return EINVAL;
831 			}
832 			break;
833 	}
834 
835 	if (status < B_OK)
836 		return status;
837 
838 	status = ipv4_delta_group(state, option, interface, sourceAddr);
839 	filter.ReturnState(state);
840 	return status;
841 }
842 
843 
/*!	Translates a protocol independent MCAST_* multicast socket option into
	the corresponding IPv4 specific IP_* option.
	\return the IP_* option, or -1 if \a option is not one of the MCAST_*
		options handled here.
*/
static int
generic_to_ipv4(int option)
{
	static const struct {
		int	generic;
		int	ipv4;
	} kOptionMap[] = {
		{ MCAST_JOIN_GROUP,			IP_ADD_MEMBERSHIP },
		{ MCAST_JOIN_SOURCE_GROUP,	IP_ADD_SOURCE_MEMBERSHIP },
		{ MCAST_LEAVE_GROUP,		IP_DROP_MEMBERSHIP },
		{ MCAST_BLOCK_SOURCE,		IP_BLOCK_SOURCE },
		{ MCAST_UNBLOCK_SOURCE,		IP_UNBLOCK_SOURCE },
		{ MCAST_LEAVE_SOURCE_GROUP,	IP_DROP_SOURCE_MEMBERSHIP }
	};
	const size_t count = sizeof(kOptionMap) / sizeof(kOptionMap[0]);

	for (size_t i = 0; i < count; i++) {
		if (kOptionMap[i].generic == option)
			return kOptionMap[i].ipv4;
	}

	return -1;
}
864 
865 
866 static net_interface *
867 get_multicast_interface(ipv4_protocol *protocol, const in_addr *address)
868 {
869 	sockaddr_in groupAddr;
870 	net_route *route = sDatalinkModule->get_route(sDomain,
871 		fill_sockaddr_in(&groupAddr, address ? address->s_addr : INADDR_ANY));
872 	if (route == NULL)
873 		return NULL;
874 
875 	return route->interface;
876 }
877 
878 
879 static status_t
880 ipv4_delta_membership(ipv4_protocol *protocol, int option,
881 	in_addr *interfaceAddr, in_addr *groupAddr, in_addr *sourceAddr)
882 {
883 	net_interface *interface = NULL;
884 
885 	if (interfaceAddr->s_addr == INADDR_ANY) {
886 		interface = get_multicast_interface(protocol, groupAddr);
887 	} else {
888 		sockaddr_in address;
889 		interface = sDatalinkModule->get_interface_with_address(sDomain,
890 			fill_sockaddr_in(&address, interfaceAddr->s_addr));
891 	}
892 
893 	if (interface == NULL)
894 		return ENODEV;
895 
896 	return ipv4_delta_membership(protocol, option, interface,
897 		groupAddr, sourceAddr);
898 }
899 
900 
901 static status_t
902 ipv4_generic_delta_membership(ipv4_protocol *protocol, int option,
903 	uint32 index, const sockaddr_storage *_groupAddr,
904 	const sockaddr_storage *_sourceAddr)
905 {
906 	if (_groupAddr->ss_family != AF_INET)
907 		return EINVAL;
908 
909 	if (_sourceAddr && _sourceAddr->ss_family != AF_INET)
910 		return EINVAL;
911 
912 	net_interface *interface;
913 	const in_addr *groupAddr, *sourceAddr = NULL;
914 
915 	groupAddr = &((const sockaddr_in *)_groupAddr)->sin_addr;
916 
917 	if (index == 0)
918 		interface = get_multicast_interface(protocol, groupAddr);
919 	else
920 		interface = sDatalinkModule->get_interface(sDomain, index);
921 
922 	if (interface == NULL)
923 		return ENODEV;
924 
925 	if (_sourceAddr)
926 		sourceAddr = &((const sockaddr_in *)_sourceAddr)->sin_addr;
927 
928 	return ipv4_delta_membership(protocol, generic_to_ipv4(option), interface,
929 		groupAddr, sourceAddr);
930 }
931 
932 
933 //	#pragma mark -
934 
935 
936 net_protocol *
937 ipv4_init_protocol(net_socket *socket)
938 {
939 	ipv4_protocol *protocol = new (std::nothrow) ipv4_protocol();
940 	if (protocol == NULL)
941 		return NULL;
942 
943 	protocol->raw = NULL;
944 	protocol->service_type = 0;
945 	protocol->time_to_live = kDefaultTTL;
946 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
947 	protocol->flags = 0;
948 	return protocol;
949 }
950 
951 
952 status_t
953 ipv4_uninit_protocol(net_protocol *_protocol)
954 {
955 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
956 
957 	delete protocol->raw;
958 	delete protocol;
959 	return B_OK;
960 }
961 
962 
963 /*!
964 	Since open() is only called on the top level protocol, when we get here
965 	it means we are on a SOCK_RAW socket.
966 */
967 status_t
968 ipv4_open(net_protocol *_protocol)
969 {
970 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
971 
972 	RawSocket *raw = new (std::nothrow) RawSocket(protocol->socket);
973 	if (raw == NULL)
974 		return B_NO_MEMORY;
975 
976 	status_t status = raw->InitCheck();
977 	if (status < B_OK) {
978 		delete raw;
979 		return status;
980 	}
981 
982 	TRACE_SK(protocol, "Open()");
983 
984 	protocol->raw = raw;
985 
986 	MutexLocker locker(sRawSocketsLock);
987 	sRawSockets.Add(raw);
988 	return B_OK;
989 }
990 
991 
992 status_t
993 ipv4_close(net_protocol *_protocol)
994 {
995 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
996 	RawSocket *raw = protocol->raw;
997 	if (raw == NULL)
998 		return B_ERROR;
999 
1000 	TRACE_SK(protocol, "Close()");
1001 
1002 	MutexLocker locker(sRawSocketsLock);
1003 	sRawSockets.Remove(raw);
1004 	delete raw;
1005 	protocol->raw = NULL;
1006 
1007 	return B_OK;
1008 }
1009 
1010 
// Nothing to do here; always returns B_OK.
status_t
ipv4_free(net_protocol *protocol)
{
	return B_OK;
}
1016 
1017 
// Connecting is not implemented at this level; always returns B_ERROR.
status_t
ipv4_connect(net_protocol *protocol, const struct sockaddr *address)
{
	return B_ERROR;
}
1023 
1024 
// Accepting connections is not supported; always returns EOPNOTSUPP.
status_t
ipv4_accept(net_protocol *protocol, struct net_socket **_acceptedSocket)
{
	return EOPNOTSUPP;
}
1030 
1031 
1032 static status_t
1033 get_int_option(void *target, size_t length, int value)
1034 {
1035 	if (length != sizeof(int))
1036 		return B_BAD_VALUE;
1037 
1038 	return user_memcpy(target, &value, sizeof(int));
1039 }
1040 
1041 
1042 template<typename Type> static status_t
1043 set_int_option(Type &target, const void *_value, size_t length)
1044 {
1045 	int value;
1046 
1047 	if (length != sizeof(int))
1048 		return B_BAD_VALUE;
1049 
1050 	if (user_memcpy(&value, _value, sizeof(int)) < B_OK)
1051 		return B_BAD_ADDRESS;
1052 
1053 	target = value;
1054 	return B_OK;
1055 }
1056 
1057 
1058 status_t
1059 ipv4_control(net_protocol *_protocol, int level, int option, void *value,
1060 	size_t *_length)
1061 {
1062 	if ((level & LEVEL_MASK) != IPPROTO_IP)
1063 		return sDatalinkModule->control(sDomain, option, value, _length);
1064 
1065 	return B_BAD_VALUE;
1066 }
1067 
1068 
1069 status_t
1070 ipv4_getsockopt(net_protocol *_protocol, int level, int option, void *value,
1071 	int *_length)
1072 {
1073 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1074 
1075 	if (level == IPPROTO_IP) {
1076 		if (option == IP_HDRINCL)
1077 			return get_int_option(value, *_length,
1078 				(protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0);
1079 		else if (option == IP_TTL)
1080 			return get_int_option(value, *_length, protocol->time_to_live);
1081 		else if (option == IP_TOS)
1082 			return get_int_option(value, *_length, protocol->service_type);
1083 		else if (IP_MULTICAST_TTL)
1084 			return get_int_option(value, *_length,
1085 				protocol->multicast_time_to_live);
1086 		else if (option == IP_ADD_MEMBERSHIP
1087 			|| option == IP_DROP_MEMBERSHIP
1088 			|| option == IP_BLOCK_SOURCE
1089 			|| option == IP_UNBLOCK_SOURCE
1090 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1091 			|| option == IP_DROP_SOURCE_MEMBERSHIP
1092 			|| option == MCAST_JOIN_GROUP
1093 			|| option == MCAST_LEAVE_GROUP
1094 			|| option == MCAST_BLOCK_SOURCE
1095 			|| option == MCAST_UNBLOCK_SOURCE
1096 			|| option == MCAST_JOIN_SOURCE_GROUP
1097 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1098 				// RFC 3678, Section 4.1:
1099 				// ``An error of EOPNOTSUPP is returned if these options are
1100 				// used with getsockopt().''
1101 				return EOPNOTSUPP;
1102 		} else {
1103 			dprintf("IPv4::getsockopt(): get unknown option: %d\n", option);
1104 			return ENOPROTOOPT;
1105 		}
1106 	}
1107 
1108 	return sSocketModule->get_option(protocol->socket, level, option, value,
1109 		_length);
1110 }
1111 
1112 
1113 status_t
1114 ipv4_setsockopt(net_protocol *_protocol, int level, int option,
1115 	const void *value, int length)
1116 {
1117 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1118 
1119 	if (level == IPPROTO_IP) {
1120 		if (option == IP_HDRINCL) {
1121 			int headerIncluded;
1122 			if (length != sizeof(int))
1123 				return B_BAD_VALUE;
1124 			if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded)) < B_OK)
1125 				return B_BAD_ADDRESS;
1126 
1127 			if (headerIncluded)
1128 				protocol->flags |= IP_FLAG_HEADER_INCLUDED;
1129 			else
1130 				protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
1131 			return B_OK;
1132 		} else if (option == IP_TTL) {
1133 			return set_int_option(protocol->time_to_live, value, length);
1134 		} else if (option == IP_TOS) {
1135 			return set_int_option(protocol->service_type, value, length);
1136 		} else if (option == IP_MULTICAST_TTL) {
1137 			return set_int_option(protocol->multicast_time_to_live, value,
1138 				length);
1139 		} else if (option == IP_ADD_MEMBERSHIP
1140 			|| option == IP_DROP_MEMBERSHIP) {
1141 			ip_mreq mreq;
1142 			if (length != sizeof(ip_mreq))
1143 				return B_BAD_VALUE;
1144 			if (user_memcpy(&mreq, value, sizeof(ip_mreq)) < B_OK)
1145 				return B_BAD_ADDRESS;
1146 
1147 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1148 				&mreq.imr_multiaddr, NULL);
1149 		} else if (option == IP_BLOCK_SOURCE
1150 			|| option == IP_UNBLOCK_SOURCE
1151 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1152 			|| option == IP_DROP_SOURCE_MEMBERSHIP) {
1153 			ip_mreq_source mreq;
1154 			if (length != sizeof(ip_mreq_source))
1155 				return B_BAD_VALUE;
1156 			if (user_memcpy(&mreq, value, sizeof(ip_mreq_source)) < B_OK)
1157 				return B_BAD_ADDRESS;
1158 
1159 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1160 				&mreq.imr_multiaddr, &mreq.imr_sourceaddr);
1161 		} else if (option == MCAST_LEAVE_GROUP
1162 			|| option == MCAST_JOIN_GROUP) {
1163 			group_req greq;
1164 			if (length != sizeof(group_req))
1165 				return B_BAD_VALUE;
1166 			if (user_memcpy(&greq, value, sizeof(group_req)) < B_OK)
1167 				return B_BAD_ADDRESS;
1168 
1169 			return ipv4_generic_delta_membership(protocol, option,
1170 				greq.gr_interface, &greq.gr_group, NULL);
1171 		} else if (option == MCAST_BLOCK_SOURCE
1172 			|| option == MCAST_UNBLOCK_SOURCE
1173 			|| option == MCAST_JOIN_SOURCE_GROUP
1174 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1175 			group_source_req greq;
1176 			if (length != sizeof(group_source_req))
1177 				return B_BAD_VALUE;
1178 			if (user_memcpy(&greq, value, sizeof(group_source_req)) < B_OK)
1179 				return B_BAD_ADDRESS;
1180 
1181 			return ipv4_generic_delta_membership(protocol, option,
1182 				greq.gsr_interface, &greq.gsr_group, &greq.gsr_source);
1183 		} else {
1184 			dprintf("IPv4::setsockopt(): set unknown option: %d\n", option);
1185 			return ENOPROTOOPT;
1186 		}
1187 	}
1188 
1189 	return sSocketModule->set_option(protocol->socket, level, option,
1190 		value, length);
1191 }
1192 
1193 
1194 status_t
1195 ipv4_bind(net_protocol *protocol, const struct sockaddr *address)
1196 {
1197 	if (address->sa_family != AF_INET)
1198 		return EAFNOSUPPORT;
1199 
1200 	// only INADDR_ANY and addresses of local interfaces are accepted:
1201 	if (((sockaddr_in *)address)->sin_addr.s_addr == INADDR_ANY
1202 		|| IN_MULTICAST(ntohl(((sockaddr_in *)address)->sin_addr.s_addr))
1203 		|| sDatalinkModule->is_local_address(sDomain, address, NULL, NULL)) {
1204 		memcpy(&protocol->socket->address, address, sizeof(struct sockaddr_in));
1205 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
1206 			// explicitly set length, as our callers can't be trusted to
1207 			// always provide the correct length!
1208 		return B_OK;
1209 	}
1210 
1211 	return B_ERROR;
1212 		// address is unknown on this host
1213 }
1214 
1215 
1216 status_t
1217 ipv4_unbind(net_protocol *protocol, struct sockaddr *address)
1218 {
1219 	// nothing to do here
1220 	return B_OK;
1221 }
1222 
1223 
1224 status_t
1225 ipv4_listen(net_protocol *protocol, int count)
1226 {
1227 	return EOPNOTSUPP;
1228 }
1229 
1230 
1231 status_t
1232 ipv4_shutdown(net_protocol *protocol, int direction)
1233 {
1234 	return EOPNOTSUPP;
1235 }
1236 
1237 
1238 status_t
1239 ipv4_send_routed_data(net_protocol *_protocol, struct net_route *route,
1240 	net_buffer *buffer)
1241 {
1242 	if (route == NULL)
1243 		return B_BAD_VALUE;
1244 
1245 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1246 	net_interface *interface = route->interface;
1247 
1248 	TRACE_SK(protocol, "SendRoutedData(%p, %p [%ld bytes])", route, buffer,
1249 		buffer->size);
1250 
1251 	sockaddr_in &source = *(sockaddr_in *)buffer->source;
1252 	sockaddr_in &destination = *(sockaddr_in *)buffer->destination;
1253 	sockaddr_in &broadcastAddress = *(sockaddr_in *)interface->destination;
1254 
1255 	bool headerIncluded = false, checksumNeeded = true;
1256 	if (protocol != NULL)
1257 		headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
1258 
1259 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1260 
1261 	if (destination.sin_addr.s_addr == INADDR_ANY)
1262 		return EDESTADDRREQ;
1263 	else if ((interface->device->flags & IFF_BROADCAST) != 0
1264 		&& (destination.sin_addr.s_addr == INADDR_BROADCAST
1265 			|| destination.sin_addr.s_addr
1266 				== broadcastAddress.sin_addr.s_addr)) {
1267 		if (protocol && !(protocol->socket->options & SO_BROADCAST))
1268 			return B_BAD_VALUE;
1269 		buffer->flags |= MSG_BCAST;
1270 	} else if (IN_MULTICAST(ntohl(destination.sin_addr.s_addr))) {
1271 		buffer->flags |= MSG_MCAST;
1272 	}
1273 
1274 	// Add IP header (if needed)
1275 
1276 	if (!headerIncluded) {
1277 		NetBufferPrepend<ipv4_header> header(buffer);
1278 		if (header.Status() < B_OK)
1279 			return header.Status();
1280 
1281 		header->version = IP_VERSION;
1282 		header->header_length = sizeof(ipv4_header) / 4;
1283 		header->service_type = protocol ? protocol->service_type : 0;
1284 		header->total_length = htons(buffer->size);
1285 		header->id = htons(atomic_add(&sPacketID, 1));
1286 		header->fragment_offset = 0;
1287 		if (protocol)
1288 			header->time_to_live = (buffer->flags & MSG_MCAST) ?
1289 				protocol->multicast_time_to_live : protocol->time_to_live;
1290 		else
1291 			header->time_to_live = (buffer->flags & MSG_MCAST) ?
1292 				kDefaultMulticastTTL : kDefaultTTL;
1293 		header->protocol = protocol ? protocol->socket->protocol : buffer->protocol;
1294 		header->checksum = 0;
1295 
1296 		header->source = source.sin_addr.s_addr;
1297 		header->destination = destination.sin_addr.s_addr;
1298 	} else {
1299 		// if IP_HDRINCL, check if the source address is set
1300 		NetBufferHeaderReader<ipv4_header> header(buffer);
1301 		if (header.Status() < B_OK)
1302 			return header.Status();
1303 
1304 		if (header->source == 0) {
1305 			header->source = source.sin_addr.s_addr;
1306 			header->checksum = 0;
1307 			header.Sync();
1308 		} else
1309 			checksumNeeded = false;
1310 	}
1311 
1312 	if (buffer->size > 0xffff)
1313 		return EMSGSIZE;
1314 
1315 	if (checksumNeeded)
1316 		*IPChecksumField(buffer) = gBufferModule->checksum(buffer, 0,
1317 			sizeof(ipv4_header), true);
1318 
1319 	TRACE_SK(protocol, "  SendRoutedData(): header chksum: %ld, buffer checksum: %ld",
1320 		gBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
1321 		gBufferModule->checksum(buffer, 0, buffer->size, true));
1322 
1323 	TRACE_SK(protocol, "  SendRoutedData(): destination: %08lx",
1324 		ntohl(destination.sin_addr.s_addr));
1325 
1326 	uint32 mtu = route->mtu ? route->mtu : interface->mtu;
1327 	if (buffer->size > mtu) {
1328 		// we need to fragment the packet
1329 		return send_fragments(protocol, route, buffer, mtu);
1330 	}
1331 
1332 	return sDatalinkModule->send_data(route, buffer);
1333 }
1334 
1335 
1336 status_t
1337 ipv4_send_data(net_protocol *_protocol, net_buffer *buffer)
1338 {
1339 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1340 
1341 	TRACE_SK(protocol, "SendData(%p [%ld bytes])", buffer, buffer->size);
1342 
1343 	if (protocol && (protocol->flags & IP_FLAG_HEADER_INCLUDED)) {
1344 		if (buffer->size < sizeof(ipv4_header))
1345 			return EINVAL;
1346 
1347 		sockaddr_in *source = (sockaddr_in *)buffer->source;
1348 		sockaddr_in *destination = (sockaddr_in *)buffer->destination;
1349 
1350 		fill_sockaddr_in(source, *NetBufferField<in_addr_t,
1351 			offsetof(ipv4_header, source)>(buffer));
1352 		fill_sockaddr_in(destination, *NetBufferField<in_addr_t,
1353 			offsetof(ipv4_header, destination)>(buffer));
1354 	}
1355 
1356 	return sDatalinkModule->send_datagram(protocol, sDomain, buffer);
1357 }
1358 
1359 
1360 ssize_t
1361 ipv4_send_avail(net_protocol *protocol)
1362 {
1363 	return B_ERROR;
1364 }
1365 
1366 
1367 status_t
1368 ipv4_read_data(net_protocol *_protocol, size_t numBytes, uint32 flags,
1369 	net_buffer **_buffer)
1370 {
1371 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1372 	RawSocket *raw = protocol->raw;
1373 	if (raw == NULL)
1374 		return B_ERROR;
1375 
1376 	TRACE_SK(protocol, "ReadData(%lu, 0x%lx)", numBytes, flags);
1377 
1378 	return raw->SocketDequeue(flags, _buffer);
1379 }
1380 
1381 
1382 ssize_t
1383 ipv4_read_avail(net_protocol *_protocol)
1384 {
1385 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1386 	RawSocket *raw = protocol->raw;
1387 	if (raw == NULL)
1388 		return B_ERROR;
1389 
1390 	return raw->AvailableData();
1391 }
1392 
1393 
1394 struct net_domain *
1395 ipv4_get_domain(net_protocol *protocol)
1396 {
1397 	return sDomain;
1398 }
1399 
1400 
1401 size_t
1402 ipv4_get_mtu(net_protocol *protocol, const struct sockaddr *address)
1403 {
1404 	net_route *route = sDatalinkModule->get_route(sDomain, address);
1405 	if (route == NULL)
1406 		return 0;
1407 
1408 	size_t mtu;
1409 	if (route->mtu != 0)
1410 		mtu = route->mtu;
1411 	else
1412 		mtu = route->interface->mtu;
1413 
1414 	sDatalinkModule->put_route(sDomain, route);
1415 	return mtu - sizeof(ipv4_header);
1416 }
1417 
1418 
1419 status_t
1420 ipv4_receive_data(net_buffer *buffer)
1421 {
1422 	TRACE("ReceiveData(%p [%ld bytes])", buffer, buffer->size);
1423 
1424 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1425 	if (bufferHeader.Status() < B_OK)
1426 		return bufferHeader.Status();
1427 
1428 	ipv4_header &header = bufferHeader.Data();
1429 	//dump_ipv4_header(header);
1430 
1431 	if (header.version != IP_VERSION)
1432 		return B_BAD_TYPE;
1433 
1434 	uint16 packetLength = header.TotalLength();
1435 	uint16 headerLength = header.HeaderLength();
1436 	if (packetLength > buffer->size
1437 		|| headerLength < sizeof(ipv4_header))
1438 		return B_BAD_DATA;
1439 
1440 	// TODO: would be nice to have a direct checksum function somewhere
1441 	if (gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1442 		return B_BAD_DATA;
1443 
1444 	struct sockaddr_in &source = *(struct sockaddr_in *)buffer->source;
1445 	struct sockaddr_in &destination = *(struct sockaddr_in *)buffer->destination;
1446 
1447 	fill_sockaddr_in(&source, header.source);
1448 	fill_sockaddr_in(&destination, header.destination);
1449 
1450 	// lower layers notion of Broadcast or Multicast have no relevance to us
1451 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1452 
1453 	if (header.destination == INADDR_BROADCAST) {
1454 		buffer->flags |= MSG_BCAST;
1455 	} else if (IN_MULTICAST(ntohl(header.destination))) {
1456 		buffer->flags |= MSG_MCAST;
1457 	} else {
1458 		uint32 matchedAddressType = 0;
1459 		// test if the packet is really for us
1460 		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1461 			&buffer->interface, &matchedAddressType)) {
1462 			TRACE("  ReceiveData(): packet was not for us %lx -> %lx",
1463 				ntohl(header.source), ntohl(header.destination));
1464 			return B_ERROR;
1465 		}
1466 
1467 		// copy over special address types (MSG_BCAST or MSG_MCAST):
1468 		buffer->flags |= matchedAddressType;
1469 	}
1470 
1471 	uint8 protocol = buffer->protocol = header.protocol;
1472 
1473 	// remove any trailing/padding data
1474 	status_t status = gBufferModule->trim(buffer, packetLength);
1475 	if (status < B_OK)
1476 		return status;
1477 
1478 	// check for fragmentation
1479 	uint16 fragmentOffset = ntohs(header.fragment_offset);
1480 	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
1481 		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
1482 		// this is a fragment
1483 		TRACE("  ReceiveData(): Found a Fragment!");
1484 		status = reassemble_fragments(header, &buffer);
1485 		TRACE("  ReceiveData():  -> %s", strerror(status));
1486 		if (status != B_OK)
1487 			return status;
1488 
1489 		if (buffer == NULL) {
1490 			// buffer was put into fragment packet
1491 			TRACE("  ReceiveData(): Not yet assembled.");
1492 			return B_OK;
1493 		}
1494 	}
1495 
1496 	// Since the buffer might have been changed (reassembled fragment)
1497 	// we must no longer access bufferHeader or header anymore after
1498 	// this point
1499 
1500 	raw_receive_data(buffer);
1501 
1502 	gBufferModule->remove_header(buffer, headerLength);
1503 		// the header is of variable size and may include IP options
1504 		// (that we ignore for now)
1505 
1506 	net_protocol_module_info *module = receiving_protocol(protocol);
1507 	if (module == NULL) {
1508 		// no handler for this packet
1509 		return EAFNOSUPPORT;
1510 	}
1511 
1512 	if (buffer->flags & MSG_MCAST) {
1513 		// Unfortunely historical reasons dictate that the IP multicast
1514 		// model be a little different from the unicast one. We deliver
1515 		// this frame directly to all sockets registered with interest
1516 		// for this multicast group.
1517 		return deliver_multicast(module, buffer, false);
1518 	}
1519 
1520 	return module->receive_data(buffer);
1521 }
1522 
1523 
1524 status_t
1525 ipv4_deliver_data(net_protocol *_protocol, net_buffer *buffer)
1526 {
1527 	ipv4_protocol *protocol = (ipv4_protocol *)_protocol;
1528 
1529 	if (protocol->raw == NULL)
1530 		return B_ERROR;
1531 
1532 	return protocol->raw->SocketEnqueue(buffer);
1533 }
1534 
1535 
1536 status_t
1537 ipv4_error(uint32 code, net_buffer *data)
1538 {
1539 	return B_ERROR;
1540 }
1541 
1542 
1543 status_t
1544 ipv4_error_reply(net_protocol *protocol, net_buffer *causedError, uint32 code,
1545 	void *errorData)
1546 {
1547 	return B_ERROR;
1548 }
1549 
1550 
1551 static int
1552 dump_ipv4_multicast(int argc, char *argv[])
1553 {
1554 	MulticastState::Iterator it = sMulticastState->GetIterator();
1555 
1556 	while (it.HasNext()) {
1557 		IPv4GroupInterface *state = it.Next();
1558 
1559 		char addrBuf[64];
1560 
1561 		kprintf("%p: group <%s, %s, %s {", state, state->Interface()->name,
1562 			print_address(&state->Address(), addrBuf, sizeof(addrBuf)),
1563 			state->Mode() == IPv4GroupInterface::kExclude ?  "Exclude" :
1564 			"Include");
1565 
1566 		int count = 0;
1567 		IPv4GroupInterface::AddressSet::Iterator it =
1568 			state->Sources().GetIterator();
1569 		while (it.HasNext()) {
1570 			kprintf("%s%s", count > 0 ? ", " : "", print_address(&it.Next(),
1571 				addrBuf, sizeof(addrBuf)));
1572 			count++;
1573 		}
1574 
1575 		kprintf("}> sock %p\n", state->Parent()->Socket());
1576 	}
1577 
1578 	return 0;
1579 }
1580 
1581 
1582 //	#pragma mark -
1583 
1584 
1585 status_t
1586 init_ipv4()
1587 {
1588 	sPacketID = (int32)system_time();
1589 
1590 	mutex_init(&sRawSocketsLock, "raw sockets");
1591 	mutex_init(&sFragmentLock, "IPv4 Fragments");
1592 	mutex_init(&sMulticastGroupsLock, "IPv4 multicast groups");
1593 	mutex_init(&sReceivingProtocolLock, "IPv4 receiving protocols");
1594 
1595 	status_t status;
1596 
1597 	sMulticastState = new MulticastState();
1598 	if (sMulticastState == NULL) {
1599 		status = B_NO_MEMORY;
1600 		goto err4;
1601 	}
1602 
1603 	status = sMulticastState->Init();
1604 	if (status < B_OK)
1605 		goto err5;
1606 
1607 	sFragmentHash = hash_init(MAX_HASH_FRAGMENTS, FragmentPacket::NextOffset(),
1608 		&FragmentPacket::Compare, &FragmentPacket::Hash);
1609 	if (sFragmentHash == NULL)
1610 		goto err5;
1611 
1612 	new (&sRawSockets) RawSocketList;
1613 		// static initializers do not work in the kernel,
1614 		// so we have to do it here, manually
1615 		// TODO: for modules, this shouldn't be required
1616 
1617 	status = gStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
1618 		"network/protocols/ipv4/v1", NULL);
1619 	if (status < B_OK)
1620 		goto err6;
1621 
1622 	status = gStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
1623 		&gIPv4AddressModule, &sDomain);
1624 	if (status < B_OK)
1625 		goto err6;
1626 
1627 	add_debugger_command("ipv4_multicast", dump_ipv4_multicast,
1628 		"list all current IPv4 multicast states");
1629 
1630 	return B_OK;
1631 
1632 err6:
1633 	hash_uninit(sFragmentHash);
1634 err5:
1635 	delete sMulticastState;
1636 err4:
1637 	mutex_destroy(&sReceivingProtocolLock);
1638 	mutex_destroy(&sMulticastGroupsLock);
1639 	mutex_destroy(&sFragmentLock);
1640 	mutex_destroy(&sRawSocketsLock);
1641 	return status;
1642 }
1643 
1644 
1645 status_t
1646 uninit_ipv4()
1647 {
1648 	mutex_lock(&sReceivingProtocolLock);
1649 
1650 	remove_debugger_command("ipv4_multicast", dump_ipv4_multicast);
1651 
1652 	// put all the domain receiving protocols we gathered so far
1653 	for (uint32 i = 0; i < 256; i++) {
1654 		if (sReceivingProtocol[i] != NULL)
1655 			gStackModule->put_domain_receiving_protocol(sDomain, i);
1656 	}
1657 
1658 	gStackModule->unregister_domain(sDomain);
1659 	mutex_unlock(&sReceivingProtocolLock);
1660 
1661 	delete sMulticastState;
1662 	hash_uninit(sFragmentHash);
1663 
1664 	mutex_destroy(&sMulticastGroupsLock);
1665 	mutex_destroy(&sFragmentLock);
1666 	mutex_destroy(&sRawSocketsLock);
1667 	mutex_destroy(&sReceivingProtocolLock);
1668 
1669 	return B_OK;
1670 }
1671 
1672 
1673 static status_t
1674 ipv4_std_ops(int32 op, ...)
1675 {
1676 	switch (op) {
1677 		case B_MODULE_INIT:
1678 			return init_ipv4();
1679 		case B_MODULE_UNINIT:
1680 			return uninit_ipv4();
1681 
1682 		default:
1683 			return B_ERROR;
1684 	}
1685 }
1686 
1687 
1688 net_protocol_module_info gIPv4Module = {
1689 	{
1690 		"network/protocols/ipv4/v1",
1691 		0,
1692 		ipv4_std_ops
1693 	},
1694 	NET_PROTOCOL_ATOMIC_MESSAGES,
1695 
1696 	ipv4_init_protocol,
1697 	ipv4_uninit_protocol,
1698 	ipv4_open,
1699 	ipv4_close,
1700 	ipv4_free,
1701 	ipv4_connect,
1702 	ipv4_accept,
1703 	ipv4_control,
1704 	ipv4_getsockopt,
1705 	ipv4_setsockopt,
1706 	ipv4_bind,
1707 	ipv4_unbind,
1708 	ipv4_listen,
1709 	ipv4_shutdown,
1710 	ipv4_send_data,
1711 	ipv4_send_routed_data,
1712 	ipv4_send_avail,
1713 	ipv4_read_data,
1714 	ipv4_read_avail,
1715 	ipv4_get_domain,
1716 	ipv4_get_mtu,
1717 	ipv4_receive_data,
1718 	ipv4_deliver_data,
1719 	ipv4_error,
1720 	ipv4_error_reply,
1721 };
1722 
1723 module_dependency module_dependencies[] = {
1724 	{NET_STACK_MODULE_NAME, (module_info **)&gStackModule},
1725 	{NET_BUFFER_MODULE_NAME, (module_info **)&gBufferModule},
1726 	{NET_DATALINK_MODULE_NAME, (module_info **)&sDatalinkModule},
1727 	{NET_SOCKET_MODULE_NAME, (module_info **)&sSocketModule},
1728 	{}
1729 };
1730 
1731 module_info *modules[] = {
1732 	(module_info *)&gIPv4Module,
1733 	NULL
1734 };
1735