xref: /haiku/src/add-ons/kernel/network/protocols/ipv4/ipv4.cpp (revision eea5774f46bba925156498abf9cb1a1165647bf7)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  */
8 
9 
10 #include "ipv4.h"
11 #include "ipv4_address.h"
12 #include "multicast.h"
13 
14 #include <net_datalink.h>
15 #include <net_datalink_protocol.h>
16 #include <net_device.h>
17 #include <net_protocol.h>
18 #include <net_stack.h>
19 #include <NetBufferUtilities.h>
20 #include <ProtocolUtilities.h>
21 
22 #include <KernelExport.h>
23 #include <util/AutoLock.h>
24 #include <util/list.h>
25 #include <util/DoublyLinkedList.h>
26 #include <util/MultiHashTable.h>
27 
28 #include <netinet/in.h>
29 #include <netinet/ip.h>
30 #include <new>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <utility>
35 
36 
// Remove the leading "//" of the next line to compile the tracing macros in.
37 //#define TRACE_IPV4
38 #ifdef TRACE_IPV4
// TRACE() forwards to dprintf() and prefixes every line with "IPv4" and the
// current system_time(); TRACE_SK() additionally prints its first argument
// with "%p". TRACE_ONLY(x) keeps x only when tracing is compiled in.
39 #	define TRACE(format, args...) \
40 		dprintf("IPv4 [%" B_PRIdBIGTIME "] " format "\n", system_time() , \
41 			##args)
42 #	define TRACE_SK(protocol, format, args...) \
43 		dprintf("IPv4 [%" B_PRIdBIGTIME "] %p " format "\n", system_time(), \
44 			protocol , ##args)
45 #	define TRACE_ONLY(x) x
46 #else
// Tracing disabled: TRACE()/TRACE_SK() expand to an empty statement and
// TRACE_ONLY() drops its argument.
47 #	define TRACE(args...) ;
48 #	define TRACE_SK(args...) ;
49 #	define TRACE_ONLY(x)
50 #endif
51 
52 
// Tuning constants for the fragment reassembly code.
53 #define MAX_HASH_FRAGMENTS 		64
54 	// slots in the fragment packet's hash
55 #define FRAGMENT_TIMEOUT		60000000LL
56 	// discard fragment after 60 seconds; this is the delay handed to
	// set_timer() whenever FragmentPacket::AddFragment() is called
57 
58 
// List of net_buffers, linked via DoublyLinkedListCLink; used by
// FragmentPacket to keep its fragments.
59 typedef DoublyLinkedList<struct net_buffer,
60 	DoublyLinkedListCLink<struct net_buffer> > FragmentList;
61 
// Accessor type for the uint16 located at the offset of ipv4_header::checksum.
62 typedef NetBufferField<uint16, offsetof(ipv4_header, checksum)> IPChecksumField;
63 
// Identifies the datagram a fragment belongs to. reassemble_fragments() fills
// all four members straight from the IPv4 header (without byte swapping) and
// uses the key to look up the matching FragmentPacket.
64 struct ipv4_packet_key {
65 	in_addr_t	source;
66 	in_addr_t	destination;
67 	uint16		id;
68 	uint8		protocol;
69 };
70 
71 
72 class FragmentPacket {
73 public:
74 								FragmentPacket(const ipv4_packet_key& key);
75 								~FragmentPacket();
76 
77 			status_t			AddFragment(uint16 start, uint16 end,
78 									net_buffer* buffer, bool lastFragment);
79 			status_t			Reassemble(net_buffer* to);
80 
81 			bool				IsComplete() const
82 									{ return fReceivedLastFragment
83 										&& fBytesLeft == 0; }
84 
85 			const ipv4_packet_key& Key() const { return fKey; }
86 			FragmentPacket*&	HashTableLink() { return fNext; }
87 
88 	static	void				StaleTimer(struct net_timer* timer, void* data);
89 
90 private:
91 			FragmentPacket*		fNext;
92 			struct ipv4_packet_key fKey;
93 			uint32				fIndex;
94 			bool				fReceivedLastFragment;
95 			int32				fBytesLeft;
96 			FragmentList		fFragments;
97 			net_timer			fTimer;
98 };
99 
100 
101 struct FragmentHashDefinition {
102 	typedef ipv4_packet_key KeyType;
103 	typedef FragmentPacket ValueType;
104 
105 	size_t HashKey(const KeyType& key) const
106 	{
107 		return (key.source ^ key.destination ^ key.protocol ^ key.id);
108 	}
109 
110 	size_t Hash(ValueType* value) const
111 	{
112 		return HashKey(value->Key());
113 	}
114 
115 	bool Compare(const KeyType& key, ValueType* value) const
116 	{
117 		const ipv4_packet_key& packetKey = value->Key();
118 
119 		return packetKey.id == key.id
120 			&& packetKey.source == key.source
121 			&& packetKey.destination == key.destination
122 			&& packetKey.protocol == key.protocol;
123 	}
124 
125 	ValueType*& GetLink(ValueType* value) const
126 	{
127 		return value->HashTableLink();
128 	}
129 };
130 
// Hash table of FragmentPackets, using the FragmentHashDefinition policy;
// instantiated once as sFragmentHash.
131 typedef BOpenHashTable<FragmentHashDefinition, false, true> FragmentTable;
132 
133 
// Datagram socket that can additionally be linked into a RawSocketList.
// An instance is created in ipv4_open() and destroyed in ipv4_close().
134 class RawSocket
135 	: public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
136 public:
137 								RawSocket(net_socket* socket);
138 };
139 
// Type of the global list of open raw sockets (sRawSockets).
140 typedef DoublyLinkedList<RawSocket> RawSocketList;
141 
// IPv4 instantiations of the generic multicast templates.
142 typedef MulticastGroupInterface<IPv4Multicast> IPv4GroupInterface;
143 typedef MulticastFilter<IPv4Multicast> IPv4MulticastFilter;
144 
145 struct MulticastStateHash {
146 	typedef std::pair<const in_addr* , uint32> KeyType;
147 	typedef IPv4GroupInterface ValueType;
148 
149 	size_t HashKey(const KeyType &key) const
150 		{ return key.first->s_addr ^ key.second; }
151 	size_t Hash(ValueType* value) const
152 		{ return HashKey(std::make_pair(&value->Address(),
153 			value->Interface()->index)); }
154 	bool Compare(const KeyType &key, ValueType* value) const
155 		{ return value->Interface()->index == key.second
156 			&& value->Address().s_addr == key.first->s_addr; }
157 	bool CompareValues(ValueType* value1, ValueType* value2) const
158 		{ return value1->Interface()->index == value2->Interface()->index
159 			&& value1->Address().s_addr == value2->Address().s_addr; }
160 	ValueType*& GetLink(ValueType* value) const { return value->MulticastGroupsHashLink(); }
161 };
162 
163 
164 struct ipv4_protocol : net_protocol {
165 	ipv4_protocol()
166 		:
167 		raw(NULL),
168 		multicast_filter(this)
169 	{
170 	}
171 
172 	~ipv4_protocol()
173 	{
174 		delete raw;
175 	}
176 
177 	RawSocket*			raw;
178 	uint8				service_type;
179 	uint8				time_to_live;
180 	uint8				multicast_time_to_live;
181 	bool				multicast_loopback;
182 	uint32				flags;
183 	struct sockaddr*	multicast_address; // for IP_MULTICAST_IF
184 
185 	IPv4MulticastFilter	multicast_filter;
186 };
187 
188 // protocol flags (presumably the bits kept in ipv4_protocol::flags; their
// users are not part of this section of the file)
189 #define IP_FLAG_HEADER_INCLUDED		0x01
190 #define IP_FLAG_RECEIVE_DEST_ADDR	0x02
191 
192 
// Defaults that ipv4_init_protocol() assigns to every new protocol instance.
193 static const int kDefaultTTL = 254;
194 static const int kDefaultMulticastTTL = 1;
195 static const bool kDefaultMulticastLoopback = true;
196 
197 
198 extern net_protocol_module_info gIPv4Module;
199 	// we need this in ipv4_std_ops() for registering the AF_INET domain
200 
201 net_stack_module_info* gStackModule;
202 net_buffer_module_info* gBufferModule;
203 
204 static struct net_domain* sDomain;
205 static net_datalink_module_info* sDatalinkModule;
206 static net_socket_module_info* sSocketModule;
207 static int32 sPacketID;
// All open raw sockets; sRawSocketsLock is held while the list is accessed.
208 static RawSocketList sRawSockets;
209 static mutex sRawSocketsLock;
// Datagrams under reassembly; sFragmentLock is held while the table is
// accessed.
210 static mutex sFragmentLock;
211 static FragmentTable sFragmentHash;
// Held while sMulticastState is read or modified.
212 static mutex sMulticastGroupsLock;
213 
214 typedef MultiHashTable<MulticastStateHash> MulticastState;
215 static MulticastState* sMulticastState;
216 
// Cache of receiving protocol modules, indexed by IP protocol number and
// filled on demand by receiving_protocol().
217 static net_protocol_module_info* sReceivingProtocol[256];
218 static mutex sReceivingProtocolLock;
219 
220 
/*!	Writes \a address in dotted decimal notation into \a buf (at most
	\a bufLen bytes, including the terminating null).
	\return \a buf, so that the call can be used directly as an argument.
*/
static const char*
print_address(const in_addr* address, char* buf, size_t bufLen)
{
	// Convert to host order, so that the most significant octet comes first
	const unsigned int hostOrder = ntohl(address->s_addr);

	const unsigned int first = (hostOrder >> 24) & 0xff;
	const unsigned int second = (hostOrder >> 16) & 0xff;
	const unsigned int third = (hostOrder >> 8) & 0xff;
	const unsigned int fourth = hostOrder & 0xff;

	snprintf(buf, bufLen, "%u.%u.%u.%u", first, second, third, fourth);
	return buf;
}
231 
232 
233 RawSocket::RawSocket(net_socket* socket)
234 	:
235 	DatagramSocket<>("ipv4 raw socket", socket)
236 {
237 }
238 
239 
240 //	#pragma mark -
241 
242 
243 FragmentPacket::FragmentPacket(const ipv4_packet_key& key)
244 	:
245 	fKey(key),
246 	fIndex(0),
247 	fReceivedLastFragment(false),
248 	fBytesLeft(IP_MAXPACKET)
249 {
250 	gStackModule->init_timer(&fTimer, FragmentPacket::StaleTimer, this);
251 }
252 
253 
254 FragmentPacket::~FragmentPacket()
255 {
256 	// cancel the kill timer
257 	gStackModule->set_timer(&fTimer, -1);
258 
259 	// delete all fragments
260 	net_buffer* buffer;
261 	while ((buffer = fFragments.RemoveHead()) != NULL) {
262 		gBufferModule->free(buffer);
263 	}
264 }
265 
266 
267 status_t
268 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer* buffer,
269 	bool lastFragment)
270 {
271 	// restart the timer
272 	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
273 
274 	if (start >= end) {
275 		// invalid fragment
276 		return B_BAD_DATA;
277 	}
278 
279 	// Search for a position in the list to insert the fragment
280 
281 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
282 	net_buffer* previous = NULL;
283 	net_buffer* next = NULL;
284 	while ((previous = iterator.Next()) != NULL) {
285 		if (previous->fragment.start <= start) {
286 			// The new fragment can be inserted after this one
287 			break;
288 		}
289 
290 		next = previous;
291 	}
292 
293 	// See if we already have the fragment's data
294 
295 	if (previous != NULL && previous->fragment.start <= start
296 		&& previous->fragment.end >= end) {
297 		// we do, so we can just drop this fragment
298 		gBufferModule->free(buffer);
299 		return B_OK;
300 	}
301 
302 	fIndex = buffer->index;
303 		// adopt the buffer's device index
304 
305 	TRACE("    previous: %p, next: %p", previous, next);
306 
307 	// If we have parts of the data already, truncate as needed
308 
309 	if (previous != NULL && previous->fragment.end > start) {
310 		TRACE("    remove header %d bytes", previous->fragment.end - start);
311 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
312 		start = previous->fragment.end;
313 	}
314 	if (next != NULL && end > next->fragment.start) {
315 		TRACE("    remove trailer %d bytes", end - next->fragment.start);
316 		gBufferModule->remove_trailer(buffer, end - next->fragment.start);
317 		end = next->fragment.start;
318 	}
319 
320 	// Now try if we can already merge the fragments together
321 
322 	// We will always keep the last buffer received, so that we can still
323 	// report an error (in which case we're not responsible for freeing it)
324 
325 	if (previous != NULL && previous->fragment.end == start) {
326 		fFragments.Remove(previous);
327 
328 		buffer->fragment.start = previous->fragment.start;
329 		buffer->fragment.end = end;
330 
331 		status_t status = gBufferModule->merge(buffer, previous, false);
332 		TRACE("    merge previous: %s", strerror(status));
333 		if (status != B_OK) {
334 			fFragments.InsertBefore(next, previous);
335 			return status;
336 		}
337 
338 		fFragments.InsertBefore(next, buffer);
339 
340 		// cut down existing hole
341 		fBytesLeft -= end - start;
342 
343 		if (lastFragment && !fReceivedLastFragment) {
344 			fReceivedLastFragment = true;
345 			fBytesLeft -= IP_MAXPACKET - end;
346 		}
347 
348 		TRACE("    hole length: %d", (int)fBytesLeft);
349 
350 		return B_OK;
351 	} else if (next != NULL && next->fragment.start == end) {
352 		net_buffer* afterNext = (net_buffer*)next->link.next;
353 		fFragments.Remove(next);
354 
355 		buffer->fragment.start = start;
356 		buffer->fragment.end = next->fragment.end;
357 
358 		status_t status = gBufferModule->merge(buffer, next, true);
359 		TRACE("    merge next: %s", strerror(status));
360 		if (status != B_OK) {
361 			// Insert "next" at its previous position
362 			fFragments.InsertBefore(afterNext, next);
363 			return status;
364 		}
365 
366 		fFragments.InsertBefore(afterNext, buffer);
367 
368 		// cut down existing hole
369 		fBytesLeft -= end - start;
370 
371 		if (lastFragment && !fReceivedLastFragment) {
372 			fReceivedLastFragment = true;
373 			fBytesLeft -= IP_MAXPACKET - end;
374 		}
375 
376 		TRACE("    hole length: %d", (int)fBytesLeft);
377 
378 		return B_OK;
379 	}
380 
381 	// We couldn't merge the fragments, so we need to add it as is
382 
383 	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);
384 
385 	buffer->fragment.start = start;
386 	buffer->fragment.end = end;
387 	fFragments.InsertBefore(next, buffer);
388 
389 	// update length of the hole, if any
390 	fBytesLeft -= end - start;
391 
392 	if (lastFragment && !fReceivedLastFragment) {
393 		fReceivedLastFragment = true;
394 		fBytesLeft -= IP_MAXPACKET - end;
395 	}
396 
397 	TRACE("    hole length: %d", (int)fBytesLeft);
398 
399 	return B_OK;
400 }
401 
402 
403 /*!	Reassembles the fragments to the specified buffer \a to.
404 	This buffer must have been added via AddFragment() before.
405 */
406 status_t
407 FragmentPacket::Reassemble(net_buffer* to)
408 {
409 	if (!IsComplete())
410 		return B_ERROR;
411 
412 	net_buffer* buffer = NULL;
413 
414 	net_buffer* fragment;
415 	while ((fragment = fFragments.RemoveHead()) != NULL) {
416 		if (buffer != NULL) {
417 			status_t status;
418 			if (to == fragment) {
419 				status = gBufferModule->merge(fragment, buffer, false);
420 				buffer = fragment;
421 			} else
422 				status = gBufferModule->merge(buffer, fragment, true);
423 			if (status != B_OK)
424 				return status;
425 		} else
426 			buffer = fragment;
427 	}
428 
429 	if (buffer != to)
430 		panic("ipv4 packet reassembly did not work correctly.");
431 
432 	to->index = fIndex;
433 		// reset the buffer's device index
434 
435 	return B_OK;
436 }
437 
438 
439 /*static*/ void
440 FragmentPacket::StaleTimer(struct net_timer* timer, void* data)
441 {
442 	FragmentPacket* packet = (FragmentPacket*)data;
443 	TRACE("Assembling FragmentPacket %p timed out!", packet);
444 
445 	MutexLocker locker(&sFragmentLock);
446 	sFragmentHash.Remove(packet);
447 	locker.Unlock();
448 
449 	if (!packet->fFragments.IsEmpty()) {
450 		// Send error: fragment reassembly time exceeded
451 		sDomain->module->error_reply(NULL, packet->fFragments.First(),
452 			B_NET_ERROR_REASSEMBLY_TIME_EXCEEDED, NULL);
453 	}
454 
455 	delete packet;
456 }
457 
458 
459 //	#pragma mark -
460 
461 
#ifdef TRACE_IPV4
/*!	Prints all fields of \a header to the debug output (tracing builds
	only).
*/
static void
dump_ipv4_header(ipv4_header &header)
{
	// The addresses are stored in network byte order, ie. the first byte in
	// memory is the first octet of the dotted notation on every host.
	// Reading them byte-wise avoids the former host endianness switch, which
	// printed the octets reversed on big endian machines.
	const uint8* src = (const uint8*)&header.source;
	const uint8* dst = (const uint8*)&header.destination;

	dprintf("  version: %d\n", header.version);
	dprintf("  header_length: 4 * %d\n", header.header_length);
	dprintf("  service_type: %d\n", header.service_type);
	dprintf("  total_length: %d\n", header.TotalLength());
	dprintf("  id: %d\n", ntohs(header.id));
	dprintf("  fragment_offset: %d (flags: %c%c%c)\n",
		header.FragmentOffset() & IP_FRAGMENT_OFFSET_MASK,
		(header.FragmentOffset() & IP_RESERVED_FLAG) ? 'r' : '-',
		(header.FragmentOffset() & IP_DONT_FRAGMENT) ? 'd' : '-',
		(header.FragmentOffset() & IP_MORE_FRAGMENTS) ? 'm' : '-');
	dprintf("  time_to_live: %d\n", header.time_to_live);
	dprintf("  protocol: %d\n", header.protocol);
	dprintf("  checksum: %d\n", ntohs(header.checksum));
	dprintf("  source: %d.%d.%d.%d\n", src[0], src[1], src[2], src[3]);
	dprintf("  destination: %d.%d.%d.%d\n", dst[0], dst[1], dst[2], dst[3]);
}
#endif	// TRACE_IPV4
498 
499 
500 static int
501 dump_ipv4_multicast(int argc, char** argv)
502 {
503 	MulticastState::Iterator groupIterator = sMulticastState->GetIterator();
504 
505 	while (groupIterator.HasNext()) {
506 		IPv4GroupInterface* state = groupIterator.Next();
507 
508 		char addressBuffer[64];
509 
510 		kprintf("%p: group <%s, %s, %s {", state, state->Interface()->name,
511 			print_address(&state->Address(), addressBuffer,
512 			sizeof(addressBuffer)),
513 			state->Mode() == IPv4GroupInterface::kExclude
514 				? "Exclude" : "Include");
515 
516 		int count = 0;
517 		IPv4GroupInterface::AddressSet::Iterator addressIterator
518 			= state->Sources().GetIterator();
519 		while (addressIterator.HasNext()) {
520 			kprintf("%s%s", count > 0 ? ", " : "",
521 				print_address(&addressIterator.Next(),
522 				addressBuffer, sizeof(addressBuffer)));
523 			count++;
524 		}
525 
526 		kprintf("}> sock %p\n", state->Parent()->Socket());
527 	}
528 
529 	return 0;
530 }
531 
532 
533 /*!	Attempts to re-assemble fragmented packets.
534 	\return B_OK if everything went well; if it could reassemble the packet, \a _buffer
535 		will point to its buffer, otherwise, it will be \c NULL.
536 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
537 */
538 static status_t
539 reassemble_fragments(const ipv4_header &header, net_buffer** _buffer)
540 {
541 	net_buffer* buffer = *_buffer;
542 	status_t status;
543 
544 	struct ipv4_packet_key key;
545 	key.source = (in_addr_t)header.source;
546 	key.destination = (in_addr_t)header.destination;
547 	key.id = header.id;
548 	key.protocol = header.protocol;
549 
550 	// TODO: Make locking finer grained.
551 	MutexLocker locker(&sFragmentLock);
552 
553 	FragmentPacket* packet = sFragmentHash.Lookup(key);
554 	if (packet == NULL) {
555 		// New fragment packet
556 		packet = new (std::nothrow) FragmentPacket(key);
557 		if (packet == NULL)
558 			return B_NO_MEMORY;
559 
560 		// add packet to hash
561 		status = sFragmentHash.Insert(packet);
562 		if (status != B_OK) {
563 			delete packet;
564 			return status;
565 		}
566 	}
567 
568 	uint16 fragmentOffset = header.FragmentOffset();
569 	uint16 start = (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) << 3;
570 	uint16 end = start + header.TotalLength() - header.HeaderLength();
571 	bool lastFragment = (fragmentOffset & IP_MORE_FRAGMENTS) == 0;
572 
573 	TRACE("   Received IPv4 %sfragment of size %d, offset %d.",
574 		lastFragment ? "last ": "", end - start, start);
575 
576 	// Remove header unless this is the first fragment
577 	if (start != 0)
578 		gBufferModule->remove_header(buffer, header.HeaderLength());
579 
580 	status = packet->AddFragment(start, end, buffer, lastFragment);
581 	if (status != B_OK)
582 		return status;
583 
584 	if (packet->IsComplete()) {
585 		sFragmentHash.Remove(packet);
586 			// no matter if reassembling succeeds, we won't need this packet
587 			// anymore
588 
589 		status = packet->Reassemble(buffer);
590 		delete packet;
591 
592 		// _buffer does not change
593 		return status;
594 	}
595 
596 	// This indicates that the packet is not yet complete
597 	*_buffer = NULL;
598 	return B_OK;
599 }
600 
601 
602 /*!	Fragments the incoming buffer and send all fragments via the specified
603 	\a route.
604 */
605 static status_t
606 send_fragments(ipv4_protocol* protocol, struct net_route* route,
607 	net_buffer* buffer, uint32 mtu)
608 {
609 	TRACE_SK(protocol, "SendFragments(%" B_PRIu32 " bytes, mtu %" B_PRIu32 ")",
610 		buffer->size, mtu);
611 
612 	NetBufferHeaderReader<ipv4_header> originalHeader(buffer);
613 	if (originalHeader.Status() != B_OK)
614 		return originalHeader.Status();
615 
616 	uint16 headerLength = originalHeader->HeaderLength();
617 	uint32 bytesLeft = buffer->size - headerLength;
618 	uint32 fragmentOffset = 0;
619 	status_t status = B_OK;
620 
621 	net_buffer* headerBuffer = gBufferModule->split(buffer, headerLength);
622 	if (headerBuffer == NULL)
623 		return B_NO_MEMORY;
624 
625 	// TODO: we need to make sure ipv4_header is contiguous or
626 	// use another construct.
627 	NetBufferHeaderReader<ipv4_header> bufferHeader(headerBuffer);
628 	ipv4_header* header = &bufferHeader.Data();
629 
630 	// Adapt MTU to be a multiple of 8 (fragment offsets can only be specified
631 	// this way)
632 	mtu -= headerLength;
633 	mtu &= ~7;
634 	TRACE("  adjusted MTU to %" B_PRIu32 ", bytesLeft %" B_PRIu32, mtu,
635 		bytesLeft);
636 
637 	while (bytesLeft > 0) {
638 		uint32 fragmentLength = min_c(bytesLeft, mtu);
639 		bytesLeft -= fragmentLength;
640 		bool lastFragment = bytesLeft == 0;
641 
642 		header->total_length = htons(fragmentLength + headerLength);
643 		header->fragment_offset = htons((lastFragment ? 0 : IP_MORE_FRAGMENTS)
644 			| (fragmentOffset >> 3));
645 		header->checksum = 0;
646 		header->checksum = gStackModule->checksum((uint8*)header,
647 			headerLength);
648 			// TODO: compute the checksum only for those parts that changed?
649 
650 		TRACE("  send fragment of %" B_PRIu32 " bytes (%" B_PRIu32 " bytes "
651 			"left)", fragmentLength, bytesLeft);
652 
653 		net_buffer* fragmentBuffer;
654 		if (!lastFragment) {
655 			fragmentBuffer = gBufferModule->split(buffer, fragmentLength);
656 			fragmentOffset += fragmentLength;
657 		} else
658 			fragmentBuffer = buffer;
659 
660 		if (fragmentBuffer == NULL) {
661 			status = B_NO_MEMORY;
662 			break;
663 		}
664 
665 		// copy header to fragment
666 		status = gBufferModule->prepend(fragmentBuffer, header, headerLength);
667 
668 		// send fragment
669 		if (status == B_OK)
670 			status = sDatalinkModule->send_routed_data(route, fragmentBuffer);
671 
672 		if (lastFragment) {
673 			// we don't own the last buffer, so we don't have to free it
674 			break;
675 		}
676 
677 		if (status != B_OK) {
678 			gBufferModule->free(fragmentBuffer);
679 			break;
680 		}
681 	}
682 
683 	gBufferModule->free(headerBuffer);
684 	return status;
685 }
686 
687 
688 status_t ipv4_receive_data(net_buffer* buffer);
689 
690 
691 /*!	Delivers the provided \a buffer to all listeners of this multicast group.
692 	Does not take over ownership of the buffer.
693 */
694 static bool
695 deliver_multicast(net_protocol_module_info* module, net_buffer* buffer,
696 	bool deliverToRaw)
697 {
698 	TRACE("deliver_multicast(%p [%" B_PRIu32 " bytes])", buffer, buffer->size);
699 	if (module->deliver_data == NULL)
700 		return false;
701 
702 	MutexLocker _(sMulticastGroupsLock);
703 
704 	sockaddr_in* multicastAddr = (sockaddr_in*)buffer->destination;
705 
706 	uint32 index = buffer->index;
707 	if (buffer->interface_address != NULL)
708 		index = buffer->interface_address->interface->index;
709 
710 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
711 		&multicastAddr->sin_addr, index));
712 
713 	size_t count = 0;
714 
715 	while (it.HasNext()) {
716 		IPv4GroupInterface* state = it.Next();
717 
718 		ipv4_protocol* ipProtocol = state->Parent()->Socket();
719 		if (deliverToRaw && (ipProtocol->raw == NULL
720 				|| ipProtocol->socket->protocol != buffer->protocol))
721 			continue;
722 
723 		if (state->FilterAccepts(buffer)) {
724 			net_protocol* protocol = ipProtocol;
725 			if (protocol->module != module) {
726 				// as multicast filters are installed with an IPv4 protocol
727 				// reference, we need to go and find the appropriate instance
728 				// related to the 'receiving protocol' with module 'module'.
729 				protocol = ipProtocol->socket->first_protocol;
730 
731 				while (protocol != NULL && protocol->module != module)
732 					protocol = protocol->next;
733 			}
734 
735 			if (protocol != NULL) {
736 				module->deliver_data(protocol, buffer);
737 				count++;
738 			}
739 		}
740 	}
741 
742 	return count > 0;
743 }
744 
745 
746 /*!	Delivers the buffer to all listening raw sockets without taking ownership of
747 	the provided \a buffer.
748 	Returns \c true if there was any receiver, \c false if not.
749 */
750 static bool
751 raw_receive_data(net_buffer* buffer)
752 {
753 	MutexLocker locker(sRawSocketsLock);
754 
755 	if (sRawSockets.IsEmpty())
756 		return false;
757 
758 	TRACE("RawReceiveData(%i)", buffer->protocol);
759 
760 	if ((buffer->msg_flags & MSG_MCAST) != 0) {
761 		// we need to call deliver_multicast here separately as
762 		// buffer still has the IP header, and it won't in the
763 		// next call. This isn't very optimized but works for now.
764 		// A better solution would be to hold separate hash tables
765 		// and lists for RAW and non-RAW sockets.
766 		return deliver_multicast(&gIPv4Module, buffer, true);
767 	}
768 
769 	RawSocketList::Iterator iterator = sRawSockets.GetIterator();
770 	size_t count = 0;
771 
772 	while (iterator.HasNext()) {
773 		RawSocket* raw = iterator.Next();
774 
775 		if (raw->Socket()->protocol == buffer->protocol) {
776 			raw->EnqueueClone(buffer);
777 			count++;
778 		}
779 	}
780 
781 	return count > 0;
782 }
783 
784 
785 static inline sockaddr*
786 fill_sockaddr_in(sockaddr_in* target, in_addr_t address)
787 {
788 	target->sin_family = AF_INET;
789 	target->sin_len = sizeof(sockaddr_in);
790 	target->sin_port = 0;
791 	target->sin_addr.s_addr = address;
792 	return (sockaddr*)target;
793 }
794 
795 
796 static status_t
797 get_int_option(void* target, size_t length, int value)
798 {
799 	if (length != sizeof(int))
800 		return B_BAD_VALUE;
801 
802 	return user_memcpy(target, &value, sizeof(int));
803 }
804 
805 
806 static status_t
807 get_char_int_option(void* target, size_t length, int value)
808 {
809 	if (length == sizeof(int))
810 		return user_memcpy(target, &value, sizeof(int));
811 	if (length == sizeof(unsigned char)) {
812 		unsigned char uvalue = value;
813 		return user_memcpy(target, &uvalue, sizeof(uvalue));
814 	}
815 	return B_BAD_VALUE;
816 }
817 
818 
819 template<typename Type> static status_t
820 set_int_option(Type &target, const void* _value, size_t length)
821 {
822 	int value;
823 
824 	if (length != sizeof(int))
825 		return B_BAD_VALUE;
826 
827 	if (user_memcpy(&value, _value, sizeof(int)) != B_OK)
828 		return B_BAD_ADDRESS;
829 
830 	target = value;
831 	return B_OK;
832 }
833 
834 
835 template<typename Type> static status_t
836 set_char_int_option(Type &target, const void* _value, size_t length)
837 {
838 	if (length == sizeof(int)) {
839 		int value;
840 		if (user_memcpy(&value, _value, sizeof(int)) != B_OK)
841 			return B_BAD_ADDRESS;
842 		if (value > 255)
843 			return B_BAD_VALUE;
844 		target = value;
845 		return B_OK;
846 	}
847 	if (length == sizeof(unsigned char)) {
848 		unsigned char value;
849 		if (user_memcpy(&value, _value, sizeof(value)) != B_OK)
850 			return B_BAD_ADDRESS;
851 
852 		target = value;
853 		return B_OK;
854 	}
855 	return B_BAD_VALUE;
856 }
857 
858 
859 static net_protocol_module_info*
860 receiving_protocol(uint8 protocol)
861 {
862 	net_protocol_module_info* module = sReceivingProtocol[protocol];
863 	if (module != NULL)
864 		return module;
865 
866 	MutexLocker locker(sReceivingProtocolLock);
867 
868 	module = sReceivingProtocol[protocol];
869 	if (module != NULL)
870 		return module;
871 
872 	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol,
873 			&module) == B_OK)
874 		sReceivingProtocol[protocol] = module;
875 
876 	return module;
877 }
878 
879 
880 // #pragma mark - multicast
881 
882 
883 status_t
884 IPv4Multicast::JoinGroup(IPv4GroupInterface* state)
885 {
886 	MutexLocker _(sMulticastGroupsLock);
887 
888 	sockaddr_in groupAddr;
889 	status_t status = sDatalinkModule->join_multicast(state->Interface(),
890 		sDomain, fill_sockaddr_in(&groupAddr, state->Address().s_addr));
891 	if (status != B_OK)
892 		return status;
893 
894 	sMulticastState->Insert(state);
895 	return B_OK;
896 }
897 
898 
899 status_t
900 IPv4Multicast::LeaveGroup(IPv4GroupInterface* state)
901 {
902 	MutexLocker _(sMulticastGroupsLock);
903 
904 	sMulticastState->Remove(state);
905 
906 	sockaddr_in groupAddr;
907 	return sDatalinkModule->leave_multicast(state->Interface(), sDomain,
908 		fill_sockaddr_in(&groupAddr, state->Address().s_addr));
909 }
910 
911 
912 static status_t
913 ipv4_delta_group(IPv4GroupInterface* group, int option,
914 	net_interface* interface, const in_addr* sourceAddr)
915 {
916 	switch (option) {
917 		case IP_ADD_MEMBERSHIP:
918 			return group->Add();
919 		case IP_DROP_MEMBERSHIP:
920 			return group->Drop();
921 		case IP_BLOCK_SOURCE:
922 			return group->BlockSource(*sourceAddr);
923 		case IP_UNBLOCK_SOURCE:
924 			return group->UnblockSource(*sourceAddr);
925 		case IP_ADD_SOURCE_MEMBERSHIP:
926 			return group->AddSSM(*sourceAddr);
927 		case IP_DROP_SOURCE_MEMBERSHIP:
928 			return group->DropSSM(*sourceAddr);
929 	}
930 
931 	return B_ERROR;
932 }
933 
934 
935 static status_t
936 ipv4_delta_membership(ipv4_protocol* protocol, int option,
937 	net_interface* interface, const in_addr* groupAddr,
938 	const in_addr* sourceAddr)
939 {
940 	IPv4MulticastFilter& filter = protocol->multicast_filter;
941 	IPv4GroupInterface* state = NULL;
942 	status_t status = B_OK;
943 
944 	switch (option) {
945 		case IP_ADD_MEMBERSHIP:
946 		case IP_ADD_SOURCE_MEMBERSHIP:
947 			status = filter.GetState(*groupAddr, interface, state, true);
948 			break;
949 
950 		case IP_DROP_MEMBERSHIP:
951 		case IP_BLOCK_SOURCE:
952 		case IP_UNBLOCK_SOURCE:
953 		case IP_DROP_SOURCE_MEMBERSHIP:
954 			filter.GetState(*groupAddr, interface, state, false);
955 			if (state == NULL) {
956 				if (option == IP_DROP_MEMBERSHIP
957 					|| option == IP_DROP_SOURCE_MEMBERSHIP)
958 					return EADDRNOTAVAIL;
959 
960 				return B_BAD_VALUE;
961 			}
962 			break;
963 	}
964 
965 	if (status != B_OK)
966 		return status;
967 
968 	status = ipv4_delta_group(state, option, interface, sourceAddr);
969 	filter.ReturnState(state);
970 	return status;
971 }
972 
973 
/*!	Translates a protocol independent MCAST_* socket option into the
	matching IPv4 specific IP_* option.
	\return the IPv4 option, or -1 if \a option is not one of the known
		MCAST_* options.
*/
static int
generic_to_ipv4(int option)
{
	static const struct {
		int	generic;
		int	ipv4;
	} kOptionMap[] = {
		{ MCAST_JOIN_GROUP,			IP_ADD_MEMBERSHIP },
		{ MCAST_JOIN_SOURCE_GROUP,	IP_ADD_SOURCE_MEMBERSHIP },
		{ MCAST_LEAVE_GROUP,		IP_DROP_MEMBERSHIP },
		{ MCAST_BLOCK_SOURCE,		IP_BLOCK_SOURCE },
		{ MCAST_UNBLOCK_SOURCE,		IP_UNBLOCK_SOURCE },
		{ MCAST_LEAVE_SOURCE_GROUP,	IP_DROP_SOURCE_MEMBERSHIP }
	};
	const size_t count = sizeof(kOptionMap) / sizeof(kOptionMap[0]);

	for (size_t i = 0; i < count; i++) {
		if (kOptionMap[i].generic == option)
			return kOptionMap[i].ipv4;
	}

	return -1;
}
994 
995 
996 static net_interface*
997 get_multicast_interface(ipv4_protocol* protocol, const in_addr* address)
998 {
999 	// TODO: this is broken and leaks references
1000 	sockaddr_in groupAddr;
1001 	net_route* route = sDatalinkModule->get_route(sDomain,
1002 		fill_sockaddr_in(&groupAddr, address ? address->s_addr : INADDR_ANY));
1003 	if (route == NULL)
1004 		return NULL;
1005 
1006 	return route->interface_address->interface;
1007 }
1008 
1009 
1010 static status_t
1011 ipv4_delta_membership(ipv4_protocol* protocol, int option,
1012 	in_addr* interfaceAddr, in_addr* groupAddr, in_addr* sourceAddr)
1013 {
1014 	net_interface* interface = NULL;
1015 
1016 	if (interfaceAddr->s_addr == INADDR_ANY) {
1017 		interface = get_multicast_interface(protocol, groupAddr);
1018 	} else {
1019 		sockaddr_in address;
1020 		interface = sDatalinkModule->get_interface_with_address(
1021 			fill_sockaddr_in(&address, interfaceAddr->s_addr));
1022 	}
1023 
1024 	if (interface == NULL)
1025 		return B_DEVICE_NOT_FOUND;
1026 
1027 	return ipv4_delta_membership(protocol, option, interface,
1028 		groupAddr, sourceAddr);
1029 }
1030 
1031 
1032 static status_t
1033 ipv4_generic_delta_membership(ipv4_protocol* protocol, int option,
1034 	uint32 index, const sockaddr_storage* _groupAddr,
1035 	const sockaddr_storage* _sourceAddr)
1036 {
1037 	if (_groupAddr->ss_family != AF_INET
1038 		|| (_sourceAddr != NULL && _sourceAddr->ss_family != AF_INET))
1039 		return B_BAD_VALUE;
1040 
1041 	const in_addr* groupAddr = &((const sockaddr_in*)_groupAddr)->sin_addr;
1042 
1043 	// TODO: this is broken and leaks references
1044 	net_interface* interface;
1045 	if (index == 0)
1046 		interface = get_multicast_interface(protocol, groupAddr);
1047 	else
1048 		interface = sDatalinkModule->get_interface(sDomain, index);
1049 
1050 	if (interface == NULL)
1051 		return B_DEVICE_NOT_FOUND;
1052 
1053 	const in_addr* sourceAddr = NULL;
1054 	if (_sourceAddr != NULL)
1055 		sourceAddr = &((const sockaddr_in*)_sourceAddr)->sin_addr;
1056 
1057 	return ipv4_delta_membership(protocol, generic_to_ipv4(option), interface,
1058 		groupAddr, sourceAddr);
1059 }
1060 
1061 
1062 //	#pragma mark - module interface
1063 
1064 
1065 net_protocol*
1066 ipv4_init_protocol(net_socket* socket)
1067 {
1068 	ipv4_protocol* protocol = new (std::nothrow) ipv4_protocol();
1069 	if (protocol == NULL)
1070 		return NULL;
1071 
1072 	protocol->raw = NULL;
1073 	protocol->service_type = 0;
1074 	protocol->time_to_live = kDefaultTTL;
1075 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
1076 	protocol->multicast_loopback = kDefaultMulticastLoopback;
1077 	protocol->flags = 0;
1078 	protocol->multicast_address = NULL;
1079 	return protocol;
1080 }
1081 
1082 
1083 status_t
1084 ipv4_uninit_protocol(net_protocol* _protocol)
1085 {
1086 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1087 
1088 	delete protocol;
1089 
1090 	return B_OK;
1091 }
1092 
1093 
1094 /*!	Since open() is only called on the top level protocol, when we get here
1095 	it means we are on a SOCK_RAW socket.
1096 */
1097 status_t
1098 ipv4_open(net_protocol* _protocol)
1099 {
1100 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1101 
1102 	// Only root may open raw sockets
1103 	if (geteuid() != 0)
1104 		return B_NOT_ALLOWED;
1105 
1106 	RawSocket* raw = new (std::nothrow) RawSocket(protocol->socket);
1107 	if (raw == NULL)
1108 		return B_NO_MEMORY;
1109 
1110 	status_t status = raw->InitCheck();
1111 	if (status != B_OK) {
1112 		delete raw;
1113 		return status;
1114 	}
1115 
1116 	TRACE_SK(protocol, "Open()");
1117 
1118 	protocol->raw = raw;
1119 
1120 	MutexLocker locker(sRawSocketsLock);
1121 	sRawSockets.Add(raw);
1122 	return B_OK;
1123 }
1124 
1125 
1126 status_t
1127 ipv4_close(net_protocol* _protocol)
1128 {
1129 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1130 	RawSocket* raw = protocol->raw;
1131 	if (raw == NULL)
1132 		return B_ERROR;
1133 
1134 	TRACE_SK(protocol, "Close()");
1135 
1136 	MutexLocker locker(sRawSocketsLock);
1137 	sRawSockets.Remove(raw);
1138 	delete raw;
1139 	protocol->raw = NULL;
1140 
1141 	return B_OK;
1142 }
1143 
1144 
1145 status_t
1146 ipv4_free(net_protocol* protocol)
1147 {
1148 	return B_OK;
1149 }
1150 
1151 
1152 status_t
1153 ipv4_connect(net_protocol* _protocol, const struct sockaddr* address)
1154 {
1155 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1156 	RawSocket* raw = protocol->raw;
1157 	if (raw == NULL)
1158 		return B_ERROR;
1159 	if (address->sa_len != sizeof(struct sockaddr_in))
1160 		return B_BAD_VALUE;
1161 	if (address->sa_family != AF_INET)
1162 		return EAFNOSUPPORT;
1163 
1164 	memcpy(&protocol->socket->peer, address, sizeof(struct sockaddr_in));
1165 	sSocketModule->set_connected(protocol->socket);
1166 
1167 	return B_OK;
1168 }
1169 
1170 
1171 status_t
1172 ipv4_accept(net_protocol* protocol, struct net_socket** _acceptedSocket)
1173 {
1174 	return B_NOT_SUPPORTED;
1175 }
1176 
1177 
1178 status_t
1179 ipv4_control(net_protocol* _protocol, int level, int option, void* value,
1180 	size_t* _length)
1181 {
1182 	if ((level & LEVEL_MASK) != IPPROTO_IP)
1183 		return sDatalinkModule->control(sDomain, option, value, _length);
1184 
1185 	return B_BAD_VALUE;
1186 }
1187 
1188 
1189 status_t
1190 ipv4_getsockopt(net_protocol* _protocol, int level, int option, void* value,
1191 	int* _length)
1192 {
1193 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1194 
1195 	if (level == IPPROTO_IP) {
1196 		bool isDgramOrRaw = protocol->socket->type == SOCK_DGRAM
1197 			|| protocol->socket->type == SOCK_RAW;
1198 		if (option == IP_HDRINCL) {
1199 			return get_int_option(value, *_length,
1200 				(protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0);
1201 		}
1202 		if (option == IP_RECVDSTADDR) {
1203 			return get_int_option(value, *_length,
1204 				(protocol->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0);
1205 		}
1206 		if (option == IP_TTL)
1207 			return get_int_option(value, *_length, protocol->time_to_live);
1208 		if (option == IP_TOS)
1209 			return get_int_option(value, *_length, protocol->service_type);
1210 		if (option == IP_MULTICAST_IF) {
1211 			if (*_length != sizeof(struct in_addr))
1212 				return B_BAD_VALUE;
1213 			if (!isDgramOrRaw)
1214 				return B_NOT_SUPPORTED;
1215 			struct sockaddr_in defaultAddress;
1216 			defaultAddress.sin_addr.s_addr = htonl(INADDR_ANY);
1217 			struct sockaddr_in* address =
1218 				(struct sockaddr_in*)protocol->multicast_address;
1219 			if (address == NULL)
1220 				address = &defaultAddress;
1221 			if (user_memcpy(value, &address->sin_addr, sizeof(struct in_addr))
1222 					!= B_OK) {
1223 				return B_BAD_ADDRESS;
1224 			}
1225 			return B_OK;
1226 		}
1227 		if (option == IP_MULTICAST_TTL) {
1228 			if (!isDgramOrRaw)
1229 				return B_NOT_SUPPORTED;
1230 			return get_char_int_option(value, *_length,
1231 				protocol->multicast_time_to_live);
1232 		}
1233 		if (option == IP_MULTICAST_LOOP) {
1234 			if (!isDgramOrRaw)
1235 				return B_NOT_SUPPORTED;
1236 			return get_char_int_option(value, *_length,
1237 				protocol->multicast_loopback ? 1 : 0);
1238 		}
1239 		if (option == IP_ADD_MEMBERSHIP
1240 			|| option == IP_DROP_MEMBERSHIP
1241 			|| option == IP_BLOCK_SOURCE
1242 			|| option == IP_UNBLOCK_SOURCE
1243 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1244 			|| option == IP_DROP_SOURCE_MEMBERSHIP
1245 			|| option == MCAST_JOIN_GROUP
1246 			|| option == MCAST_LEAVE_GROUP
1247 			|| option == MCAST_BLOCK_SOURCE
1248 			|| option == MCAST_UNBLOCK_SOURCE
1249 			|| option == MCAST_JOIN_SOURCE_GROUP
1250 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1251 			// RFC 3678, Section 4.1:
1252 			// ``An error of EOPNOTSUPP is returned if these options are
1253 			// used with getsockopt().''
1254 			return B_NOT_SUPPORTED;
1255 		}
1256 
1257 		dprintf("IPv4::getsockopt(): get unknown option: %d\n", option);
1258 		return ENOPROTOOPT;
1259 	}
1260 
1261 	return sSocketModule->get_option(protocol->socket, level, option, value,
1262 		_length);
1263 }
1264 
1265 
1266 status_t
1267 ipv4_setsockopt(net_protocol* _protocol, int level, int option,
1268 	const void* value, int length)
1269 {
1270 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1271 
1272 	if (level == IPPROTO_IP) {
1273 		bool isDgramOrRaw = protocol->socket->type == SOCK_DGRAM
1274 			|| protocol->socket->type == SOCK_RAW;
1275 		if (option == IP_HDRINCL) {
1276 			int headerIncluded;
1277 			if (length != sizeof(int))
1278 				return B_BAD_VALUE;
1279 			if (user_memcpy(&headerIncluded, value, sizeof(headerIncluded))
1280 					!= B_OK)
1281 				return B_BAD_ADDRESS;
1282 
1283 			if (headerIncluded)
1284 				protocol->flags |= IP_FLAG_HEADER_INCLUDED;
1285 			else
1286 				protocol->flags &= ~IP_FLAG_HEADER_INCLUDED;
1287 
1288 			return B_OK;
1289 		}
1290 		if (option == IP_RECVDSTADDR) {
1291 			int getAddress;
1292 			if (length != sizeof(int))
1293 				return B_BAD_VALUE;
1294 			if (user_memcpy(&getAddress, value, sizeof(int)) != B_OK)
1295 				return B_BAD_ADDRESS;
1296 
1297 			if (getAddress && (protocol->socket->type == SOCK_DGRAM
1298 					|| protocol->socket->type == SOCK_RAW))
1299 				protocol->flags |= IP_FLAG_RECEIVE_DEST_ADDR;
1300 			else
1301 				protocol->flags &= ~IP_FLAG_RECEIVE_DEST_ADDR;
1302 
1303 			return B_OK;
1304 		}
1305 		if (option == IP_TTL)
1306 			return set_int_option(protocol->time_to_live, value, length);
1307 		if (option == IP_TOS)
1308 			return set_int_option(protocol->service_type, value, length);
1309 		if (option == IP_MULTICAST_IF) {
1310 			if (length != sizeof(struct in_addr))
1311 				return B_BAD_VALUE;
1312 			if (!isDgramOrRaw)
1313 				return B_NOT_SUPPORTED;
1314 
1315 			struct sockaddr_in* address = new (std::nothrow) sockaddr_in;
1316 			if (address == NULL)
1317 				return B_NO_MEMORY;
1318 
1319 			struct in_addr sin_addr;
1320 			if (user_memcpy(&sin_addr, value, sizeof(struct in_addr))
1321 					!= B_OK) {
1322 				delete address;
1323 				return B_BAD_ADDRESS;
1324 			}
1325 			fill_sockaddr_in(address, sin_addr.s_addr);
1326 
1327 			// Using INADDR_ANY to remove the previous setting.
1328 			if (address->sin_addr.s_addr == htonl(INADDR_ANY)) {
1329 				delete address;
1330 				delete protocol->multicast_address;
1331 				protocol->multicast_address = NULL;
1332 				return B_OK;
1333 			}
1334 
1335 			struct net_interface* interface
1336 				= sDatalinkModule->get_interface_with_address(
1337 					(sockaddr*)address);
1338 			if (interface == NULL) {
1339 				delete address;
1340 				return EADDRNOTAVAIL;
1341 			}
1342 
1343 			delete protocol->multicast_address;
1344 			protocol->multicast_address = (struct sockaddr*)address;
1345 
1346 			sDatalinkModule->put_interface(interface);
1347 			return B_OK;
1348 		}
1349 		if (option == IP_MULTICAST_TTL) {
1350 			if (!isDgramOrRaw)
1351 				return B_NOT_SUPPORTED;
1352 			return set_char_int_option(protocol->multicast_time_to_live, value,
1353 				length);
1354 		}
1355 		if (option == IP_MULTICAST_LOOP) {
1356 			if (!isDgramOrRaw)
1357 				return B_NOT_SUPPORTED;
1358 			uint8 multicast_loopback;
1359 			status_t status = set_char_int_option(multicast_loopback, value,
1360 				length);
1361 			if (status == B_OK)
1362 				protocol->multicast_loopback = multicast_loopback != 0;
1363 			return status;
1364 		}
1365 		if (option == IP_ADD_MEMBERSHIP || option == IP_DROP_MEMBERSHIP) {
1366 			if (!isDgramOrRaw)
1367 				return B_NOT_SUPPORTED;
1368 			ip_mreq mreq;
1369 			if (length != sizeof(ip_mreq))
1370 				return B_BAD_VALUE;
1371 			if (user_memcpy(&mreq, value, sizeof(ip_mreq)) != B_OK)
1372 				return B_BAD_ADDRESS;
1373 
1374 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1375 				&mreq.imr_multiaddr, NULL);
1376 		}
1377 		if (option == IP_BLOCK_SOURCE
1378 			|| option == IP_UNBLOCK_SOURCE
1379 			|| option == IP_ADD_SOURCE_MEMBERSHIP
1380 			|| option == IP_DROP_SOURCE_MEMBERSHIP) {
1381 			if (!isDgramOrRaw)
1382 				return B_NOT_SUPPORTED;
1383 			ip_mreq_source mreq;
1384 			if (length != sizeof(ip_mreq_source))
1385 				return B_BAD_VALUE;
1386 			if (user_memcpy(&mreq, value, sizeof(ip_mreq_source)) != B_OK)
1387 				return B_BAD_ADDRESS;
1388 
1389 			return ipv4_delta_membership(protocol, option, &mreq.imr_interface,
1390 				&mreq.imr_multiaddr, &mreq.imr_sourceaddr);
1391 		}
1392 		if (option == MCAST_LEAVE_GROUP || option == MCAST_JOIN_GROUP) {
1393 			if (!isDgramOrRaw)
1394 				return B_NOT_SUPPORTED;
1395 			group_req greq;
1396 			if (length != sizeof(group_req))
1397 				return B_BAD_VALUE;
1398 			if (user_memcpy(&greq, value, sizeof(group_req)) != B_OK)
1399 				return B_BAD_ADDRESS;
1400 
1401 			return ipv4_generic_delta_membership(protocol, option,
1402 				greq.gr_interface, &greq.gr_group, NULL);
1403 		}
1404 		if (option == MCAST_BLOCK_SOURCE
1405 			|| option == MCAST_UNBLOCK_SOURCE
1406 			|| option == MCAST_JOIN_SOURCE_GROUP
1407 			|| option == MCAST_LEAVE_SOURCE_GROUP) {
1408 			if (!isDgramOrRaw)
1409 				return B_NOT_SUPPORTED;
1410 			group_source_req greq;
1411 			if (length != sizeof(group_source_req))
1412 				return B_BAD_VALUE;
1413 			if (user_memcpy(&greq, value, sizeof(group_source_req)) != B_OK)
1414 				return B_BAD_ADDRESS;
1415 
1416 			return ipv4_generic_delta_membership(protocol, option,
1417 				greq.gsr_interface, &greq.gsr_group, &greq.gsr_source);
1418 		}
1419 
1420 		dprintf("IPv4::setsockopt(): set unknown option: %d\n", option);
1421 		return ENOPROTOOPT;
1422 	}
1423 
1424 	return sSocketModule->set_option(protocol->socket, level, option,
1425 		value, length);
1426 }
1427 
1428 
1429 status_t
1430 ipv4_bind(net_protocol* protocol, const struct sockaddr* address)
1431 {
1432 	if (address->sa_family != AF_INET)
1433 		return EAFNOSUPPORT;
1434 
1435 	// only INADDR_ANY and addresses of local interfaces are accepted:
1436 	if (((sockaddr_in*)address)->sin_addr.s_addr == INADDR_ANY
1437 		|| IN_MULTICAST(ntohl(((sockaddr_in*)address)->sin_addr.s_addr))
1438 		|| sDatalinkModule->is_local_address(sDomain, address, NULL, NULL)) {
1439 		memcpy(&protocol->socket->address, address, sizeof(struct sockaddr_in));
1440 		protocol->socket->address.ss_len = sizeof(struct sockaddr_in);
1441 			// explicitly set length, as our callers can't be trusted to
1442 			// always provide the correct length!
1443 		return B_OK;
1444 	}
1445 
1446 	return EADDRNOTAVAIL;
1447 		// address is unknown on this host
1448 }
1449 
1450 
1451 status_t
1452 ipv4_unbind(net_protocol* protocol, struct sockaddr* address)
1453 {
1454 	// nothing to do here
1455 	return B_OK;
1456 }
1457 
1458 
1459 status_t
1460 ipv4_listen(net_protocol* protocol, int count)
1461 {
1462 	return B_NOT_SUPPORTED;
1463 }
1464 
1465 
1466 status_t
1467 ipv4_shutdown(net_protocol* protocol, int direction)
1468 {
1469 	return B_NOT_SUPPORTED;
1470 }
1471 
1472 
1473 status_t
1474 ipv4_send_routed_data(net_protocol* _protocol, struct net_route* route,
1475 	net_buffer* buffer)
1476 {
1477 	if (route == NULL)
1478 		return B_BAD_VALUE;
1479 
1480 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1481 	net_interface_address* interfaceAddress = route->interface_address;
1482 	net_interface* interface = interfaceAddress->interface;
1483 
1484 	TRACE_SK(protocol, "SendRoutedData(%p, %p [%" B_PRIu32 " bytes])", route,
1485 		buffer, buffer->size);
1486 
1487 	sockaddr_in& source = *(sockaddr_in*)buffer->source;
1488 	sockaddr_in& destination = *(sockaddr_in*)buffer->destination;
1489 	sockaddr_in* broadcastAddress = (sockaddr_in*)interfaceAddress->destination;
1490 
1491 	bool checksumNeeded = true;
1492 	bool headerIncluded = false;
1493 	if (protocol != NULL)
1494 		headerIncluded = (protocol->flags & IP_FLAG_HEADER_INCLUDED) != 0;
1495 
1496 	buffer->msg_flags &= ~(MSG_BCAST | MSG_MCAST);
1497 
1498 	if (destination.sin_addr.s_addr == INADDR_ANY)
1499 		return EDESTADDRREQ;
1500 
1501 	if ((interface->device->flags & IFF_BROADCAST) != 0
1502 		&& (destination.sin_addr.s_addr == INADDR_BROADCAST
1503 			|| (broadcastAddress != NULL && destination.sin_addr.s_addr
1504 					== broadcastAddress->sin_addr.s_addr))) {
1505 		if (protocol && !(protocol->socket->options & SO_BROADCAST))
1506 			return B_BAD_VALUE;
1507 		buffer->msg_flags |= MSG_BCAST;
1508 	} else if (IN_MULTICAST(ntohl(destination.sin_addr.s_addr)))
1509 		buffer->msg_flags |= MSG_MCAST;
1510 
1511 	// Add IP header (if needed)
1512 
1513 	if (!headerIncluded) {
1514 		NetBufferPrepend<ipv4_header> header(buffer);
1515 		if (header.Status() != B_OK)
1516 			return header.Status();
1517 
1518 		header->version = IPV4_VERSION;
1519 		header->header_length = sizeof(ipv4_header) / 4;
1520 		header->service_type = protocol ? protocol->service_type : 0;
1521 		header->total_length = htons(buffer->size);
1522 		header->id = htons(atomic_add(&sPacketID, 1));
1523 		header->fragment_offset = 0;
1524 		if (protocol) {
1525 			header->time_to_live = (buffer->msg_flags & MSG_MCAST) != 0
1526 				? protocol->multicast_time_to_live : protocol->time_to_live;
1527 		} else {
1528 			header->time_to_live = (buffer->msg_flags & MSG_MCAST) != 0
1529 				? kDefaultMulticastTTL : kDefaultTTL;
1530 		}
1531 		header->protocol = protocol
1532 			? protocol->socket->protocol : buffer->protocol;
1533 		header->checksum = 0;
1534 
1535 		header->source = source.sin_addr.s_addr;
1536 		header->destination = destination.sin_addr.s_addr;
1537 
1538 		TRACE_ONLY(dump_ipv4_header(*header));
1539 	} else {
1540 		// if IP_HDRINCL, check if the source address is set
1541 		NetBufferHeaderReader<ipv4_header> header(buffer);
1542 		if (header.Status() != B_OK)
1543 			return header.Status();
1544 
1545 		if (header->source == 0) {
1546 			header->source = source.sin_addr.s_addr;
1547 			header->checksum = 0;
1548 			header.Sync();
1549 		} else
1550 			checksumNeeded = false;
1551 
1552 		TRACE("  Header was already supplied:");
1553 		TRACE_ONLY(dump_ipv4_header(*header));
1554 	}
1555 
1556 	if (buffer->size > 0xffff)
1557 		return EMSGSIZE;
1558 
1559 	if (checksumNeeded) {
1560 		*IPChecksumField(buffer) = gBufferModule->checksum(buffer, 0,
1561 			sizeof(ipv4_header), true);
1562 		buffer->buffer_flags |= NET_BUFFER_L3_CHECKSUM_VALID;
1563 	}
1564 
1565 	if ((buffer->msg_flags & MSG_MCAST) != 0
1566 		&& (protocol != NULL && protocol->multicast_loopback)) {
1567 		// copy an IP multicast packet to the input queue of the loopback
1568 		// interface
1569 		net_buffer *loopbackBuffer = gBufferModule->duplicate(buffer);
1570 		if (loopbackBuffer == NULL)
1571 			return B_NO_MEMORY;
1572 		status_t status = B_ERROR;
1573 
1574 		// get the IPv4 loopback address
1575 		struct sockaddr loopbackAddress;
1576 		gIPv4AddressModule.get_loopback_address(&loopbackAddress);
1577 
1578 		// get the matching interface address if any
1579 		net_interface_address* address =
1580 			sDatalinkModule->get_interface_address(&loopbackAddress);
1581 		if (address == NULL || (address->interface->flags & IFF_UP) == 0) {
1582 			sDatalinkModule->put_interface_address(address);
1583 		} else {
1584 			sDatalinkModule->put_interface_address(
1585 				loopbackBuffer->interface_address);
1586 			loopbackBuffer->interface_address = address;
1587 			status = ipv4_receive_data(loopbackBuffer);
1588 		}
1589 
1590 		if (status != B_OK)
1591 			gBufferModule->free(loopbackBuffer);
1592 	}
1593 
1594 	TRACE_SK(protocol, "  SendRoutedData(): header chksum: %" B_PRIu32
1595 		", buffer checksum: %" B_PRIu32,
1596 		gBufferModule->checksum(buffer, 0, sizeof(ipv4_header), true),
1597 		gBufferModule->checksum(buffer, 0, buffer->size, true));
1598 
1599 	TRACE_SK(protocol, "  SendRoutedData(): destination: %08x",
1600 		ntohl(destination.sin_addr.s_addr));
1601 
1602 	uint32 mtu = route->mtu ? route->mtu : interface->device->mtu;
1603 	if (buffer->size > mtu) {
1604 		// we need to fragment the packet
1605 		return send_fragments(protocol, route, buffer, mtu);
1606 	}
1607 
1608 	return sDatalinkModule->send_routed_data(route, buffer);
1609 }
1610 
1611 
1612 status_t
1613 ipv4_send_data(net_protocol* _protocol, net_buffer* buffer)
1614 {
1615 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1616 
1617 	TRACE_SK(protocol, "SendData(%p [%" B_PRIu32 " bytes])", buffer,
1618 		buffer->size);
1619 
1620 	if (protocol != NULL && (protocol->flags & IP_FLAG_HEADER_INCLUDED)) {
1621 		if (buffer->size < sizeof(ipv4_header))
1622 			return B_BAD_VALUE;
1623 
1624 		sockaddr_in* source = (sockaddr_in*)buffer->source;
1625 		sockaddr_in* destination = (sockaddr_in*)buffer->destination;
1626 
1627 		fill_sockaddr_in(source, *NetBufferField<in_addr_t,
1628 			offsetof(ipv4_header, source)>(buffer));
1629 		fill_sockaddr_in(destination, *NetBufferField<in_addr_t,
1630 			offsetof(ipv4_header, destination)>(buffer));
1631 	}
1632 
1633 	// handle IP_MULTICAST_IF
1634 	if (IN_MULTICAST(ntohl(
1635 			((sockaddr_in*)buffer->destination)->sin_addr.s_addr))
1636 		&& protocol != NULL && protocol->multicast_address != NULL) {
1637 		net_interface_address* address = sDatalinkModule->get_interface_address(
1638 			protocol->multicast_address);
1639 		if (address == NULL || (address->interface->flags & IFF_UP) == 0) {
1640 			sDatalinkModule->put_interface_address(address);
1641 			return EADDRNOTAVAIL;
1642 		}
1643 
1644 		sDatalinkModule->put_interface_address(buffer->interface_address);
1645 		buffer->interface_address = address;
1646 			// the buffer takes over ownership of the address
1647 
1648 		net_route* route = sDatalinkModule->get_route(sDomain, address->local);
1649 		if (route == NULL)
1650 			return ENETUNREACH;
1651 
1652 		return sDatalinkModule->send_routed_data(route, buffer);
1653 	}
1654 
1655 	return sDatalinkModule->send_data(protocol, sDomain, buffer);
1656 }
1657 
1658 
1659 ssize_t
1660 ipv4_send_avail(net_protocol* protocol)
1661 {
1662 	return B_ERROR;
1663 }
1664 
1665 
1666 status_t
1667 ipv4_read_data(net_protocol* _protocol, size_t numBytes, uint32 flags,
1668 	net_buffer** _buffer)
1669 {
1670 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1671 	RawSocket* raw = protocol->raw;
1672 	if (raw == NULL)
1673 		return B_ERROR;
1674 
1675 	TRACE_SK(protocol, "ReadData(%lu, 0x%" B_PRIx32 ")", numBytes, flags);
1676 
1677 	return raw->Dequeue(flags, _buffer);
1678 }
1679 
1680 
1681 ssize_t
1682 ipv4_read_avail(net_protocol* _protocol)
1683 {
1684 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1685 	RawSocket* raw = protocol->raw;
1686 	if (raw == NULL)
1687 		return B_ERROR;
1688 
1689 	return raw->AvailableData();
1690 }
1691 
1692 
1693 struct net_domain*
1694 ipv4_get_domain(net_protocol* protocol)
1695 {
1696 	return sDomain;
1697 }
1698 
1699 
1700 size_t
1701 ipv4_get_mtu(net_protocol* protocol, const struct sockaddr* address)
1702 {
1703 	net_route* route = sDatalinkModule->get_route(sDomain, address);
1704 	if (route == NULL)
1705 		return 0;
1706 
1707 	size_t mtu;
1708 	if (route->mtu != 0)
1709 		mtu = route->mtu;
1710 	else
1711 		mtu = route->interface_address->interface->device->mtu;
1712 
1713 	sDatalinkModule->put_route(sDomain, route);
1714 
1715 	if (mtu > 0xffff)
1716 		mtu = 0xffff;
1717 
1718 	return mtu - sizeof(ipv4_header);
1719 }
1720 
1721 
1722 status_t
1723 ipv4_receive_data(net_buffer* buffer)
1724 {
1725 	TRACE("ipv4_receive_data(%p [%" B_PRIu32 " bytes])", buffer, buffer->size);
1726 
1727 	uint16 headerLength = 0;
1728 	{
1729 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1730 	if (bufferHeader.Status() != B_OK)
1731 		return bufferHeader.Status();
1732 
1733 	ipv4_header& header = bufferHeader.Data();
1734 	TRACE_ONLY(dump_ipv4_header(header));
1735 
1736 	if (header.version != IPV4_VERSION)
1737 		return B_BAD_TYPE;
1738 
1739 	uint16 packetLength = header.TotalLength();
1740 	headerLength = header.HeaderLength();
1741 	if (packetLength > buffer->size
1742 		|| headerLength < sizeof(ipv4_header))
1743 		return B_BAD_DATA;
1744 
1745 	if ((buffer->buffer_flags & NET_BUFFER_L3_CHECKSUM_VALID) == 0) {
1746 		if (gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1747 			return B_BAD_DATA;
1748 	}
1749 
1750 	// lower layers notion of broadcast or multicast have no relevance to us
1751 	// other than deciding whether to send an ICMP error
1752 	bool wasMulticast = (buffer->msg_flags & (MSG_BCAST | MSG_MCAST)) != 0;
1753 	bool notForUs = false;
1754 	buffer->msg_flags &= ~(MSG_BCAST | MSG_MCAST);
1755 
1756 	sockaddr_in destination;
1757 	fill_sockaddr_in(&destination, header.destination);
1758 
1759 	if (header.destination == INADDR_BROADCAST) {
1760 		buffer->msg_flags |= MSG_BCAST;
1761 
1762 		// Find first interface with a matching family
1763 		if (!sDatalinkModule->is_local_link_address(sDomain, true,
1764 				buffer->destination, &buffer->interface_address))
1765 			notForUs = !wasMulticast;
1766 	} else if (IN_MULTICAST(ntohl(header.destination))) {
1767 		buffer->msg_flags |= MSG_MCAST;
1768 	} else {
1769 		uint32 matchedAddressType = 0;
1770 
1771 		// test if the packet is really for us
1772 		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1773 				&buffer->interface_address, &matchedAddressType)
1774 			&& !sDatalinkModule->is_local_link_address(sDomain, true,
1775 				buffer->destination, &buffer->interface_address)) {
1776 			// if the buffer was a link layer multicast, regard it as a
1777 			// broadcast, and let the upper levels decide what to do with it
1778 			if (wasMulticast)
1779 				buffer->msg_flags |= MSG_BCAST;
1780 			else
1781 				notForUs = true;
1782 		} else {
1783 			// copy over special address types (MSG_BCAST or MSG_MCAST):
1784 			buffer->msg_flags |= matchedAddressType;
1785 		}
1786 	}
1787 
1788 	// set net_buffer's source/destination address
1789 	fill_sockaddr_in((struct sockaddr_in*)buffer->source, header.source);
1790 	memcpy(buffer->destination, &destination, sizeof(sockaddr_in));
1791 
1792 	buffer->protocol = header.protocol;
1793 
1794 	if (notForUs) {
1795 		TRACE("  ipv4_receive_data(): packet was not for us %x -> %x",
1796 			ntohl(header.source), ntohl(header.destination));
1797 
1798 		if (!wasMulticast) {
1799 			// Send ICMP error: Host unreachable
1800 			sDomain->module->error_reply(NULL, buffer, B_NET_ERROR_UNREACH_HOST,
1801 				NULL);
1802 		}
1803 
1804 		return B_ERROR;
1805 	}
1806 
1807 	// remove any trailing/padding data
1808 	status_t status = gBufferModule->trim(buffer, packetLength);
1809 	if (status != B_OK)
1810 		return status;
1811 
1812 	// check for fragmentation
1813 	uint16 fragmentOffset = header.FragmentOffset();
1814 	if ((fragmentOffset & IP_MORE_FRAGMENTS) != 0
1815 		|| (fragmentOffset & IP_FRAGMENT_OFFSET_MASK) != 0) {
1816 		// this is a fragment
1817 		TRACE("  ipv4_receive_data(): Found a Fragment!");
1818 		status = reassemble_fragments(header, &buffer);
1819 		TRACE("  ipv4_receive_data():  -> %s", strerror(status));
1820 		if (status != B_OK)
1821 			return status;
1822 
1823 		if (buffer == NULL) {
1824 			// buffer was put into fragment packet
1825 			TRACE("  ipv4_receive_data(): Not yet assembled.");
1826 			return B_OK;
1827 		}
1828 	}
1829 
1830 	// Since the buffer might have been changed (reassembled fragment)
1831 	// we must no longer access bufferHeader or header anymore after
1832 	// this point
1833 	}
1834 
1835 	bool rawDelivered = raw_receive_data(buffer);
1836 
1837 	// Preserve the ipv4 header for ICMP processing
1838 	gBufferModule->store_header(buffer);
1839 	gBufferModule->remove_header(buffer, headerLength);
1840 		// the header is of variable size and may include IP options
1841 		// (TODO: that we ignore for now)
1842 
1843 	net_protocol_module_info* module = receiving_protocol(buffer->protocol);
1844 	if (module == NULL) {
1845 		// no handler for this packet
1846 		if (!rawDelivered) {
1847 			sDomain->module->error_reply(NULL, buffer,
1848 				B_NET_ERROR_UNREACH_PROTOCOL, NULL);
1849 		}
1850 		return EAFNOSUPPORT;
1851 	}
1852 
1853 	if ((buffer->msg_flags & MSG_MCAST) != 0) {
1854 		// Unfortunately historical reasons dictate that the IP multicast
1855 		// model be a little different from the unicast one. We deliver
1856 		// this frame directly to all sockets registered with interest
1857 		// for this multicast group.
1858 		deliver_multicast(module, buffer, false);
1859 		gBufferModule->free(buffer);
1860 		return B_OK;
1861 	}
1862 
1863 	return module->receive_data(buffer);
1864 }
1865 
1866 
1867 status_t
1868 ipv4_deliver_data(net_protocol* _protocol, net_buffer* buffer)
1869 {
1870 	ipv4_protocol* protocol = (ipv4_protocol*)_protocol;
1871 
1872 	if (protocol->raw == NULL)
1873 		return B_ERROR;
1874 
1875 	return protocol->raw->EnqueueClone(buffer);
1876 }
1877 
1878 
1879 status_t
1880 ipv4_error_received(net_error error, net_buffer* buffer)
1881 {
1882 	TRACE("  ipv4_error_received(error %d, buffer %p [%" B_PRIu32 " bytes])",
1883 		(int)error, buffer, buffer->size);
1884 
1885 	NetBufferHeaderReader<ipv4_header> bufferHeader(buffer);
1886 	if (bufferHeader.Status() != B_OK)
1887 		return bufferHeader.Status();
1888 
1889 	ipv4_header& header = bufferHeader.Data();
1890 	TRACE_ONLY(dump_ipv4_header(header));
1891 
1892 	// We do not check the packet length, as we usually only get a part of it
1893 	uint16 headerLength = header.HeaderLength();
1894 	if (header.version != IPV4_VERSION
1895 		|| headerLength < sizeof(ipv4_header)
1896 		|| gBufferModule->checksum(buffer, 0, headerLength, true) != 0)
1897 		return B_BAD_DATA;
1898 
1899 	// Restore addresses of the original buffer
1900 
1901 	// lower layers notion of broadcast or multicast have no relevance to us
1902 	// TODO: they actually have when deciding whether to send an ICMP error
1903 	buffer->msg_flags &= ~(MSG_BCAST | MSG_MCAST);
1904 
1905 	fill_sockaddr_in((struct sockaddr_in*)buffer->source, header.source);
1906 	fill_sockaddr_in((struct sockaddr_in*)buffer->destination,
1907 		header.destination);
1908 
1909 	if (header.destination == INADDR_BROADCAST)
1910 		buffer->msg_flags |= MSG_BCAST;
1911 	else if (IN_MULTICAST(ntohl(header.destination)))
1912 		buffer->msg_flags |= MSG_MCAST;
1913 
1914 	// test if the packet is really from us
1915 	if (!sDatalinkModule->is_local_address(sDomain, buffer->source, NULL,
1916 			NULL)) {
1917 		TRACE("  ipv4_error_received(): packet was not for us %x -> %x",
1918 			ntohl(header.source), ntohl(header.destination));
1919 		return B_ERROR;
1920 	}
1921 
1922 	buffer->protocol = header.protocol;
1923 
1924 	bufferHeader.Remove(headerLength);
1925 
1926 	net_protocol_module_info* protocol = receiving_protocol(buffer->protocol);
1927 	if (protocol == NULL)
1928 		return B_ERROR;
1929 
1930 	// propagate error
1931 	return protocol->error_received(error, buffer);
1932 }
1933 
1934 
1935 status_t
1936 ipv4_error_reply(net_protocol* protocol, net_buffer* cause, net_error error,
1937 	net_error_data* errorData)
1938 {
1939 	// Directly obtain the ICMP protocol module
1940 	net_protocol_module_info* icmp = receiving_protocol(IPPROTO_ICMP);
1941 	if (icmp == NULL)
1942 		return B_ERROR;
1943 
1944 	return icmp->error_reply(protocol, cause, error, errorData);
1945 }
1946 
1947 
1948 ssize_t
1949 ipv4_process_ancillary_data_no_container(net_protocol* protocol,
1950 	net_buffer* buffer, void* msgControl, size_t msgControlLen)
1951 {
1952 	ssize_t bytesWritten = 0;
1953 
1954 	if ((((ipv4_protocol*)protocol)->flags & IP_FLAG_RECEIVE_DEST_ADDR) != 0) {
1955 		if (msgControlLen < CMSG_SPACE(sizeof(struct in_addr)))
1956 			return B_NO_MEMORY;
1957 
1958 		cmsghdr* messageHeader = (cmsghdr*)msgControl;
1959 		messageHeader->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
1960 		messageHeader->cmsg_level = IPPROTO_IP;
1961 		messageHeader->cmsg_type = IP_RECVDSTADDR;
1962 
1963 		memcpy(CMSG_DATA(messageHeader),
1964 		 	&((struct sockaddr_in*)buffer->destination)->sin_addr,
1965 		 	sizeof(struct in_addr));
1966 
1967 		bytesWritten += CMSG_SPACE(sizeof(struct in_addr));
1968 	}
1969 
1970 	return bytesWritten;
1971 }
1972 
1973 
1974 //	#pragma mark -
1975 
1976 
1977 status_t
1978 init_ipv4()
1979 {
1980 	sPacketID = (int32)system_time();
1981 
1982 	mutex_init(&sRawSocketsLock, "raw sockets");
1983 	mutex_init(&sFragmentLock, "IPv4 Fragments");
1984 	mutex_init(&sMulticastGroupsLock, "IPv4 multicast groups");
1985 	mutex_init(&sReceivingProtocolLock, "IPv4 receiving protocols");
1986 
1987 	status_t status;
1988 
1989 	sMulticastState = new MulticastState();
1990 	if (sMulticastState == NULL) {
1991 		status = B_NO_MEMORY;
1992 		goto err4;
1993 	}
1994 
1995 	status = sMulticastState->Init();
1996 	if (status != B_OK)
1997 		goto err5;
1998 
1999 	new (&sFragmentHash) FragmentTable();
2000 	status = sFragmentHash.Init(256);
2001 	if (status != B_OK)
2002 		goto err5;
2003 
2004 	new (&sRawSockets) RawSocketList;
2005 		// static initializers do not work in the kernel,
2006 		// so we have to do it here, manually
2007 		// TODO: for modules, this shouldn't be required
2008 
2009 	status = gStackModule->register_domain_protocols(AF_INET, SOCK_RAW, 0,
2010 		"network/protocols/ipv4/v1", NULL);
2011 	if (status != B_OK)
2012 		goto err6;
2013 
2014 	status = gStackModule->register_domain(AF_INET, "internet", &gIPv4Module,
2015 		&gIPv4AddressModule, &sDomain);
2016 	if (status != B_OK)
2017 		goto err6;
2018 
2019 	add_debugger_command("ipv4_multicast", dump_ipv4_multicast,
2020 		"list all current IPv4 multicast states");
2021 
2022 	return B_OK;
2023 
2024 err6:
2025 	sFragmentHash.~FragmentTable();
2026 err5:
2027 	delete sMulticastState;
2028 err4:
2029 	mutex_destroy(&sReceivingProtocolLock);
2030 	mutex_destroy(&sMulticastGroupsLock);
2031 	mutex_destroy(&sFragmentLock);
2032 	mutex_destroy(&sRawSocketsLock);
2033 	return status;
2034 }
2035 
2036 
2037 status_t
2038 uninit_ipv4()
2039 {
2040 	mutex_lock(&sReceivingProtocolLock);
2041 
2042 	remove_debugger_command("ipv4_multicast", dump_ipv4_multicast);
2043 
2044 	// put all the domain receiving protocols we gathered so far
2045 	for (uint32 i = 0; i < 256; i++) {
2046 		if (sReceivingProtocol[i] != NULL)
2047 			gStackModule->put_domain_receiving_protocol(sDomain, i);
2048 	}
2049 
2050 	gStackModule->unregister_domain(sDomain);
2051 	mutex_unlock(&sReceivingProtocolLock);
2052 
2053 	delete sMulticastState;
2054 	sFragmentHash.~FragmentTable();
2055 
2056 	mutex_destroy(&sMulticastGroupsLock);
2057 	mutex_destroy(&sFragmentLock);
2058 	mutex_destroy(&sRawSocketsLock);
2059 	mutex_destroy(&sReceivingProtocolLock);
2060 
2061 	return B_OK;
2062 }
2063 
2064 
2065 static status_t
2066 ipv4_std_ops(int32 op, ...)
2067 {
2068 	switch (op) {
2069 		case B_MODULE_INIT:
2070 			return init_ipv4();
2071 		case B_MODULE_UNINIT:
2072 			return uninit_ipv4();
2073 
2074 		default:
2075 			return B_ERROR;
2076 	}
2077 }
2078 
2079 
2080 net_protocol_module_info gIPv4Module = {
2081 	{
2082 		"network/protocols/ipv4/v1",
2083 		0,
2084 		ipv4_std_ops
2085 	},
2086 	NET_PROTOCOL_ATOMIC_MESSAGES,
2087 
2088 	ipv4_init_protocol,
2089 	ipv4_uninit_protocol,
2090 	ipv4_open,
2091 	ipv4_close,
2092 	ipv4_free,
2093 	ipv4_connect,
2094 	ipv4_accept,
2095 	ipv4_control,
2096 	ipv4_getsockopt,
2097 	ipv4_setsockopt,
2098 	ipv4_bind,
2099 	ipv4_unbind,
2100 	ipv4_listen,
2101 	ipv4_shutdown,
2102 	ipv4_send_data,
2103 	ipv4_send_routed_data,
2104 	ipv4_send_avail,
2105 	ipv4_read_data,
2106 	ipv4_read_avail,
2107 	ipv4_get_domain,
2108 	ipv4_get_mtu,
2109 	ipv4_receive_data,
2110 	ipv4_deliver_data,
2111 	ipv4_error_received,
2112 	ipv4_error_reply,
2113 	NULL,		// add_ancillary_data()
2114 	NULL,		// process_ancillary_data()
2115 	ipv4_process_ancillary_data_no_container,
2116 	NULL,		// send_data_no_buffer()
2117 	NULL		// read_data_no_buffer()
2118 };
2119 
2120 module_dependency module_dependencies[] = {
2121 	{NET_STACK_MODULE_NAME, (module_info**)&gStackModule},
2122 	{NET_BUFFER_MODULE_NAME, (module_info**)&gBufferModule},
2123 	{NET_DATALINK_MODULE_NAME, (module_info**)&sDatalinkModule},
2124 	{NET_SOCKET_MODULE_NAME, (module_info**)&sSocketModule},
2125 	{}
2126 };
2127 
2128 module_info* modules[] = {
2129 	(module_info*)&gIPv4Module,
2130 	NULL
2131 };
2132