xref: /haiku/src/add-ons/kernel/network/protocols/ipv6/ipv6.cpp (revision 445d4fd926c569e7b9ae28017da86280aaecbae2)
1 /*
2  * Copyright 2006-2011, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  *		Atis Elsts, the.kfx@gmail.com
8  */
9 
10 
11 #include "ipv6_address.h"
12 #include "ipv6_utils.h"
13 #include "multicast.h"
14 
15 #include <net_datalink.h>
16 #include <net_datalink_protocol.h>
17 #include <net_device.h>
18 #include <net_protocol.h>
19 #include <net_stack.h>
20 #include <NetBufferUtilities.h>
21 #include <ProtocolUtilities.h>
22 
23 #include <ByteOrder.h>
24 #include <KernelExport.h>
25 #include <StackOrHeapArray.h>
26 #include <util/AutoLock.h>
27 #include <util/list.h>
28 #include <util/DoublyLinkedList.h>
29 #include <util/MultiHashTable.h>
30 
31 #include <netinet6/in6.h>
32 #include <netinet/ip6.h>
33 #include <netinet/icmp6.h>
34 #include <new>
35 #include <stdlib.h>
36 #include <stdio.h>
37 #include <string.h>
38 #include <utility>
39 
40 
// Remove the comment marker from the following line to enable the tracing
// output of this module.
//#define TRACE_IPV6
#if defined(TRACE_IPV6)
	// TRACE() prints the message prefixed with "IPv6" and the current
	// system_time(); TRACE_SK() additionally prints the pointer passed as
	// its first argument.
	#define TRACE(format, args...) \
		dprintf("IPv6 [%" B_PRIdBIGTIME "] " format "\n", system_time(), ##args)
	#define TRACE_SK(protocol, format, args...) \
		dprintf("IPv6 [%" B_PRIdBIGTIME "] %p " format "\n", system_time(), \
			protocol, ##args)
#else
	// Tracing is disabled: both macros expand to nothing.
	#define TRACE(args...)
	#define TRACE_SK(args...)
#endif
52 
53 
// Number of slots in the fragment packet's hash.
#define MAX_HASH_FRAGMENTS 		64
// Time after which a fragment is discarded: 60 seconds [RFC 2460], expressed
// in microseconds.
#define FRAGMENT_TIMEOUT		60000000LL
58 
59 
60 struct IPv6Header {
61 	struct ip6_hdr header;
62 
63 	uint8 ProtocolVersion() const { return header.ip6_vfc & IPV6_VERSION_MASK; }
64 	uint8 ServiceType() const { return ntohl(header.ip6_flow) >> 20;}
65 	uint16 PayloadLength() const { return ntohs(header.ip6_plen); }
66 	const in6_addr& Dst() const { return header.ip6_dst; }
67 	const in6_addr& Src() const { return header.ip6_src; }
68 	uint8 NextHeader() const { return header.ip6_nxt; }
69 	uint16 GetHeaderOffset(net_buffer* buffer, uint32 headerCode = ~0u) const;
70 };
71 
72 
73 typedef DoublyLinkedList<struct net_buffer,
74 	DoublyLinkedListCLink<struct net_buffer> > FragmentList;
75 
76 
77 // TODO: make common fragmentation interface for both address families
78 struct ipv6_packet_key {
79 	in6_addr	source;
80 	in6_addr	destination;
81 	// We use uint32 here due to the hash function
82 	uint32		id;
83 	uint32		protocol;
84 };
85 
86 
87 class FragmentPacket {
88 public:
89 								FragmentPacket(const ipv6_packet_key& key);
90 								~FragmentPacket();
91 
92 			status_t			AddFragment(uint16 start, uint16 end,
93 									net_buffer* buffer, bool lastFragment);
94 			status_t			Reassemble(net_buffer* to);
95 
96 			bool				IsComplete() const
97 									{ return fReceivedLastFragment
98 										&& fBytesLeft == 0; }
99 
100 			const ipv6_packet_key& Key() const { return fKey; }
101 			FragmentPacket*&	HashTableLink() { return fNext; }
102 
103 	static	void				StaleTimer(struct net_timer* timer, void* data);
104 
105 private:
106 			FragmentPacket*		fNext;
107 			struct ipv6_packet_key fKey;
108 			uint32				fIndex;
109 			int32				fBytesLeft;
110 			FragmentList		fFragments;
111 			net_timer			fTimer;
112 			bool				fReceivedLastFragment;
113 };
114 
115 
116 struct FragmentHashDefinition {
117 	typedef ipv6_packet_key KeyType;
118 	typedef FragmentPacket ValueType;
119 
120 	size_t HashKey(const KeyType& key) const
121 	{
122 		return jenkins_hashword((const uint32*)&key,
123 			sizeof(ipv6_packet_key) / sizeof(uint32), 0);
124 	}
125 
126 	size_t Hash(ValueType* value) const
127 	{
128 		return HashKey(value->Key());
129 	}
130 
131 	bool Compare(const KeyType& key, ValueType* value) const
132 	{
133 		const ipv6_packet_key& packetKey = value->Key();
134 
135 		return packetKey.id == key.id
136 			&& packetKey.source == key.source
137 			&& packetKey.destination == key.destination
138 			&& packetKey.protocol == key.protocol;
139 	}
140 
141 	ValueType*& GetLink(ValueType* value) const
142 	{
143 		return value->HashTableLink();
144 	}
145 };
146 
147 
148 typedef BOpenHashTable<FragmentHashDefinition, false, true> FragmentTable;
149 
150 
151 class RawSocket
152 	: public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
153 public:
154 							RawSocket(net_socket* socket);
155 };
156 
157 
158 typedef DoublyLinkedList<RawSocket> RawSocketList;
159 
160 typedef MulticastGroupInterface<IPv6Multicast> IPv6GroupInterface;
161 typedef MulticastFilter<IPv6Multicast> IPv6MulticastFilter;
162 
163 struct MulticastStateHash {
164 	typedef std::pair<const in6_addr*, uint32> KeyType;
165 	typedef IPv6GroupInterface ValueType;
166 
167 	size_t HashKey(const KeyType &key) const;
168 	size_t Hash(ValueType* value) const
169 		{ return HashKey(std::make_pair(&value->Address(),
170 			value->Interface()->index)); }
171 	bool Compare(const KeyType &key, ValueType* value) const
172 		{ return value->Interface()->index == key.second
173 			&& value->Address() == *key.first; }
174 	bool CompareValues(ValueType* value1, ValueType* value2) const
175 		{ return value1->Interface()->index == value2->Interface()->index
176 			&& value1->Address() == value2->Address(); }
177 	ValueType*& GetLink(ValueType* value) const { return value->MulticastGroupsHashLink(); }
178 };
179 
180 
181 struct ipv6_protocol : net_protocol {
182 	ipv6_protocol()
183 		:
184 		raw(NULL),
185 		multicast_filter(this)
186 	{
187 	}
188 
189 	~ipv6_protocol()
190 	{
191 		delete raw;
192 	}
193 
194 	RawSocket	*raw;
195 	uint8		service_type;
196 	uint8		time_to_live;
197 	uint8		multicast_time_to_live;
198 	uint8		receive_hoplimit;
199 	uint8		receive_pktinfo;
200 	struct sockaddr* multicast_address; // for IPV6_MULTICAST_IF
201 
202 	IPv6MulticastFilter multicast_filter;
203 };
204 
205 
206 static const int kDefaultTTL = IPV6_DEFHLIM;
207 static const int kDefaultMulticastTTL = 1;
208 
209 
210 extern net_protocol_module_info gIPv6Module;
211 	// we need this in ipv6_std_ops() for registering the AF_INET6 domain
212 
213 net_stack_module_info* gStackModule;
214 net_buffer_module_info* gBufferModule;
215 
216 static struct net_domain* sDomain;
217 static net_datalink_module_info* sDatalinkModule;
218 static net_socket_module_info* sSocketModule;
219 static RawSocketList sRawSockets;
220 static mutex sRawSocketsLock;
221 static mutex sFragmentLock;
222 static FragmentTable sFragmentHash;
223 static int32 sFragmentID;
224 static mutex sMulticastGroupsLock;
225 
226 typedef MultiHashTable<MulticastStateHash> MulticastState;
227 static MulticastState* sMulticastState;
228 
229 static net_protocol_module_info* sReceivingProtocol[256];
230 static mutex sReceivingProtocolLock;
231 
232 
233 uint16
234 IPv6Header::GetHeaderOffset(net_buffer* buffer, uint32 headerCode) const
235 {
236 	uint16 offset = sizeof(struct ip6_hdr);
237 	uint8 next = header.ip6_nxt;
238 
239 	// these are the extension headers that might be supported one day
240 	while (next != headerCode
241 		&& (next == IPPROTO_HOPOPTS
242 			|| next == IPPROTO_ROUTING
243 			|| next == IPPROTO_FRAGMENT
244 			|| next == IPPROTO_ESP
245 			|| next == IPPROTO_AH
246 			|| next == IPPROTO_DSTOPTS)) {
247 		struct ip6_ext extensionHeader;
248 		status_t status = gBufferModule->read(buffer, offset,
249 			&extensionHeader, sizeof(ip6_ext));
250 		if (status != B_OK)
251 			break;
252 
253 		next = extensionHeader.ip6e_nxt;
254 		offset += extensionHeader.ip6e_len;
255 	}
256 
257 	// were we looking for a specific header?
258 	if (headerCode != ~0u) {
259 		if (next == headerCode) {
260 			// found the specific header
261 			return offset;
262 		}
263 		// return 0 if fragement header is not present
264 		return 0;
265 	}
266 
267 	// the general transport layer header case
268 	buffer->protocol = next;
269 	return offset;
270 }
271 
272 
273 RawSocket::RawSocket(net_socket* socket)
274 	:
275 	DatagramSocket<>("ipv6 raw socket", socket)
276 {
277 }
278 
279 
280 //	#pragma mark -
281 
282 
283 FragmentPacket::FragmentPacket(const ipv6_packet_key &key)
284 	:
285 	fKey(key),
286 	fBytesLeft(IPV6_MAXPACKET),
287 	fReceivedLastFragment(false)
288 {
289 	gStackModule->init_timer(&fTimer, FragmentPacket::StaleTimer, this);
290 }
291 
292 
293 FragmentPacket::~FragmentPacket()
294 {
295 	// cancel the kill timer
296 	gStackModule->set_timer(&fTimer, -1);
297 
298 	// delete all fragments
299 	net_buffer* buffer;
300 	while ((buffer = fFragments.RemoveHead()) != NULL) {
301 		gBufferModule->free(buffer);
302 	}
303 }
304 
305 
306 status_t
307 FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer* buffer,
308 	bool lastFragment)
309 {
310 	// restart the timer
311 	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);
312 
313 	if (start >= end) {
314 		// invalid fragment
315 		return B_BAD_DATA;
316 	}
317 
318 	// Search for a position in the list to insert the fragment
319 
320 	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
321 	net_buffer* previous = NULL;
322 	net_buffer* next = NULL;
323 	while ((previous = iterator.Next()) != NULL) {
324 		if (previous->fragment.start <= start) {
325 			// The new fragment can be inserted after this one
326 			break;
327 		}
328 
329 		next = previous;
330 	}
331 
332 	// See if we already have the fragment's data
333 
334 	if (previous != NULL && previous->fragment.start <= start
335 		&& previous->fragment.end >= end) {
336 		// we do, so we can just drop this fragment
337 		gBufferModule->free(buffer);
338 		return B_OK;
339 	}
340 
341 	fIndex = buffer->index;
342 		// adopt the buffer's device index
343 
344 	TRACE("    previous: %p, next: %p", previous, next);
345 
346 	// If we have parts of the data already, truncate as needed
347 
348 	if (previous != NULL && previous->fragment.end > start) {
349 		TRACE("    remove header %d bytes", previous->fragment.end - start);
350 		gBufferModule->remove_header(buffer, previous->fragment.end - start);
351 		start = previous->fragment.end;
352 	}
353 	if (next != NULL && end > next->fragment.start) {
354 		TRACE("    remove trailer %d bytes", end - next->fragment.start);
355 		gBufferModule->remove_trailer(buffer, end - next->fragment.start);
356 		end = next->fragment.start;
357 	}
358 
359 	// Now try if we can already merge the fragments together
360 
361 	// We will always keep the last buffer received, so that we can still
362 	// report an error (in which case we're not responsible for freeing it)
363 
364 	if (previous != NULL && previous->fragment.end == start) {
365 		fFragments.Remove(previous);
366 
367 		buffer->fragment.start = previous->fragment.start;
368 		buffer->fragment.end = end;
369 
370 		status_t status = gBufferModule->merge(buffer, previous, false);
371 		TRACE("    merge previous: %s", strerror(status));
372 		if (status != B_OK) {
373 			fFragments.InsertBefore(next, previous);
374 			return status;
375 		}
376 
377 		fFragments.InsertBefore(next, buffer);
378 
379 		// cut down existing hole
380 		fBytesLeft -= end - start;
381 
382 		if (lastFragment && !fReceivedLastFragment) {
383 			fReceivedLastFragment = true;
384 			fBytesLeft -= IPV6_MAXPACKET - end;
385 		}
386 
387 		TRACE("    hole length: %d", (int)fBytesLeft);
388 
389 		return B_OK;
390 	} else if (next != NULL && next->fragment.start == end) {
391 		net_buffer* afterNext = (net_buffer*)next->link.next;
392 		fFragments.Remove(next);
393 
394 		buffer->fragment.start = start;
395 		buffer->fragment.end = next->fragment.end;
396 
397 		status_t status = gBufferModule->merge(buffer, next, true);
398 		TRACE("    merge next: %s", strerror(status));
399 		if (status != B_OK) {
400 			// Insert "next" at its previous position
401 			fFragments.InsertBefore(afterNext, next);
402 			return status;
403 		}
404 
405 		fFragments.InsertBefore(afterNext, buffer);
406 
407 		// cut down existing hole
408 		fBytesLeft -= end - start;
409 
410 		if (lastFragment && !fReceivedLastFragment) {
411 			fReceivedLastFragment = true;
412 			fBytesLeft -= IPV6_MAXPACKET - end;
413 		}
414 
415 		TRACE("    hole length: %d", (int)fBytesLeft);
416 
417 		return B_OK;
418 	}
419 
420 	// We couldn't merge the fragments, so we need to add it as is
421 
422 	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);
423 
424 	buffer->fragment.start = start;
425 	buffer->fragment.end = end;
426 	fFragments.InsertBefore(next, buffer);
427 
428 	// update length of the hole, if any
429 	fBytesLeft -= end - start;
430 
431 	if (lastFragment && !fReceivedLastFragment) {
432 		fReceivedLastFragment = true;
433 		fBytesLeft -= IPV6_MAXPACKET - end;
434 	}
435 
436 	TRACE("    hole length: %d", (int)fBytesLeft);
437 
438 	return B_OK;
439 }
440 
441 
442 /*!	Reassembles the fragments to the specified buffer \a to.
443 	This buffer must have been added via AddFragment() before.
444 */
445 status_t
446 FragmentPacket::Reassemble(net_buffer* to)
447 {
448 	if (!IsComplete())
449 		return B_ERROR;
450 
451 	net_buffer* buffer = NULL;
452 
453 	net_buffer* fragment;
454 	while ((fragment = fFragments.RemoveHead()) != NULL) {
455 		if (buffer != NULL) {
456 			status_t status;
457 			if (to == fragment) {
458 				status = gBufferModule->merge(fragment, buffer, false);
459 				buffer = fragment;
460 			} else
461 				status = gBufferModule->merge(buffer, fragment, true);
462 			if (status != B_OK)
463 				return status;
464 		} else
465 			buffer = fragment;
466 	}
467 
468 	if (buffer != to)
469 		panic("ipv6 packet reassembly did not work correctly.");
470 
471 	to->index = fIndex;
472 		// reset the buffer's device index
473 
474 	return B_OK;
475 }
476 
477 
478 /*static*/ void
479 FragmentPacket::StaleTimer(struct net_timer* timer, void* data)
480 {
481 	FragmentPacket* packet = (FragmentPacket*)data;
482 	TRACE("Assembling FragmentPacket %p timed out!", packet);
483 
484 	MutexLocker locker(&sFragmentLock);
485 	sFragmentHash.Remove(packet);
486 	locker.Unlock();
487 
488 	if (!packet->fFragments.IsEmpty()) {
489 		// Send error: fragment reassembly time exceeded
490 		sDomain->module->error_reply(NULL, packet->fFragments.First(),
491 			B_NET_ERROR_REASSEMBLY_TIME_EXCEEDED, NULL);
492 	}
493 
494 	delete packet;
495 }
496 
497 
498 //	#pragma mark -
499 
500 
501 size_t
502 MulticastStateHash::HashKey(const KeyType &key) const
503 {
504 	size_t result = 0;
505 	result = jenkins_hashword((const uint32*)key.first,
506 		sizeof(in6_addr) / sizeof(uint32), result);
507 	result = jenkins_hashword(&key.second, 1, result);
508 	return result;
509 }
510 
511 
512 //	#pragma mark -
513 
514 
515 static inline void
516 dump_ipv6_header(IPv6Header &header)
517 {
518 #ifdef TRACE_IPV6
519 	char addrbuf[INET6_ADDRSTRLEN];
520 	dprintf("  version: %d\n", header.ProtocolVersion() >> 4);
521 	dprintf("  service_type: %d\n", header.ServiceType());
522 	dprintf("  payload_length: %d\n", header.PayloadLength());
523 	dprintf("  next_header: %d\n", header.NextHeader());
524 	dprintf("  hop_limit: %d\n", header.header.ip6_hops);
525 	dprintf("  source: %s\n", ip6_sprintf(&header.header.ip6_src, addrbuf));
526 	dprintf("  destination: %s\n",
527 		ip6_sprintf(&header.header.ip6_dst, addrbuf));
528 #endif
529 }
530 
531 
532 /*!	Attempts to re-assemble fragmented packets.
533 	\return B_OK if everything went well; if it could reassemble the packet,
534 		\a _buffer will point to its buffer, otherwise, it will be \c NULL.
535 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
536 */
537 static status_t
538 reassemble_fragments(const IPv6Header &header, net_buffer** _buffer,
539 	uint16 offset)
540 {
541 	net_buffer* buffer = *_buffer;
542 	status_t status;
543 
544 	ip6_frag fragmentHeader;
545 	status = gBufferModule->read(buffer, offset, &fragmentHeader,
546 		sizeof(ip6_frag));
547 
548 	if (status != B_OK)
549 		return status;
550 
551 	struct ipv6_packet_key key;
552 	memcpy(&key.source, &header.Src(), sizeof(in6_addr));
553 	memcpy(&key.destination, &header.Dst(), sizeof(in6_addr));
554 	key.id = fragmentHeader.ip6f_ident;
555 	key.protocol = fragmentHeader.ip6f_nxt;
556 
557 	// TODO: Make locking finer grained.
558 	MutexLocker locker(&sFragmentLock);
559 
560 	FragmentPacket* packet = sFragmentHash.Lookup(key);
561 	if (packet == NULL) {
562 		// New fragment packet
563 		packet = new (std::nothrow) FragmentPacket(key);
564 		if (packet == NULL)
565 			return B_NO_MEMORY;
566 
567 		// add packet to hash
568 		status = sFragmentHash.Insert(packet);
569 		if (status != B_OK) {
570 			delete packet;
571 			return status;
572 		}
573 	}
574 
575 	uint16 start = ntohs(fragmentHeader.ip6f_offlg & IP6F_OFF_MASK);
576 	uint16 end = start + header.PayloadLength();
577 	bool lastFragment = (fragmentHeader.ip6f_offlg & IP6F_MORE_FRAG) == 0;
578 
579 	TRACE("   Received IPv6 %sfragment of size %d, offset %d.",
580 		lastFragment ? "last ": "", end - start, start);
581 
582 	// Remove header unless this is the first fragment
583 	if (start != 0)
584 		gBufferModule->remove_header(buffer, offset);
585 
586 	status = packet->AddFragment(start, end, buffer, lastFragment);
587 	if (status != B_OK)
588 		return status;
589 
590 	if (packet->IsComplete()) {
591 		sFragmentHash.Remove(packet);
592 			// no matter if reassembling succeeds, we won't need this packet
593 			// anymore
594 
595 		status = packet->Reassemble(buffer);
596 		delete packet;
597 
598 		// _buffer does not change
599 		return status;
600 	}
601 
602 	// This indicates that the packet is not yet complete
603 	*_buffer = NULL;
604 	return B_OK;
605 }
606 
607 
608 /*!	Fragments the incoming buffer and send all fragments via the specified
609 	\a route.
610 */
611 static status_t
612 send_fragments(ipv6_protocol* protocol, struct net_route* route,
613 	net_buffer* buffer, uint32 mtu)
614 {
615 	TRACE_SK(protocol, "SendFragments(%" B_PRIu32 " bytes, mtu %" B_PRIu32 ")",
616 		buffer->size, mtu);
617 
618 	NetBufferHeaderReader<IPv6Header> originalHeader(buffer);
619 	if (originalHeader.Status() != B_OK)
620 		return originalHeader.Status();
621 
622 	// TODO: currently FragHeader goes always as the last one, but in theory
623 	// ext. headers like AuthHeader and DestOptions should go after it.
624 	uint16 headersLength = originalHeader->GetHeaderOffset(buffer);
625 	uint16 extensionHeadersLength = headersLength
626 		- sizeof(ip6_hdr) + sizeof(ip6_frag);
627 	uint32 bytesLeft = buffer->size - headersLength;
628 	uint32 fragmentOffset = 0;
629 	status_t status = B_OK;
630 
631 	// TODO: this is rather inefficient
632 	net_buffer* headerBuffer = gBufferModule->clone(buffer, false);
633 	if (headerBuffer == NULL)
634 		return B_NO_MEMORY;
635 
636 	status = gBufferModule->remove_trailer(headerBuffer, bytesLeft);
637 	if (status != B_OK)
638 		return status;
639 
640 	BStackOrHeapArray<uint8, 128> data(bytesLeft);
641 	if (!data.IsValid())
642 		return B_NO_MEMORY;
643 	status = gBufferModule->read(buffer, headersLength, data, bytesLeft);
644 	if (status != B_OK)
645 		return status;
646 
647 	// TODO (from ipv4): we need to make sure all header space is contiguous or
648 	// use another construct.
649 	NetBufferHeaderReader<IPv6Header> bufferHeader(headerBuffer);
650 
651 	// Adapt MTU to be a multiple of 8 (fragment offsets can only be specified
652 	// this way)
653 	mtu -= headersLength + sizeof(ip6_frag);
654 	mtu &= ~7;
655 	TRACE("  adjusted MTU to %" B_PRIu32 " bytesLeft %" B_PRIu32, mtu,
656 		bytesLeft);
657 
658 	while (bytesLeft > 0) {
659 		uint32 fragmentLength = min_c(bytesLeft, mtu);
660 		bytesLeft -= fragmentLength;
661 		bool lastFragment = bytesLeft == 0;
662 
663 		bufferHeader->header.ip6_nxt = IPPROTO_FRAGMENT;
664 		bufferHeader->header.ip6_plen
665 			= htons(fragmentLength + extensionHeadersLength);
666 		bufferHeader.Sync();
667 
668 		ip6_frag fragmentHeader;
669 		fragmentHeader.ip6f_nxt = originalHeader->NextHeader();
670 		fragmentHeader.ip6f_reserved = 0;
671 		fragmentHeader.ip6f_offlg = htons(fragmentOffset) & IP6F_OFF_MASK;
672 		if (!lastFragment)
673 			fragmentHeader.ip6f_offlg |= IP6F_MORE_FRAG;
674 		fragmentHeader.ip6f_ident = htonl(atomic_add(&sFragmentID, 1));
675 
676 		TRACE("  send fragment of %" B_PRIu32 " bytes (%" B_PRIu32
677 			" bytes left)", fragmentLength, bytesLeft);
678 
679 		net_buffer* fragmentBuffer;
680 		if (!lastFragment)
681 			fragmentBuffer = gBufferModule->clone(headerBuffer, false);
682 		else
683 			fragmentBuffer = buffer;
684 
685 		if (fragmentBuffer == NULL) {
686 			status = B_NO_MEMORY;
687 			break;
688 		}
689 
690 		// copy data to fragment
691 		do {
692 			status = gBufferModule->append(
693 				fragmentBuffer, &fragmentHeader, sizeof(ip6_frag));
694 			if (status != B_OK)
695 				break;
696 
697 			status = gBufferModule->append(
698 				fragmentBuffer, &data[fragmentOffset], fragmentLength);
699 			if (status != B_OK)
700 				break;
701 
702 			// send fragment
703 			status = sDatalinkModule->send_routed_data(route, fragmentBuffer);
704 		} while (false);
705 
706 		if (lastFragment) {
707 			// we don't own the last buffer, so we don't have to free it
708 			break;
709 		}
710 
711 		if (status != B_OK) {
712 			gBufferModule->free(fragmentBuffer);
713 			break;
714 		}
715 
716 		fragmentOffset += fragmentLength;
717 	}
718 
719 	gBufferModule->free(headerBuffer);
720 	return status;
721 }
722 
723 
724 static status_t
725 deliver_multicast(net_protocol_module_info* module, net_buffer* buffer,
726 	bool deliverToRaw, net_interface *interface)
727 {
728 	sockaddr_in6* multicastAddr = (sockaddr_in6*)buffer->destination;
729 
730 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
731 		&multicastAddr->sin6_addr, interface->index));
732 
733 	while (it.HasNext()) {
734 		IPv6GroupInterface* state = it.Next();
735 		ipv6_protocol* ipproto = state->Parent()->Socket();
736 
737 		if (deliverToRaw && ipproto->raw == NULL)
738 			continue;
739 
740 		if (state->FilterAccepts(buffer)) {
741 			// TODO: do as in IPv4 code
742 			module->deliver_data(ipproto, buffer);
743 		}
744 	}
745 
746 	return B_OK;
747 }
748 
749 
750 static status_t
751 deliver_multicast(net_protocol_module_info* module, net_buffer* buffer,
752 	bool deliverToRaw)
753 {
754 	if (module->deliver_data == NULL)
755 		return B_OK;
756 
757 	MutexLocker _(sMulticastGroupsLock);
758 
759 	status_t status = B_OK;
760 	if (buffer->interface_address != NULL) {
761 		status = deliver_multicast(module, buffer, deliverToRaw,
762 			buffer->interface_address->interface);
763 	} else {
764 #if 0 //  FIXME: multicast
765 		net_domain_private* domain = (net_domain_private*)sDomain;
766 		RecursiveLocker locker(domain->lock);
767 
768 		net_interface* interface = NULL;
769 		while (true) {
770 			interface = (net_interface*)list_get_next_item(
771 				&domain->interfaces, interface);
772 			if (interface == NULL)
773 				break;
774 
775 			status = deliver_multicast(module, buffer, deliverToRaw, interface);
776 			if (status < B_OK)
777 				break;
778 		}
779 #endif
780 	}
781 	return status;
782 }
783 
784 
785 static void
786 raw_receive_data(net_buffer* buffer)
787 {
788 	MutexLocker locker(sRawSocketsLock);
789 
790 	if (sRawSockets.IsEmpty())
791 		return;
792 
793 	TRACE("RawReceiveData(%i)", buffer->protocol);
794 
795 	if ((buffer->flags & MSG_MCAST) != 0) {
796 		deliver_multicast(&gIPv6Module, buffer, true);
797 	} else {
798 		RawSocketList::Iterator iterator = sRawSockets.GetIterator();
799 
800 		while (iterator.HasNext()) {
801 			RawSocket* raw = iterator.Next();
802 
803 			if (raw->Socket()->protocol == buffer->protocol)
804 				raw->EnqueueClone(buffer);
805 		}
806 	}
807 }
808 
809 
810 static inline sockaddr*
811 fill_sockaddr_in6(sockaddr_in6* target, const in6_addr &address)
812 {
813 	target->sin6_family = AF_INET6;
814 	target->sin6_len = sizeof(sockaddr_in6);
815 	target->sin6_port = 0;
816 	target->sin6_flowinfo = 0;
817 	memcpy(target->sin6_addr.s6_addr, address.s6_addr, sizeof(in6_addr));
818 	target->sin6_scope_id = 0;
819 	return (sockaddr*)target;
820 }
821 
822 
823 status_t
824 IPv6Multicast::JoinGroup(IPv6GroupInterface* state)
825 {
826 	MutexLocker _(sMulticastGroupsLock);
827 
828 	sockaddr_in6 groupAddr;
829 	status_t status = sDatalinkModule->join_multicast(state->Interface(),
830 		sDomain, fill_sockaddr_in6(&groupAddr, state->Address()));
831 	if (status != B_OK)
832 		return status;
833 
834 	sMulticastState->Insert(state);
835 	return B_OK;
836 }
837 
838 
839 status_t
840 IPv6Multicast::LeaveGroup(IPv6GroupInterface* state)
841 {
842 	MutexLocker _(sMulticastGroupsLock);
843 
844 	sMulticastState->Remove(state);
845 
846 	sockaddr_in6 groupAddr;
847 	return sDatalinkModule->leave_multicast(state->Interface(), sDomain,
848 		fill_sockaddr_in6(&groupAddr, state->Address()));
849 }
850 
851 
852 static net_protocol_module_info*
853 receiving_protocol(uint8 protocol)
854 {
855 	net_protocol_module_info* module = sReceivingProtocol[protocol];
856 	if (module != NULL)
857 		return module;
858 
859 	MutexLocker locker(sReceivingProtocolLock);
860 
861 	module = sReceivingProtocol[protocol];
862 	if (module != NULL)
863 		return module;
864 
865 	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol,
866 			&module) == B_OK)
867 		sReceivingProtocol[protocol] = module;
868 
869 	return module;
870 }
871 
872 
873 static status_t
874 ipv6_delta_group(IPv6GroupInterface* group, int option,
875 	net_interface* interface, const in6_addr* sourceAddr)
876 {
877 	switch (option) {
878 		case IPV6_JOIN_GROUP:
879 			return group->Add();
880 		case IPV6_LEAVE_GROUP:
881 			return group->Drop();
882 	}
883 
884 	return B_ERROR;
885 }
886 
887 
888 static status_t
889 ipv6_delta_membership(ipv6_protocol* protocol, int option,
890 	net_interface* interface, const in6_addr* groupAddr,
891 	const in6_addr* sourceAddr)
892 {
893 	IPv6MulticastFilter &filter = protocol->multicast_filter;
894 	IPv6GroupInterface* state = NULL;
895 	status_t status = B_OK;
896 
897 	switch (option) {
898 		// TODO: support more options
899 		case IPV6_JOIN_GROUP:
900 			status = filter.GetState(*groupAddr, interface, state, true);
901 			break;
902 
903 		case IPV6_LEAVE_GROUP:
904 			filter.GetState(*groupAddr, interface, state, false);
905 			if (state == NULL)
906 				return EADDRNOTAVAIL;
907 			break;
908 	}
909 
910 	if (status != B_OK)
911 		return status;
912 
913 	status = ipv6_delta_group(state, option, interface, sourceAddr);
914 	filter.ReturnState(state);
915 	return status;
916 }
917 
918 
919 static status_t
920 ipv6_delta_membership(ipv6_protocol* protocol, int option,
921 	uint32 interfaceIndex, in6_addr* groupAddr, in6_addr* sourceAddr)
922 {
923 	net_interface* interface;
924 
925 	// TODO: can the interface be unspecified?
926 	interface = sDatalinkModule->get_interface(sDomain, interfaceIndex);
927 
928 	if (interface == NULL)
929 		return B_DEVICE_NOT_FOUND;
930 
931 	return ipv6_delta_membership(protocol, option, interface,
932 		groupAddr, sourceAddr);
933 }
934 
935 
936 static status_t
937 get_int_option(void* target, size_t length, int value)
938 {
939 	if (length != sizeof(int))
940 		return B_BAD_VALUE;
941 
942 	return user_memcpy(target, &value, sizeof(int));
943 }
944 
945 
946 template<typename Type> static status_t
947 set_int_option(Type &target, const void* _value, size_t length)
948 {
949 	int value;
950 
951 	if (length != sizeof(int))
952 		return B_BAD_VALUE;
953 
954 	if (user_memcpy(&value, _value, sizeof(int)) != B_OK)
955 		return B_BAD_ADDRESS;
956 
957 	target = value;
958 	return B_OK;
959 }
960 
961 
962 //	#pragma mark -
963 
964 
965 net_protocol*
966 ipv6_init_protocol(net_socket* socket)
967 {
968 	ipv6_protocol* protocol = new (std::nothrow) ipv6_protocol();
969 	if (protocol == NULL)
970 		return NULL;
971 
972 	protocol->raw = NULL;
973 	protocol->service_type = 0;
974 	protocol->time_to_live = kDefaultTTL;
975 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
976 	protocol->receive_hoplimit = 0;
977 	protocol->receive_pktinfo = 0;
978 	protocol->multicast_address = NULL;
979 	return protocol;
980 }
981 
982 
983 status_t
984 ipv6_uninit_protocol(net_protocol* _protocol)
985 {
986 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
987 
988 	delete protocol;
989 	return B_OK;
990 }
991 
992 
993 /*!	Since open() is only called on the top level protocol, when we get here
994 	it means we are on a SOCK_RAW socket.
995 */
996 status_t
997 ipv6_open(net_protocol* _protocol)
998 {
999 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1000 
1001 	RawSocket* raw = new (std::nothrow) RawSocket(protocol->socket);
1002 	if (raw == NULL)
1003 		return B_NO_MEMORY;
1004 
1005 	status_t status = raw->InitCheck();
1006 	if (status != B_OK) {
1007 		delete raw;
1008 		return status;
1009 	}
1010 
1011 	TRACE_SK(protocol, "Open()");
1012 
1013 	protocol->raw = raw;
1014 
1015 	MutexLocker locker(sRawSocketsLock);
1016 	sRawSockets.Add(raw);
1017 	return B_OK;
1018 }
1019 
1020 
1021 status_t
1022 ipv6_close(net_protocol* _protocol)
1023 {
1024 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1025 	RawSocket* raw = protocol->raw;
1026 	if (raw == NULL)
1027 		return B_ERROR;
1028 
1029 	TRACE_SK(protocol, "Close()");
1030 
1031 	MutexLocker locker(sRawSocketsLock);
1032 	sRawSockets.Remove(raw);
1033 	delete raw;
1034 	protocol->raw = NULL;
1035 
1036 	return B_OK;
1037 }
1038 
1039 
1040 status_t
1041 ipv6_free(net_protocol* protocol)
1042 {
1043 	return B_OK;
1044 }
1045 
1046 
1047 status_t
1048 ipv6_connect(net_protocol* protocol, const struct sockaddr* address)
1049 {
1050 	return B_ERROR;
1051 }
1052 
1053 
1054 status_t
1055 ipv6_accept(net_protocol* protocol, struct net_socket** _acceptedSocket)
1056 {
1057 	return EOPNOTSUPP;
1058 }
1059 
1060 
1061 status_t
1062 ipv6_control(net_protocol* _protocol, int level, int option, void* value,
1063 	size_t* _length)
1064 {
1065 	if ((level & LEVEL_MASK) != IPPROTO_IPV6)
1066 		return sDatalinkModule->control(sDomain, option, value, _length);
1067 
1068 	return B_BAD_VALUE;
1069 }
1070 
1071 
1072 status_t
1073 ipv6_getsockopt(net_protocol* _protocol, int level, int option, void* value,
1074 	int* _length)
1075 {
1076 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1077 
1078 	if (level == IPPROTO_IPV6) {
1079 		// TODO: support more of these options
1080 
1081 		if (option == IPV6_MULTICAST_HOPS) {
1082 			return get_int_option(value, *_length,
1083 				protocol->multicast_time_to_live);
1084 		}
1085 		if (option == IPV6_MULTICAST_LOOP)
1086 			return EOPNOTSUPP;
1087 		if (option == IPV6_UNICAST_HOPS)
1088 			return get_int_option(value, *_length, protocol->time_to_live);
1089 		if (option == IPV6_V6ONLY)
1090 			return EOPNOTSUPP;
1091 		if (option == IPV6_RECVPKTINFO)
1092 			return get_int_option(value, *_length, protocol->receive_pktinfo);
1093 		if (option == IPV6_RECVHOPLIMIT)
1094 			return get_int_option(value, *_length, protocol->receive_hoplimit);
1095 		if (option == IPV6_JOIN_GROUP
1096 			|| option == IPV6_LEAVE_GROUP)
1097 			return EOPNOTSUPP;
1098 
1099 		dprintf("IPv6::getsockopt(): get unknown option: %d\n", option);
1100 		return ENOPROTOOPT;
1101 	}
1102 
1103 	return sSocketModule->get_option(protocol->socket, level, option, value,
1104 		_length);
1105 }
1106 
1107 
1108 status_t
1109 ipv6_setsockopt(net_protocol* _protocol, int level, int option,
1110 	const void* value, int length)
1111 {
1112 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1113 
1114 	if (level == IPPROTO_IPV6) {
1115 		// TODO: support more of these options
1116 
1117 		if (option == IPV6_MULTICAST_IF) {
1118 			if (length != sizeof(struct in6_addr))
1119 				return B_BAD_VALUE;
1120 
1121 			struct sockaddr_in6* address = new (std::nothrow) sockaddr_in6;
1122 			if (address == NULL)
1123 				return B_NO_MEMORY;
1124 
1125 			if (user_memcpy(&address->sin6_addr, value, sizeof(in6_addr))
1126 					!= B_OK) {
1127 				delete address;
1128 				return B_BAD_ADDRESS;
1129 			}
1130 
1131 			// Using the unspecifed address to remove the previous setting.
1132 			if (IN6_IS_ADDR_UNSPECIFIED(&address->sin6_addr)) {
1133 				delete address;
1134 				delete protocol->multicast_address;
1135 				protocol->multicast_address = NULL;
1136 				return B_OK;
1137 			}
1138 
1139 			struct net_interface* interface
1140 				= sDatalinkModule->get_interface_with_address(
1141 					(sockaddr*)address);
1142 			if (interface == NULL) {
1143 				delete address;
1144 				return EADDRNOTAVAIL;
1145 			}
1146 
1147 			delete protocol->multicast_address;
1148 			protocol->multicast_address = (struct sockaddr*)address;
1149 
1150 			sDatalinkModule->put_interface(interface);
1151 			return B_OK;
1152 		}
1153 		if (option == IPV6_MULTICAST_HOPS) {
1154 			return set_int_option(protocol->multicast_time_to_live,
1155 				value, length);
1156 		}
1157 		if (option == IPV6_MULTICAST_LOOP)
1158 			return EOPNOTSUPP;
1159 		if (option == IPV6_UNICAST_HOPS)
1160 			return set_int_option(protocol->time_to_live, value, length);
1161 		if (option == IPV6_V6ONLY)
1162 			return EOPNOTSUPP;
1163 		if (option == IPV6_RECVPKTINFO)
1164 			return set_int_option(protocol->receive_pktinfo, value, length);
1165 		if (option == IPV6_RECVHOPLIMIT)
1166 			return set_int_option(protocol->receive_hoplimit, value, length);
1167 		if (option == IPV6_JOIN_GROUP || option == IPV6_LEAVE_GROUP) {
1168 			ipv6_mreq mreq;
1169 			if (length != sizeof(ipv6_mreq))
1170 				return B_BAD_VALUE;
1171 			if (user_memcpy(&mreq, value, sizeof(ipv6_mreq)) != B_OK)
1172 				return B_BAD_ADDRESS;
1173 
1174 			return ipv6_delta_membership(protocol, option,
1175 				mreq.ipv6mr_interface, &mreq.ipv6mr_multiaddr, NULL);
1176 		}
1177 
1178 		dprintf("IPv6::setsockopt(): set unknown option: %d\n", option);
1179 		return ENOPROTOOPT;
1180 	}
1181 
1182 	return sSocketModule->set_option(protocol->socket, level, option,
1183 		value, length);
1184 }
1185 
1186 
1187 status_t
1188 ipv6_bind(net_protocol* protocol, const sockaddr* _address)
1189 {
1190 	if (_address->sa_family != AF_INET6)
1191 		return EAFNOSUPPORT;
1192 
1193 	const sockaddr_in6* address = (const sockaddr_in6*)_address;
1194 
1195 	// only INADDR_ANY and addresses of local interfaces are accepted:
1196 	if (IN6_IS_ADDR_UNSPECIFIED(&address->sin6_addr)
1197 		|| IN6_IS_ADDR_MULTICAST(&address->sin6_addr)
1198 		|| sDatalinkModule->is_local_address(sDomain, _address, NULL, NULL)) {
1199 		memcpy(&protocol->socket->address, address, sizeof(sockaddr_in6));
1200 		protocol->socket->address.ss_len = sizeof(sockaddr_in6);
1201 			// explicitly set length, as our callers can't be trusted to
1202 			// always provide the correct length!
1203 		return B_OK;
1204 	}
1205 
1206 	return B_ERROR;
1207 		// address is unknown on this host
1208 }
1209 
1210 
1211 status_t
1212 ipv6_unbind(net_protocol* protocol, struct sockaddr* address)
1213 {
1214 	// nothing to do here
1215 	return B_OK;
1216 }
1217 
1218 
1219 status_t
1220 ipv6_listen(net_protocol* protocol, int count)
1221 {
1222 	return EOPNOTSUPP;
1223 }
1224 
1225 
1226 status_t
1227 ipv6_shutdown(net_protocol* protocol, int direction)
1228 {
1229 	return EOPNOTSUPP;
1230 }
1231 
1232 
1233 static uint8
1234 ip6_select_hoplimit(net_protocol* _protocol, net_buffer* buffer)
1235 {
1236 	// TODO: the precedence should be as follows:
1237 	// 1. Hoplimit value specified via ioctl.
1238 	// 2. (If the outgoing interface is detected) the current
1239 	//     hop limit of the interface specified by router advertisement.
1240 	// 3. The system default hoplimit.
1241 
1242 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1243 	const bool isMulticast = buffer->flags & MSG_MCAST;
1244 
1245 	if (protocol) {
1246 		return isMulticast ? protocol->multicast_time_to_live
1247 			: protocol->time_to_live;
1248 	}
1249 	return isMulticast ? kDefaultMulticastTTL : kDefaultTTL;
1250 }
1251 
1252 
1253 status_t
1254 ipv6_send_routed_data(net_protocol* _protocol, struct net_route* route,
1255 	net_buffer* buffer)
1256 {
1257 	if (route == NULL)
1258 		return B_BAD_VALUE;
1259 
1260 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1261 	net_interface* interface = route->interface_address->interface;
1262 	uint8 protocolNumber;
1263 	if (protocol != NULL && protocol->socket != NULL)
1264 		protocolNumber = protocol->socket->protocol;
1265 	else
1266 		protocolNumber = buffer->protocol;
1267 
1268 	TRACE_SK(protocol, "SendRoutedData(%p, %p [%" B_PRIu32 " bytes])", route,
1269 		buffer, buffer->size);
1270 
1271 	sockaddr_in6& source = *(sockaddr_in6*)buffer->source;
1272 	sockaddr_in6& destination = *(sockaddr_in6*)buffer->destination;
1273 
1274 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1275 
1276 	if (IN6_IS_ADDR_UNSPECIFIED(&destination.sin6_addr))
1277 		return EDESTADDRREQ;
1278 
1279 	if (IN6_IS_ADDR_MULTICAST(&destination.sin6_addr))
1280 		buffer->flags |= MSG_MCAST;
1281 
1282 	uint16 dataLength = buffer->size;
1283 
1284 	// Add IPv6 header
1285 
1286 	NetBufferPrepend<ip6_hdr> header(buffer);
1287 	if (header.Status() != B_OK)
1288 		return header.Status();
1289 
1290 	if (buffer->size > 0xffff)
1291 		return EMSGSIZE;
1292 
1293 	uint32 flowinfo = 0;
1294 		// TODO: fill in the flow id from somewhere
1295 	if (protocol) {
1296 		// fill in traffic class
1297 		flowinfo |= htonl(protocol->service_type << 20);
1298 	}
1299 	// set lower 28 bits
1300 	header->ip6_flow = htonl(flowinfo) & IPV6_FLOWINFO_MASK;
1301 	// set upper 4 bits
1302 	header->ip6_vfc |= IPV6_VERSION;
1303 	header->ip6_plen = htons(dataLength);
1304 	header->ip6_nxt = protocolNumber;
1305 	header->ip6_hlim = ip6_select_hoplimit(protocol, buffer);
1306 	memcpy(&header->ip6_src, &source.sin6_addr, sizeof(in6_addr));
1307 	memcpy(&header->ip6_dst, &destination.sin6_addr, sizeof(in6_addr));
1308 
1309 	header.Sync();
1310 
1311 	// write the checksum for ICMPv6 sockets
1312 	if (protocolNumber == IPPROTO_ICMPV6
1313 		&& dataLength >= sizeof(struct icmp6_hdr)) {
1314 		NetBufferField<uint16, sizeof(ip6_hdr)
1315 			+ offsetof(icmp6_hdr, icmp6_cksum)>
1316 			icmpChecksum(buffer);
1317 		// first make sure the existing checksum is zero
1318 		*icmpChecksum = 0;
1319 		icmpChecksum.Sync();
1320 
1321 		uint16 checksum = gBufferModule->checksum(buffer, sizeof(ip6_hdr),
1322 			buffer->size - sizeof(ip6_hdr), false);
1323 		checksum = ipv6_checksum(&header->ip6_src,
1324 			&header->ip6_dst, dataLength, protocolNumber,
1325 			checksum);
1326 		*icmpChecksum = checksum;
1327 	}
1328 
1329 	char addrbuf[INET6_ADDRSTRLEN];
1330 	ip6_sprintf(&destination.sin6_addr, addrbuf);
1331 	TRACE_SK(protocol, "  SendRoutedData(): destination: %s", addrbuf);
1332 
1333 	uint32 mtu = route->mtu ? route->mtu : interface->device->mtu;
1334 	if (buffer->size > mtu) {
1335 		// we need to fragment the packet
1336 		return send_fragments(protocol, route, buffer, mtu);
1337 	}
1338 
1339 	return sDatalinkModule->send_routed_data(route, buffer);
1340 }
1341 
1342 
1343 status_t
1344 ipv6_send_data(net_protocol* _protocol, net_buffer* buffer)
1345 {
1346 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1347 
1348 	TRACE_SK(protocol, "SendData(%p [%" B_PRIu32 " bytes])", buffer,
1349 		buffer->size);
1350 
1351 	sockaddr_in6* destination = (sockaddr_in6*)buffer->destination;
1352 
1353 	// handle IPV6_MULTICAST_IF
1354 	if (IN6_IS_ADDR_MULTICAST(&destination->sin6_addr)
1355 		&& protocol->multicast_address != NULL) {
1356 		net_interface_address* address = sDatalinkModule->get_interface_address(
1357 			protocol->multicast_address);
1358 		if (address == NULL || (address->interface->flags & IFF_UP) == 0) {
1359 			sDatalinkModule->put_interface_address(address);
1360 			return EADDRNOTAVAIL;
1361 		}
1362 
1363 		sDatalinkModule->put_interface_address(buffer->interface_address);
1364 		buffer->interface_address = address;
1365 			// the buffer takes over ownership of the address
1366 
1367 		net_route* route = sDatalinkModule->get_route(sDomain, address->local);
1368 		if (route == NULL)
1369 			return ENETUNREACH;
1370 
1371 		return sDatalinkModule->send_routed_data(route, buffer);
1372 	}
1373 
1374 	return sDatalinkModule->send_data(protocol, sDomain, buffer);
1375 }
1376 
1377 
1378 ssize_t
1379 ipv6_send_avail(net_protocol* protocol)
1380 {
1381 	return B_ERROR;
1382 }
1383 
1384 
1385 status_t
1386 ipv6_read_data(net_protocol* _protocol, size_t numBytes, uint32 flags,
1387 	net_buffer** _buffer)
1388 {
1389 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1390 	RawSocket* raw = protocol->raw;
1391 	if (raw == NULL)
1392 		return B_ERROR;
1393 
1394 	TRACE_SK(protocol, "ReadData(%" B_PRIuSIZE ", 0x%" B_PRIu32 ")", numBytes,
1395 		flags);
1396 
1397 	return raw->Dequeue(flags, _buffer);
1398 }
1399 
1400 
1401 ssize_t
1402 ipv6_read_avail(net_protocol* _protocol)
1403 {
1404 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1405 	RawSocket* raw = protocol->raw;
1406 	if (raw == NULL)
1407 		return B_ERROR;
1408 
1409 	return raw->AvailableData();
1410 }
1411 
1412 
1413 struct net_domain*
1414 ipv6_get_domain(net_protocol* protocol)
1415 {
1416 	return sDomain;
1417 }
1418 
1419 
1420 size_t
1421 ipv6_get_mtu(net_protocol* protocol, const struct sockaddr* address)
1422 {
1423 	net_route* route = sDatalinkModule->get_route(sDomain, address);
1424 	if (route == NULL)
1425 		return 0;
1426 
1427 	size_t mtu;
1428 	if (route->mtu != 0)
1429 		mtu = route->mtu;
1430 	else
1431 		mtu = route->interface_address->interface->device->mtu;
1432 
1433 	sDatalinkModule->put_route(sDomain, route);
1434 	// TODO: what about extension headers?
1435 	// this function probably shoud be changed in calling places, not here
1436 	return mtu - sizeof(ip6_hdr);
1437 }
1438 
1439 
1440 status_t
1441 ipv6_receive_data(net_buffer* buffer)
1442 {
1443 	TRACE("ReceiveData(%p [%" B_PRIu32 " bytes])", buffer, buffer->size);
1444 
1445 	NetBufferHeaderReader<IPv6Header> bufferHeader(buffer);
1446 	if (bufferHeader.Status() != B_OK)
1447 		return bufferHeader.Status();
1448 
1449 	IPv6Header &header = bufferHeader.Data();
1450 	// dump_ipv6_header(header);
1451 
1452 	if (header.ProtocolVersion() != IPV6_VERSION)
1453 		return B_BAD_TYPE;
1454 
1455 	uint16 packetLength = header.PayloadLength() + sizeof(ip6_hdr);
1456 	if (packetLength > buffer->size)
1457 		return B_BAD_DATA;
1458 
1459 	// lower layers notion of Broadcast or Multicast have no relevance to us
1460 	buffer->flags &= ~(MSG_BCAST | MSG_MCAST);
1461 
1462 	sockaddr_in6 destination;
1463 	fill_sockaddr_in6(&destination, header.Dst());
1464 
1465 	if (IN6_IS_ADDR_MULTICAST(&destination.sin6_addr)) {
1466 		buffer->flags |= MSG_MCAST;
1467 	} else {
1468 		uint32 matchedAddressType = 0;
1469 
1470 		// test if the packet is really for us
1471 		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1472 				&buffer->interface_address, &matchedAddressType)
1473 			&& !sDatalinkModule->is_local_link_address(sDomain, true,
1474 				buffer->destination, &buffer->interface_address)) {
1475 
1476 			char srcbuf[INET6_ADDRSTRLEN];
1477 			char dstbuf[INET6_ADDRSTRLEN];
1478 			ip6_sprintf(&header.Src(), srcbuf);
1479 			ip6_sprintf(&header.Dst(), dstbuf);
1480 			TRACE("  ipv6_receive_data(): packet was not for us %s -> %s",
1481 				srcbuf, dstbuf);
1482 
1483 			// TODO: Send ICMPv6 error: Host unreachable
1484 			return B_ERROR;
1485 		}
1486 
1487 		// copy over special address types (MSG_BCAST or MSG_MCAST):
1488 		buffer->flags |= matchedAddressType;
1489 	}
1490 
1491 	// set net_buffer's source/destination address
1492 	fill_sockaddr_in6((struct sockaddr_in6*)buffer->source, header.Src());
1493 	memcpy(buffer->destination, &destination, sizeof(sockaddr_in6));
1494 
1495 	// get the transport protocol and transport header offset
1496 	uint16 transportHeaderOffset = header.GetHeaderOffset(buffer);
1497 	uint8 protocol = buffer->protocol;
1498 
1499 	// remove any trailing/padding data
1500 	status_t status = gBufferModule->trim(buffer, packetLength);
1501 	if (status != B_OK)
1502 		return status;
1503 
1504 	// check for fragmentation
1505 	uint16 fragmentHeaderOffset
1506 		= header.GetHeaderOffset(buffer, IPPROTO_FRAGMENT);
1507 
1508 	if (fragmentHeaderOffset != 0) {
1509 		// this is a fragment
1510 		TRACE("  ipv6_receive_data(): Found a Fragment!");
1511 		status = reassemble_fragments(header, &buffer, fragmentHeaderOffset);
1512 		TRACE("  ipv6_receive_data():  -> %s", strerror(status));
1513 		if (status != B_OK)
1514 			return status;
1515 
1516 		if (buffer == NULL) {
1517 			// buffer was put into fragment packet
1518 			TRACE("  ipv6_receive_data(): Not yet assembled.");
1519 			return B_OK;
1520 		}
1521 	}
1522 
1523 	// tell the buffer to preserve removed ipv6 header - may need it later
1524 	gBufferModule->store_header(buffer);
1525 
1526 	// remove ipv6 headers for now
1527 	gBufferModule->remove_header(buffer, transportHeaderOffset);
1528 
1529 	// deliver the data to raw sockets
1530 	raw_receive_data(buffer);
1531 
1532 	net_protocol_module_info* module = receiving_protocol(protocol);
1533 	if (module == NULL) {
1534 		// no handler for this packet
1535 		return EAFNOSUPPORT;
1536 	}
1537 
1538 	if ((buffer->flags & MSG_MCAST) != 0) {
1539 		// Unfortunately historical reasons dictate that the IP multicast
1540 		// model be a little different from the unicast one. We deliver
1541 		// this frame directly to all sockets registered with interest
1542 		// for this multicast group.
1543 		return deliver_multicast(module, buffer, false);
1544 	}
1545 
1546 	return module->receive_data(buffer);
1547 }
1548 
1549 
1550 status_t
1551 ipv6_deliver_data(net_protocol* _protocol, net_buffer* buffer)
1552 {
1553 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1554 
1555 	if (protocol->raw == NULL)
1556 		return B_ERROR;
1557 
1558 	return protocol->raw->EnqueueClone(buffer);
1559 }
1560 
1561 
1562 status_t
1563 ipv6_error_received(net_error error, net_buffer* data)
1564 {
1565 	return B_ERROR;
1566 }
1567 
1568 
1569 status_t
1570 ipv6_error_reply(net_protocol* protocol, net_buffer* cause, net_error error,
1571 	net_error_data* errorData)
1572 {
1573 	return B_ERROR;
1574 }
1575 
1576 
1577 ssize_t
1578 ipv6_process_ancillary_data_no_container(net_protocol* _protocol,
1579 	net_buffer* buffer, void* msgControl, size_t msgControlLen)
1580 {
1581 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1582 	ssize_t bytesWritten = 0;
1583 
1584 	if (protocol->receive_hoplimit != 0) {
1585 		TRACE("receive_hoplimit");
1586 
1587 		if (msgControlLen < CMSG_SPACE(sizeof(int)))
1588 			return B_NO_MEMORY;
1589 
1590 		// use some default value (64 at the moment) when the real one fails
1591 		int hopLimit = IPV6_DEFHLIM;
1592 
1593 		if (gBufferModule->stored_header_length(buffer)
1594 				>= (int)sizeof(ip6_hdr)) {
1595 			IPv6Header header;
1596 			if (gBufferModule->restore_header(buffer, 0,
1597 					&header, sizeof(ip6_hdr)) == B_OK
1598 				&& header.ProtocolVersion() != IPV6_VERSION) {
1599 				// header is OK, take hoplimit from it
1600 				hopLimit = header.header.ip6_hlim;
1601 			}
1602 		}
1603 
1604 		cmsghdr* messageHeader = (cmsghdr*)((char*)msgControl + bytesWritten);
1605 		messageHeader->cmsg_len = CMSG_LEN(sizeof(int));
1606 		messageHeader->cmsg_level = IPPROTO_IPV6;
1607 		messageHeader->cmsg_type = IPV6_HOPLIMIT;
1608 
1609 		memcpy(CMSG_DATA(messageHeader), &hopLimit, sizeof(int));
1610 
1611 		bytesWritten += CMSG_SPACE(sizeof(int));
1612 		msgControlLen -= CMSG_SPACE(sizeof(int));
1613 	}
1614 
1615 	if (protocol->receive_pktinfo != 0) {
1616 		TRACE("receive_pktinfo");
1617 
1618 		if (msgControlLen < CMSG_SPACE(sizeof(struct in6_pktinfo)))
1619 			return B_NO_MEMORY;
1620 
1621 		cmsghdr* messageHeader = (cmsghdr*)((char*)msgControl + bytesWritten);
1622 		messageHeader->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
1623 		messageHeader->cmsg_level = IPPROTO_IPV6;
1624 		messageHeader->cmsg_type = IPV6_PKTINFO;
1625 
1626 		struct in6_pktinfo pi;
1627 		memcpy(&pi.ipi6_addr,
1628 			&((struct sockaddr_in6*)buffer->destination)->sin6_addr,
1629 			sizeof(struct in6_addr));
1630 		if (buffer->interface_address != NULL
1631 			&& buffer->interface_address->interface != NULL)
1632 			pi.ipi6_ifindex = buffer->interface_address->interface->index;
1633 		else
1634 			pi.ipi6_ifindex = 0;
1635 		memcpy(CMSG_DATA(messageHeader), &pi, sizeof(struct in6_pktinfo));
1636 
1637 		bytesWritten += CMSG_SPACE(sizeof(struct in6_pktinfo));
1638 		msgControlLen -= CMSG_SPACE(sizeof(struct in6_pktinfo));
1639 	}
1640 
1641 	return bytesWritten;
1642 }
1643 
1644 
1645 //	#pragma mark -
1646 
1647 
1648 status_t
1649 init_ipv6()
1650 {
1651 	mutex_init(&sRawSocketsLock, "raw sockets");
1652 	mutex_init(&sFragmentLock, "IPv4 Fragments");
1653 	mutex_init(&sMulticastGroupsLock, "IPv6 multicast groups");
1654 	mutex_init(&sReceivingProtocolLock, "IPv6 receiving protocols");
1655 
1656 	status_t status;
1657 
1658 	sMulticastState = new MulticastState();
1659 	if (sMulticastState == NULL) {
1660 		status = B_NO_MEMORY;
1661 		goto err1;
1662 	}
1663 
1664 	status = sMulticastState->Init();
1665 	if (status != B_OK)
1666 		goto err2;
1667 
1668 	new (&sFragmentHash) FragmentTable();
1669 	status = sFragmentHash.Init(256);
1670 	if (status != B_OK)
1671 		goto err3;
1672 
1673 	new (&sRawSockets) RawSocketList;
1674 		// static initializers do not work in the kernel,
1675 		// so we have to do it here, manually
1676 		// TODO: for modules, this shouldn't be required
1677 
1678 	status = gStackModule->register_domain_protocols(AF_INET6, SOCK_RAW, 0,
1679 		NET_IPV6_MODULE_NAME, NULL);
1680 	if (status != B_OK)
1681 		goto err3;
1682 
1683 	status = gStackModule->register_domain(AF_INET6, "internet6", &gIPv6Module,
1684 		&gIPv6AddressModule, &sDomain);
1685 	if (status != B_OK)
1686 		goto err3;
1687 
1688 	TRACE("init_ipv6: OK");
1689 	return B_OK;
1690 
1691 err3:
1692 	sFragmentHash.~FragmentTable();
1693 err2:
1694 	delete sMulticastState;
1695 err1:
1696 	mutex_destroy(&sReceivingProtocolLock);
1697 	mutex_destroy(&sMulticastGroupsLock);
1698 	mutex_destroy(&sFragmentLock);
1699 	mutex_destroy(&sRawSocketsLock);
1700 	TRACE("init_ipv6: error %s", strerror(status));
1701 	return status;
1702 }
1703 
1704 
1705 status_t
1706 uninit_ipv6()
1707 {
1708 	mutex_lock(&sReceivingProtocolLock);
1709 
1710 	// put all the domain receiving protocols we gathered so far
1711 	for (uint32 i = 0; i < 256; i++) {
1712 		if (sReceivingProtocol[i] != NULL)
1713 			gStackModule->put_domain_receiving_protocol(sDomain, i);
1714 	}
1715 
1716 	sFragmentHash.~FragmentTable();
1717 	delete sMulticastState;
1718 
1719 	gStackModule->unregister_domain(sDomain);
1720 	mutex_unlock(&sReceivingProtocolLock);
1721 
1722 	mutex_destroy(&sMulticastGroupsLock);
1723 	mutex_destroy(&sFragmentLock);
1724 	mutex_destroy(&sRawSocketsLock);
1725 	mutex_destroy(&sReceivingProtocolLock);
1726 
1727 	return B_OK;
1728 }
1729 
1730 
1731 static status_t
1732 ipv6_std_ops(int32 op, ...)
1733 {
1734 	switch (op) {
1735 		case B_MODULE_INIT:
1736 			return init_ipv6();
1737 		case B_MODULE_UNINIT:
1738 			return uninit_ipv6();
1739 		default:
1740 			return B_ERROR;
1741 	}
1742 }
1743 
1744 
1745 net_protocol_module_info gIPv6Module = {
1746 	{
1747 		NET_IPV6_MODULE_NAME,
1748 		0,
1749 		ipv6_std_ops
1750 	},
1751 	NET_PROTOCOL_ATOMIC_MESSAGES,
1752 
1753 	ipv6_init_protocol,
1754 	ipv6_uninit_protocol,
1755 	ipv6_open,
1756 	ipv6_close,
1757 	ipv6_free,
1758 	ipv6_connect,
1759 	ipv6_accept,
1760 	ipv6_control,
1761 	ipv6_getsockopt,
1762 	ipv6_setsockopt,
1763 	ipv6_bind,
1764 	ipv6_unbind,
1765 	ipv6_listen,
1766 	ipv6_shutdown,
1767 	ipv6_send_data,
1768 	ipv6_send_routed_data,
1769 	ipv6_send_avail,
1770 	ipv6_read_data,
1771 	ipv6_read_avail,
1772 	ipv6_get_domain,
1773 	ipv6_get_mtu,
1774 	ipv6_receive_data,
1775 	ipv6_deliver_data,
1776 	ipv6_error_received,
1777 	ipv6_error_reply,
1778 	NULL,		// add_ancillary_data()
1779 	NULL,		// process_ancillary_data()
1780 	ipv6_process_ancillary_data_no_container,
1781 	NULL,		// send_data_no_buffer()
1782 	NULL		// read_data_no_buffer()
1783 };
1784 
1785 module_dependency module_dependencies[] = {
1786 	{NET_STACK_MODULE_NAME, (module_info**)&gStackModule},
1787 	{NET_BUFFER_MODULE_NAME, (module_info**)&gBufferModule},
1788 	{NET_DATALINK_MODULE_NAME, (module_info**)&sDatalinkModule},
1789 	{NET_SOCKET_MODULE_NAME, (module_info**)&sSocketModule},
1790 	{}
1791 };
1792 
1793 module_info* modules[] = {
1794 	(module_info*)&gIPv6Module,
1795 	NULL
1796 };
1797