xref: /haiku/src/add-ons/kernel/network/protocols/ipv6/ipv6.cpp (revision eea5774f46bba925156498abf9cb1a1165647bf7)
1 /*
2  * Copyright 2006-2011, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  *		Atis Elsts, the.kfx@gmail.com
8  */
9 
10 
11 #include "ipv6_address.h"
12 #include "ipv6_utils.h"
13 #include "multicast.h"
14 
15 #include <net_datalink.h>
16 #include <net_datalink_protocol.h>
17 #include <net_device.h>
18 #include <net_protocol.h>
19 #include <net_stack.h>
20 #include <NetBufferUtilities.h>
21 #include <ProtocolUtilities.h>
22 
23 #include <ByteOrder.h>
24 #include <KernelExport.h>
25 #include <StackOrHeapArray.h>
26 #include <util/AutoLock.h>
27 #include <util/list.h>
28 #include <util/DoublyLinkedList.h>
29 #include <util/MultiHashTable.h>
30 
31 #include <netinet6/in6.h>
32 #include <netinet/ip6.h>
33 #include <netinet/icmp6.h>
34 #include <new>
35 #include <stdlib.h>
36 #include <stdio.h>
37 #include <string.h>
38 #include <utility>
39 
40 
// Uncomment the following line to enable the output of TRACE()/TRACE_SK().
//#define TRACE_IPV6
#ifdef TRACE_IPV6
	// TRACE() prints the formatted message via dprintf(), prefixed with
	// "IPv6" and the current system_time().
	#define TRACE(format, args...) \
		dprintf("IPv6 [%" B_PRIdBIGTIME "] " format "\n", system_time(), ##args)
	// TRACE_SK() does the same, but additionally prints the \a protocol
	// pointer before the message.
	#define TRACE_SK(protocol, format, args...) \
		dprintf("IPv6 [%" B_PRIdBIGTIME "] %p " format "\n", system_time(), \
			protocol, ##args)
#else
	// Tracing is disabled: both macros expand to nothing.
	#define TRACE(args...)
	#define TRACE_SK(args...)
#endif
52 
53 
#define MAX_HASH_FRAGMENTS 		64
	// slots in the fragment packet's hash
#define FRAGMENT_TIMEOUT		60000000LL
	// discard fragment after 60 seconds [RFC 2460]; this is the delay
	// FragmentPacket::AddFragment() passes to set_timer()
58 
59 
60 struct IPv6Header {
61 	struct ip6_hdr header;
62 
63 	uint8 ProtocolVersion() const { return header.ip6_vfc & IPV6_VERSION_MASK; }
64 	uint8 ServiceType() const { return ntohl(header.ip6_flow) >> 20;}
65 	uint16 PayloadLength() const { return ntohs(header.ip6_plen); }
66 	const in6_addr& Dst() const { return header.ip6_dst; }
67 	const in6_addr& Src() const { return header.ip6_src; }
68 	uint8 NextHeader() const { return header.ip6_nxt; }
69 	uint16 GetHeaderOffset(net_buffer* buffer, uint32 headerCode = ~0u) const;
70 };
71 
72 
// List of net_buffers that are chained through the buffers' own link;
// FragmentPacket uses it to keep the fragments it has received so far.
typedef DoublyLinkedList<struct net_buffer,
	DoublyLinkedListCLink<struct net_buffer> > FragmentList;
75 
76 
// TODO: make common fragmentation interface for both address families
// Identifies the packet a fragment belongs to; used as the key of the
// fragment hash table. FragmentHashDefinition::HashKey() hashes the whole
// structure as an array of uint32 words.
struct ipv6_packet_key {
	in6_addr	source;
	in6_addr	destination;
	// We use uint32 here due to the hash function
	uint32		id;
		// identification field of the fragment header (stored as received)
	uint32		protocol;
		// next header field of the fragment header
};
85 
86 
/*!	Collects the fragments of a single packet (identified by its
	ipv6_packet_key) until the packet is complete and can be reassembled.
	Instances are stored in the fragment hash table; a timer discards
	packets whose reassembly takes too long (see StaleTimer()).
*/
class FragmentPacket {
public:
								FragmentPacket(const ipv6_packet_key& key);
								~FragmentPacket();

			status_t			AddFragment(uint16 start, uint16 end,
									net_buffer* buffer, bool lastFragment);
			status_t			Reassemble(net_buffer* to);

			// Complete means: the last fragment has been seen, and no
			// bytes are missing anymore.
			bool				IsComplete() const
									{ return fReceivedLastFragment
										&& fBytesLeft == 0; }

			const ipv6_packet_key& Key() const { return fKey; }
			// Link used by the hash table to chain its entries.
			FragmentPacket*&	HashTableLink() { return fNext; }

	static	void				StaleTimer(struct net_timer* timer, void* data);

private:
			FragmentPacket*		fNext;
				// hash table link
			struct ipv6_packet_key fKey;
			uint32				fIndex;
				// device index of the buffer added last
			int32				fBytesLeft;
				// bytes still missing; starts at IPV6_MAXPACKET
			FragmentList		fFragments;
				// received fragments, ordered by start offset
			net_timer			fTimer;
				// reassembly timeout, restarted by AddFragment()
			bool				fReceivedLastFragment;
};
114 
115 
116 struct FragmentHashDefinition {
117 	typedef ipv6_packet_key KeyType;
118 	typedef FragmentPacket ValueType;
119 
120 	size_t HashKey(const KeyType& key) const
121 	{
122 		return jenkins_hashword((const uint32*)&key,
123 			sizeof(ipv6_packet_key) / sizeof(uint32), 0);
124 	}
125 
126 	size_t Hash(ValueType* value) const
127 	{
128 		return HashKey(value->Key());
129 	}
130 
131 	bool Compare(const KeyType& key, ValueType* value) const
132 	{
133 		const ipv6_packet_key& packetKey = value->Key();
134 
135 		return packetKey.id == key.id
136 			&& packetKey.source == key.source
137 			&& packetKey.destination == key.destination
138 			&& packetKey.protocol == key.protocol;
139 	}
140 
141 	ValueType*& GetLink(ValueType* value) const
142 	{
143 		return value->HashTableLink();
144 	}
145 };
146 
147 
// Hash table of the packets that are currently being reassembled.
typedef BOpenHashTable<FragmentHashDefinition, false, true> FragmentTable;
149 
150 
/*!	A raw IPv6 socket: a DatagramSocket<> that can additionally be linked
	into a RawSocketList.
*/
class RawSocket
	: public DoublyLinkedListLinkImpl<RawSocket>, public DatagramSocket<> {
public:
							RawSocket(net_socket* socket);
};
156 
157 
// List type used for the global list of open raw sockets.
typedef DoublyLinkedList<RawSocket> RawSocketList;

// IPv6 instantiations of the generic multicast templates.
typedef MulticastGroupInterface<IPv6Multicast> IPv6GroupInterface;
typedef MulticastFilter<IPv6Multicast> IPv6MulticastFilter;
162 
163 struct MulticastStateHash {
164 	typedef std::pair<const in6_addr*, uint32> KeyType;
165 	typedef IPv6GroupInterface ValueType;
166 
167 	size_t HashKey(const KeyType &key) const;
168 	size_t Hash(ValueType* value) const
169 		{ return HashKey(std::make_pair(&value->Address(),
170 			value->Interface()->index)); }
171 	bool Compare(const KeyType &key, ValueType* value) const
172 		{ return value->Interface()->index == key.second
173 			&& value->Address() == *key.first; }
174 	bool CompareValues(ValueType* value1, ValueType* value2) const
175 		{ return value1->Interface()->index == value2->Interface()->index
176 			&& value1->Address() == value2->Address(); }
177 	ValueType*& GetLink(ValueType* value) const { return value->MulticastGroupsHashLink(); }
178 };
179 
180 
181 struct ipv6_protocol : net_protocol {
182 	ipv6_protocol()
183 		:
184 		raw(NULL),
185 		multicast_filter(this)
186 	{
187 	}
188 
189 	~ipv6_protocol()
190 	{
191 		delete raw;
192 	}
193 
194 	RawSocket	*raw;
195 	uint8		service_type;
196 	uint8		time_to_live;
197 	uint8		multicast_time_to_live;
198 	uint8		receive_hoplimit;
199 	uint8		receive_pktinfo;
200 	struct sockaddr* multicast_address; // for IPV6_MULTICAST_IF
201 
202 	IPv6MulticastFilter multicast_filter;
203 };
204 
205 
// Initial hop limits of a new socket, see ipv6_init_protocol().
static const int kDefaultTTL = IPV6_DEFHLIM;
static const int kDefaultMulticastTTL = 1;


extern net_protocol_module_info gIPv6Module;
	// we need this in ipv6_std_ops() for registering the AF_INET6 domain

net_stack_module_info* gStackModule;
net_buffer_module_info* gBufferModule;

static struct net_domain* sDomain;
static net_datalink_module_info* sDatalinkModule;
static net_socket_module_info* sSocketModule;
// All open raw sockets; access is guarded by sRawSocketsLock.
static RawSocketList sRawSockets;
static mutex sRawSocketsLock;
// sFragmentLock is held while sFragmentHash is accessed.
static mutex sFragmentLock;
static FragmentTable sFragmentHash;
// Source of the fragment identification; incremented atomically in
// send_fragments().
static int32 sFragmentID;
// Held while sMulticastState is accessed.
static mutex sMulticastGroupsLock;

typedef MultiHashTable<MulticastStateHash> MulticastState;
static MulticastState* sMulticastState;

// Cache of the receiving protocol modules, indexed by protocol number, and
// filled on demand by receiving_protocol().
static net_protocol_module_info* sReceivingProtocol[256];
static mutex sReceivingProtocolLock;
231 
232 
233 uint16
234 IPv6Header::GetHeaderOffset(net_buffer* buffer, uint32 headerCode) const
235 {
236 	uint16 offset = sizeof(struct ip6_hdr);
237 	uint8 next = header.ip6_nxt;
238 
239 	// these are the extension headers that might be supported one day
240 	while (next != headerCode
241 		&& (next == IPPROTO_HOPOPTS
242 			|| next == IPPROTO_ROUTING
243 			|| next == IPPROTO_FRAGMENT
244 			|| next == IPPROTO_ESP
245 			|| next == IPPROTO_AH
246 			|| next == IPPROTO_DSTOPTS)) {
247 		struct ip6_ext extensionHeader;
248 		status_t status = gBufferModule->read(buffer, offset,
249 			&extensionHeader, sizeof(ip6_ext));
250 		if (status != B_OK)
251 			break;
252 
253 		next = extensionHeader.ip6e_nxt;
254 		offset += extensionHeader.ip6e_len;
255 	}
256 
257 	// were we looking for a specific header?
258 	if (headerCode != ~0u) {
259 		if (next == headerCode) {
260 			// found the specific header
261 			return offset;
262 		}
263 		// return 0 if fragement header is not present
264 		return 0;
265 	}
266 
267 	// the general transport layer header case
268 	buffer->protocol = next;
269 	return offset;
270 }
271 
272 
/*!	Creates the raw socket for \a socket; all work is done by the
	DatagramSocket<> base class, which gets passed "ipv6 raw socket" as name.
*/
RawSocket::RawSocket(net_socket* socket)
	:
	DatagramSocket<>("ipv6 raw socket", socket)
{
}
278 
279 
280 //	#pragma mark -
281 
282 
283 FragmentPacket::FragmentPacket(const ipv6_packet_key &key)
284 	:
285 	fKey(key),
286 	fBytesLeft(IPV6_MAXPACKET),
287 	fReceivedLastFragment(false)
288 {
289 	gStackModule->init_timer(&fTimer, FragmentPacket::StaleTimer, this);
290 }
291 
292 
293 FragmentPacket::~FragmentPacket()
294 {
295 	// cancel the kill timer
296 	gStackModule->set_timer(&fTimer, -1);
297 
298 	// delete all fragments
299 	net_buffer* buffer;
300 	while ((buffer = fFragments.RemoveHead()) != NULL) {
301 		gBufferModule->free(buffer);
302 	}
303 }
304 
305 
/*!	Adds \a buffer, which covers the bytes from \a start up to (but not
	including) \a end of the packet, to the list of fragments. The list is
	kept ordered by start offset. Data that overlaps with the neighbouring
	fragments is cut off the new buffer, and the buffer is merged with an
	adjacent fragment, if possible.

	\param start Offset of the first byte of the fragment.
	\param end Offset of the byte following the fragment.
	\param buffer The data of the fragment.
	\param lastFragment \c true if this is the final fragment of the packet.
	\return \c B_OK if the fragment has been queued, or if it was dropped
		(and freed) because its data is already there. \c B_BAD_DATA if
		\a start is not smaller than \a end, or the error of a failed merge;
		in these cases, \a buffer is neither queued nor freed.
*/
status_t
FragmentPacket::AddFragment(uint16 start, uint16 end, net_buffer* buffer,
	bool lastFragment)
{
	// restart the timer
	gStackModule->set_timer(&fTimer, FRAGMENT_TIMEOUT);

	if (start >= end) {
		// invalid fragment
		return B_BAD_DATA;
	}

	// Search for a position in the list to insert the fragment
	// (the list is walked from its tail: afterwards, "previous" is the last
	// fragment that does not start after the new one, and "next" the
	// fragment following it; either of them may be NULL)

	FragmentList::ReverseIterator iterator = fFragments.GetReverseIterator();
	net_buffer* previous = NULL;
	net_buffer* next = NULL;
	while ((previous = iterator.Next()) != NULL) {
		if (previous->fragment.start <= start) {
			// The new fragment can be inserted after this one
			break;
		}

		next = previous;
	}

	// See if we already have the fragment's data

	if (previous != NULL && previous->fragment.start <= start
		&& previous->fragment.end >= end) {
		// we do, so we can just drop this fragment
		gBufferModule->free(buffer);
		return B_OK;
	}

	fIndex = buffer->index;
		// adopt the buffer's device index

	TRACE("    previous: %p, next: %p", previous, next);

	// If we have parts of the data already, truncate as needed
	// ("start" and "end" are adjusted to the range that is left)

	if (previous != NULL && previous->fragment.end > start) {
		TRACE("    remove header %d bytes", previous->fragment.end - start);
		gBufferModule->remove_header(buffer, previous->fragment.end - start);
		start = previous->fragment.end;
	}
	if (next != NULL && end > next->fragment.start) {
		TRACE("    remove trailer %d bytes", end - next->fragment.start);
		gBufferModule->remove_trailer(buffer, end - next->fragment.start);
		end = next->fragment.start;
	}

	// Now try if we can already merge the fragments together

	// We will always keep the last buffer received, so that we can still
	// report an error (in which case we're not responsible for freeing it)

	if (previous != NULL && previous->fragment.end == start) {
		// The new fragment directly follows "previous": put the data of
		// "previous" in front of the new buffer, which then takes its place
		fFragments.Remove(previous);

		buffer->fragment.start = previous->fragment.start;
		buffer->fragment.end = end;

		status_t status = gBufferModule->merge(buffer, previous, false);
		TRACE("    merge previous: %s", strerror(status));
		if (status != B_OK) {
			// Insert "previous" at its previous position
			fFragments.InsertBefore(next, previous);
			return status;
		}

		fFragments.InsertBefore(next, buffer);

		// cut down existing hole
		fBytesLeft -= end - start;

		if (lastFragment && !fReceivedLastFragment) {
			// The end of the packet is known now, so everything beyond it
			// is no longer missing
			fReceivedLastFragment = true;
			fBytesLeft -= IPV6_MAXPACKET - end;
		}

		TRACE("    hole length: %d", (int)fBytesLeft);

		return B_OK;
	} else if (next != NULL && next->fragment.start == end) {
		// The new fragment directly precedes "next": append the data of
		// "next" to the new buffer, which then takes its place
		net_buffer* afterNext = (net_buffer*)next->link.next;
		fFragments.Remove(next);

		buffer->fragment.start = start;
		buffer->fragment.end = next->fragment.end;

		status_t status = gBufferModule->merge(buffer, next, true);
		TRACE("    merge next: %s", strerror(status));
		if (status != B_OK) {
			// Insert "next" at its previous position
			fFragments.InsertBefore(afterNext, next);
			return status;
		}

		fFragments.InsertBefore(afterNext, buffer);

		// cut down existing hole
		fBytesLeft -= end - start;

		if (lastFragment && !fReceivedLastFragment) {
			fReceivedLastFragment = true;
			fBytesLeft -= IPV6_MAXPACKET - end;
		}

		TRACE("    hole length: %d", (int)fBytesLeft);

		return B_OK;
	}

	// We couldn't merge the fragments, so we need to add it as is

	TRACE("    new fragment: %p, bytes %d-%d", buffer, start, end);

	buffer->fragment.start = start;
	buffer->fragment.end = end;
	fFragments.InsertBefore(next, buffer);

	// update length of the hole, if any
	fBytesLeft -= end - start;

	if (lastFragment && !fReceivedLastFragment) {
		fReceivedLastFragment = true;
		fBytesLeft -= IPV6_MAXPACKET - end;
	}

	TRACE("    hole length: %d", (int)fBytesLeft);

	return B_OK;
}
440 
441 
442 /*!	Reassembles the fragments to the specified buffer \a to.
443 	This buffer must have been added via AddFragment() before.
444 */
445 status_t
446 FragmentPacket::Reassemble(net_buffer* to)
447 {
448 	if (!IsComplete())
449 		return B_ERROR;
450 
451 	net_buffer* buffer = NULL;
452 
453 	net_buffer* fragment;
454 	while ((fragment = fFragments.RemoveHead()) != NULL) {
455 		if (buffer != NULL) {
456 			status_t status;
457 			if (to == fragment) {
458 				status = gBufferModule->merge(fragment, buffer, false);
459 				buffer = fragment;
460 			} else
461 				status = gBufferModule->merge(buffer, fragment, true);
462 			if (status != B_OK)
463 				return status;
464 		} else
465 			buffer = fragment;
466 	}
467 
468 	if (buffer != to)
469 		panic("ipv6 packet reassembly did not work correctly.");
470 
471 	to->index = fIndex;
472 		// reset the buffer's device index
473 
474 	return B_OK;
475 }
476 
477 
478 /*static*/ void
479 FragmentPacket::StaleTimer(struct net_timer* timer, void* data)
480 {
481 	FragmentPacket* packet = (FragmentPacket*)data;
482 	TRACE("Assembling FragmentPacket %p timed out!", packet);
483 
484 	MutexLocker locker(&sFragmentLock);
485 	sFragmentHash.Remove(packet);
486 	locker.Unlock();
487 
488 	if (!packet->fFragments.IsEmpty()) {
489 		// Send error: fragment reassembly time exceeded
490 		sDomain->module->error_reply(NULL, packet->fFragments.First(),
491 			B_NET_ERROR_REASSEMBLY_TIME_EXCEEDED, NULL);
492 	}
493 
494 	delete packet;
495 }
496 
497 
498 //	#pragma mark -
499 
500 
501 size_t
502 MulticastStateHash::HashKey(const KeyType &key) const
503 {
504 	size_t result = 0;
505 	result = jenkins_hashword((const uint32*)key.first,
506 		sizeof(in6_addr) / sizeof(uint32), result);
507 	result = jenkins_hashword(&key.second, 1, result);
508 	return result;
509 }
510 
511 
512 //	#pragma mark -
513 
514 
515 static inline void
516 dump_ipv6_header(IPv6Header &header)
517 {
518 #ifdef TRACE_IPV6
519 	char addrbuf[INET6_ADDRSTRLEN];
520 	dprintf("  version: %d\n", header.ProtocolVersion() >> 4);
521 	dprintf("  service_type: %d\n", header.ServiceType());
522 	dprintf("  payload_length: %d\n", header.PayloadLength());
523 	dprintf("  next_header: %d\n", header.NextHeader());
524 	dprintf("  hop_limit: %d\n", header.header.ip6_hops);
525 	dprintf("  source: %s\n", ip6_sprintf(&header.header.ip6_src, addrbuf));
526 	dprintf("  destination: %s\n",
527 		ip6_sprintf(&header.header.ip6_dst, addrbuf));
528 #endif
529 }
530 
531 
532 /*!	Attempts to re-assemble fragmented packets.
533 	\return B_OK if everything went well; if it could reassemble the packet,
534 		\a _buffer will point to its buffer, otherwise, it will be \c NULL.
535 	\return various error codes if something went wrong (mostly B_NO_MEMORY)
536 */
537 static status_t
538 reassemble_fragments(const IPv6Header &header, net_buffer** _buffer,
539 	uint16 offset)
540 {
541 	net_buffer* buffer = *_buffer;
542 	status_t status;
543 
544 	ip6_frag fragmentHeader;
545 	status = gBufferModule->read(buffer, offset, &fragmentHeader,
546 		sizeof(ip6_frag));
547 
548 	if (status != B_OK)
549 		return status;
550 
551 	struct ipv6_packet_key key;
552 	memcpy(&key.source, &header.Src(), sizeof(in6_addr));
553 	memcpy(&key.destination, &header.Dst(), sizeof(in6_addr));
554 	key.id = fragmentHeader.ip6f_ident;
555 	key.protocol = fragmentHeader.ip6f_nxt;
556 
557 	// TODO: Make locking finer grained.
558 	MutexLocker locker(&sFragmentLock);
559 
560 	FragmentPacket* packet = sFragmentHash.Lookup(key);
561 	if (packet == NULL) {
562 		// New fragment packet
563 		packet = new (std::nothrow) FragmentPacket(key);
564 		if (packet == NULL)
565 			return B_NO_MEMORY;
566 
567 		// add packet to hash
568 		status = sFragmentHash.Insert(packet);
569 		if (status != B_OK) {
570 			delete packet;
571 			return status;
572 		}
573 	}
574 
575 	uint16 start = ntohs(fragmentHeader.ip6f_offlg & IP6F_OFF_MASK);
576 	uint16 end = start + header.PayloadLength();
577 	bool lastFragment = (fragmentHeader.ip6f_offlg & IP6F_MORE_FRAG) == 0;
578 
579 	TRACE("   Received IPv6 %sfragment of size %d, offset %d.",
580 		lastFragment ? "last ": "", end - start, start);
581 
582 	// Remove header unless this is the first fragment
583 	if (start != 0)
584 		gBufferModule->remove_header(buffer, offset);
585 
586 	status = packet->AddFragment(start, end, buffer, lastFragment);
587 	if (status != B_OK)
588 		return status;
589 
590 	if (packet->IsComplete()) {
591 		sFragmentHash.Remove(packet);
592 			// no matter if reassembling succeeds, we won't need this packet
593 			// anymore
594 
595 		status = packet->Reassemble(buffer);
596 		delete packet;
597 
598 		// _buffer does not change
599 		return status;
600 	}
601 
602 	// This indicates that the packet is not yet complete
603 	*_buffer = NULL;
604 	return B_OK;
605 }
606 
607 
608 /*!	Fragments the incoming buffer and send all fragments via the specified
609 	\a route.
610 */
611 static status_t
612 send_fragments(ipv6_protocol* protocol, struct net_route* route,
613 	net_buffer* buffer, uint32 mtu)
614 {
615 	TRACE_SK(protocol, "SendFragments(%" B_PRIu32 " bytes, mtu %" B_PRIu32 ")",
616 		buffer->size, mtu);
617 
618 	NetBufferHeaderReader<IPv6Header> originalHeader(buffer);
619 	if (originalHeader.Status() != B_OK)
620 		return originalHeader.Status();
621 
622 	// TODO: currently FragHeader goes always as the last one, but in theory
623 	// ext. headers like AuthHeader and DestOptions should go after it.
624 	uint16 headersLength = originalHeader->GetHeaderOffset(buffer);
625 	uint16 extensionHeadersLength = headersLength
626 		- sizeof(ip6_hdr) + sizeof(ip6_frag);
627 	uint32 bytesLeft = buffer->size - headersLength;
628 	uint32 fragmentOffset = 0;
629 	status_t status = B_OK;
630 
631 	// TODO: this is rather inefficient
632 	net_buffer* headerBuffer = gBufferModule->clone(buffer, false);
633 	if (headerBuffer == NULL)
634 		return B_NO_MEMORY;
635 
636 	status = gBufferModule->remove_trailer(headerBuffer, bytesLeft);
637 	if (status != B_OK)
638 		return status;
639 
640 	BStackOrHeapArray<uint8, 128> data(bytesLeft);
641 	if (!data.IsValid())
642 		return B_NO_MEMORY;
643 	status = gBufferModule->read(buffer, headersLength, data, bytesLeft);
644 	if (status != B_OK)
645 		return status;
646 
647 	// TODO (from ipv4): we need to make sure all header space is contiguous or
648 	// use another construct.
649 	NetBufferHeaderReader<IPv6Header> bufferHeader(headerBuffer);
650 
651 	// Adapt MTU to be a multiple of 8 (fragment offsets can only be specified
652 	// this way)
653 	mtu -= headersLength + sizeof(ip6_frag);
654 	mtu &= ~7;
655 	TRACE("  adjusted MTU to %" B_PRIu32 " bytesLeft %" B_PRIu32, mtu,
656 		bytesLeft);
657 
658 	while (bytesLeft > 0) {
659 		uint32 fragmentLength = min_c(bytesLeft, mtu);
660 		bytesLeft -= fragmentLength;
661 		bool lastFragment = bytesLeft == 0;
662 
663 		bufferHeader->header.ip6_nxt = IPPROTO_FRAGMENT;
664 		bufferHeader->header.ip6_plen
665 			= htons(fragmentLength + extensionHeadersLength);
666 		bufferHeader.Sync();
667 
668 		ip6_frag fragmentHeader;
669 		fragmentHeader.ip6f_nxt = originalHeader->NextHeader();
670 		fragmentHeader.ip6f_reserved = 0;
671 		fragmentHeader.ip6f_offlg = htons(fragmentOffset) & IP6F_OFF_MASK;
672 		if (!lastFragment)
673 			fragmentHeader.ip6f_offlg |= IP6F_MORE_FRAG;
674 		fragmentHeader.ip6f_ident = htonl(atomic_add(&sFragmentID, 1));
675 
676 		TRACE("  send fragment of %" B_PRIu32 " bytes (%" B_PRIu32
677 			" bytes left)", fragmentLength, bytesLeft);
678 
679 		net_buffer* fragmentBuffer;
680 		if (!lastFragment)
681 			fragmentBuffer = gBufferModule->clone(headerBuffer, false);
682 		else
683 			fragmentBuffer = buffer;
684 
685 		if (fragmentBuffer == NULL) {
686 			status = B_NO_MEMORY;
687 			break;
688 		}
689 
690 		// copy data to fragment
691 		do {
692 			status = gBufferModule->append(
693 				fragmentBuffer, &fragmentHeader, sizeof(ip6_frag));
694 			if (status != B_OK)
695 				break;
696 
697 			status = gBufferModule->append(
698 				fragmentBuffer, &data[fragmentOffset], fragmentLength);
699 			if (status != B_OK)
700 				break;
701 
702 			// send fragment
703 			status = sDatalinkModule->send_routed_data(route, fragmentBuffer);
704 		} while (false);
705 
706 		if (lastFragment) {
707 			// we don't own the last buffer, so we don't have to free it
708 			break;
709 		}
710 
711 		if (status != B_OK) {
712 			gBufferModule->free(fragmentBuffer);
713 			break;
714 		}
715 
716 		fragmentOffset += fragmentLength;
717 	}
718 
719 	gBufferModule->free(headerBuffer);
720 	return status;
721 }
722 
723 
724 static status_t
725 deliver_multicast(net_protocol_module_info* module, net_buffer* buffer,
726 	bool deliverToRaw, net_interface *interface)
727 {
728 	sockaddr_in6* multicastAddr = (sockaddr_in6*)buffer->destination;
729 
730 	MulticastState::ValueIterator it = sMulticastState->Lookup(std::make_pair(
731 		&multicastAddr->sin6_addr, interface->index));
732 
733 	while (it.HasNext()) {
734 		IPv6GroupInterface* state = it.Next();
735 		ipv6_protocol* ipproto = state->Parent()->Socket();
736 
737 		if (deliverToRaw && ipproto->raw == NULL)
738 			continue;
739 
740 		if (state->FilterAccepts(buffer)) {
741 			// TODO: do as in IPv4 code
742 			module->deliver_data(ipproto, buffer);
743 		}
744 	}
745 
746 	return B_OK;
747 }
748 
749 
750 static status_t
751 deliver_multicast(net_protocol_module_info* module, net_buffer* buffer,
752 	bool deliverToRaw)
753 {
754 	if (module->deliver_data == NULL)
755 		return B_OK;
756 
757 	MutexLocker _(sMulticastGroupsLock);
758 
759 	status_t status = B_OK;
760 	if (buffer->interface_address != NULL) {
761 		status = deliver_multicast(module, buffer, deliverToRaw,
762 			buffer->interface_address->interface);
763 	} else {
764 #if 0 //  FIXME: multicast
765 		net_domain_private* domain = (net_domain_private*)sDomain;
766 		RecursiveLocker locker(domain->lock);
767 
768 		net_interface* interface = NULL;
769 		while (true) {
770 			interface = (net_interface*)list_get_next_item(
771 				&domain->interfaces, interface);
772 			if (interface == NULL)
773 				break;
774 
775 			status = deliver_multicast(module, buffer, deliverToRaw, interface);
776 			if (status < B_OK)
777 				break;
778 		}
779 #endif
780 	}
781 	return status;
782 }
783 
784 
785 static void
786 raw_receive_data(net_buffer* buffer)
787 {
788 	MutexLocker locker(sRawSocketsLock);
789 
790 	if (sRawSockets.IsEmpty())
791 		return;
792 
793 	TRACE("RawReceiveData(%i)", buffer->protocol);
794 
795 	if ((buffer->msg_flags & MSG_MCAST) != 0) {
796 		deliver_multicast(&gIPv6Module, buffer, true);
797 	} else {
798 		RawSocketList::Iterator iterator = sRawSockets.GetIterator();
799 
800 		while (iterator.HasNext()) {
801 			RawSocket* raw = iterator.Next();
802 
803 			if (raw->Socket()->protocol == buffer->protocol)
804 				raw->EnqueueClone(buffer);
805 		}
806 	}
807 }
808 
809 
810 static inline sockaddr*
811 fill_sockaddr_in6(sockaddr_in6* target, const in6_addr &address)
812 {
813 	target->sin6_family = AF_INET6;
814 	target->sin6_len = sizeof(sockaddr_in6);
815 	target->sin6_port = 0;
816 	target->sin6_flowinfo = 0;
817 	memcpy(target->sin6_addr.s6_addr, address.s6_addr, sizeof(in6_addr));
818 	target->sin6_scope_id = 0;
819 	return (sockaddr*)target;
820 }
821 
822 
823 status_t
824 IPv6Multicast::JoinGroup(IPv6GroupInterface* state)
825 {
826 	MutexLocker _(sMulticastGroupsLock);
827 
828 	sockaddr_in6 groupAddr;
829 	status_t status = sDatalinkModule->join_multicast(state->Interface(),
830 		sDomain, fill_sockaddr_in6(&groupAddr, state->Address()));
831 	if (status != B_OK)
832 		return status;
833 
834 	sMulticastState->Insert(state);
835 	return B_OK;
836 }
837 
838 
839 status_t
840 IPv6Multicast::LeaveGroup(IPv6GroupInterface* state)
841 {
842 	MutexLocker _(sMulticastGroupsLock);
843 
844 	sMulticastState->Remove(state);
845 
846 	sockaddr_in6 groupAddr;
847 	return sDatalinkModule->leave_multicast(state->Interface(), sDomain,
848 		fill_sockaddr_in6(&groupAddr, state->Address()));
849 }
850 
851 
852 static net_protocol_module_info*
853 receiving_protocol(uint8 protocol)
854 {
855 	net_protocol_module_info* module = sReceivingProtocol[protocol];
856 	if (module != NULL)
857 		return module;
858 
859 	MutexLocker locker(sReceivingProtocolLock);
860 
861 	module = sReceivingProtocol[protocol];
862 	if (module != NULL)
863 		return module;
864 
865 	if (gStackModule->get_domain_receiving_protocol(sDomain, protocol,
866 			&module) == B_OK)
867 		sReceivingProtocol[protocol] = module;
868 
869 	return module;
870 }
871 
872 
873 static status_t
874 ipv6_delta_group(IPv6GroupInterface* group, int option,
875 	net_interface* interface, const in6_addr* sourceAddr)
876 {
877 	switch (option) {
878 		case IPV6_JOIN_GROUP:
879 			return group->Add();
880 		case IPV6_LEAVE_GROUP:
881 			return group->Drop();
882 	}
883 
884 	return B_ERROR;
885 }
886 
887 
888 static status_t
889 ipv6_delta_membership(ipv6_protocol* protocol, int option,
890 	net_interface* interface, const in6_addr* groupAddr,
891 	const in6_addr* sourceAddr)
892 {
893 	IPv6MulticastFilter &filter = protocol->multicast_filter;
894 	IPv6GroupInterface* state = NULL;
895 	status_t status = B_OK;
896 
897 	switch (option) {
898 		// TODO: support more options
899 		case IPV6_JOIN_GROUP:
900 			status = filter.GetState(*groupAddr, interface, state, true);
901 			break;
902 
903 		case IPV6_LEAVE_GROUP:
904 			filter.GetState(*groupAddr, interface, state, false);
905 			if (state == NULL)
906 				return EADDRNOTAVAIL;
907 			break;
908 	}
909 
910 	if (status != B_OK)
911 		return status;
912 
913 	status = ipv6_delta_group(state, option, interface, sourceAddr);
914 	filter.ReturnState(state);
915 	return status;
916 }
917 
918 
919 static status_t
920 ipv6_delta_membership(ipv6_protocol* protocol, int option,
921 	uint32 interfaceIndex, in6_addr* groupAddr, in6_addr* sourceAddr)
922 {
923 	net_interface* interface;
924 
925 	// TODO: can the interface be unspecified?
926 	interface = sDatalinkModule->get_interface(sDomain, interfaceIndex);
927 
928 	if (interface == NULL)
929 		return B_DEVICE_NOT_FOUND;
930 
931 	return ipv6_delta_membership(protocol, option, interface,
932 		groupAddr, sourceAddr);
933 }
934 
935 
936 static status_t
937 get_int_option(void* target, size_t length, int value)
938 {
939 	if (length != sizeof(int))
940 		return B_BAD_VALUE;
941 
942 	return user_memcpy(target, &value, sizeof(int));
943 }
944 
945 
946 template<typename Type> static status_t
947 set_int_option(Type &target, const void* _value, size_t length)
948 {
949 	int value;
950 
951 	if (length != sizeof(int))
952 		return B_BAD_VALUE;
953 
954 	if (user_memcpy(&value, _value, sizeof(int)) != B_OK)
955 		return B_BAD_ADDRESS;
956 
957 	target = value;
958 	return B_OK;
959 }
960 
961 
962 //	#pragma mark -
963 
964 
965 net_protocol*
966 ipv6_init_protocol(net_socket* socket)
967 {
968 	ipv6_protocol* protocol = new (std::nothrow) ipv6_protocol();
969 	if (protocol == NULL)
970 		return NULL;
971 
972 	protocol->raw = NULL;
973 	protocol->service_type = 0;
974 	protocol->time_to_live = kDefaultTTL;
975 	protocol->multicast_time_to_live = kDefaultMulticastTTL;
976 	protocol->receive_hoplimit = 0;
977 	protocol->receive_pktinfo = 0;
978 	protocol->multicast_address = NULL;
979 	return protocol;
980 }
981 
982 
983 status_t
984 ipv6_uninit_protocol(net_protocol* _protocol)
985 {
986 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
987 
988 	delete protocol;
989 	return B_OK;
990 }
991 
992 
993 /*!	Since open() is only called on the top level protocol, when we get here
994 	it means we are on a SOCK_RAW socket.
995 */
996 status_t
997 ipv6_open(net_protocol* _protocol)
998 {
999 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1000 
1001 	RawSocket* raw = new (std::nothrow) RawSocket(protocol->socket);
1002 	if (raw == NULL)
1003 		return B_NO_MEMORY;
1004 
1005 	status_t status = raw->InitCheck();
1006 	if (status != B_OK) {
1007 		delete raw;
1008 		return status;
1009 	}
1010 
1011 	TRACE_SK(protocol, "Open()");
1012 
1013 	protocol->raw = raw;
1014 
1015 	MutexLocker locker(sRawSocketsLock);
1016 	sRawSockets.Add(raw);
1017 	return B_OK;
1018 }
1019 
1020 
1021 status_t
1022 ipv6_close(net_protocol* _protocol)
1023 {
1024 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1025 	RawSocket* raw = protocol->raw;
1026 	if (raw == NULL)
1027 		return B_ERROR;
1028 
1029 	TRACE_SK(protocol, "Close()");
1030 
1031 	MutexLocker locker(sRawSocketsLock);
1032 	sRawSockets.Remove(raw);
1033 	delete raw;
1034 	protocol->raw = NULL;
1035 
1036 	return B_OK;
1037 }
1038 
1039 
/*!	Nothing is done here; always returns \c B_OK. */
status_t
ipv6_free(net_protocol* protocol)
{
	return B_OK;
}
1045 
1046 
1047 status_t
1048 ipv6_connect(net_protocol* _protocol, const struct sockaddr* address)
1049 {
1050 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1051 	RawSocket* raw = protocol->raw;
1052 	if (raw == NULL)
1053 		return B_ERROR;
1054 	if (address->sa_len != sizeof(struct sockaddr_in6))
1055 		return B_BAD_VALUE;
1056 	if (address->sa_family != AF_INET6)
1057 		return EAFNOSUPPORT;
1058 
1059 	memcpy(&protocol->socket->peer, address, sizeof(struct sockaddr_in6));
1060 	sSocketModule->set_connected(protocol->socket);
1061 
1062 	return B_OK;
1063 }
1064 
1065 
/*!	Accepting connections is not supported; always returns \c EOPNOTSUPP. */
status_t
ipv6_accept(net_protocol* protocol, struct net_socket** _acceptedSocket)
{
	return EOPNOTSUPP;
}
1071 
1072 
1073 status_t
1074 ipv6_control(net_protocol* _protocol, int level, int option, void* value,
1075 	size_t* _length)
1076 {
1077 	if ((level & LEVEL_MASK) != IPPROTO_IPV6)
1078 		return sDatalinkModule->control(sDomain, option, value, _length);
1079 
1080 	return B_BAD_VALUE;
1081 }
1082 
1083 
1084 status_t
1085 ipv6_getsockopt(net_protocol* _protocol, int level, int option, void* value,
1086 	int* _length)
1087 {
1088 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1089 
1090 	if (level == IPPROTO_IPV6) {
1091 		// TODO: support more of these options
1092 
1093 		if (option == IPV6_MULTICAST_HOPS) {
1094 			return get_int_option(value, *_length,
1095 				protocol->multicast_time_to_live);
1096 		}
1097 		if (option == IPV6_MULTICAST_LOOP)
1098 			return EOPNOTSUPP;
1099 		if (option == IPV6_UNICAST_HOPS)
1100 			return get_int_option(value, *_length, protocol->time_to_live);
1101 		if (option == IPV6_V6ONLY)
1102 			return EOPNOTSUPP;
1103 		if (option == IPV6_RECVPKTINFO)
1104 			return get_int_option(value, *_length, protocol->receive_pktinfo);
1105 		if (option == IPV6_RECVHOPLIMIT)
1106 			return get_int_option(value, *_length, protocol->receive_hoplimit);
1107 		if (option == IPV6_JOIN_GROUP
1108 			|| option == IPV6_LEAVE_GROUP)
1109 			return EOPNOTSUPP;
1110 
1111 		dprintf("IPv6::getsockopt(): get unknown option: %d\n", option);
1112 		return ENOPROTOOPT;
1113 	}
1114 
1115 	return sSocketModule->get_option(protocol->socket, level, option, value,
1116 		_length);
1117 }
1118 
1119 
1120 status_t
1121 ipv6_setsockopt(net_protocol* _protocol, int level, int option,
1122 	const void* value, int length)
1123 {
1124 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1125 
1126 	if (level == IPPROTO_IPV6) {
1127 		// TODO: support more of these options
1128 
1129 		if (option == IPV6_MULTICAST_IF) {
1130 			if (length != sizeof(struct in6_addr))
1131 				return B_BAD_VALUE;
1132 
1133 			struct sockaddr_in6* address = new (std::nothrow) sockaddr_in6;
1134 			if (address == NULL)
1135 				return B_NO_MEMORY;
1136 
1137 			if (user_memcpy(&address->sin6_addr, value, sizeof(in6_addr))
1138 					!= B_OK) {
1139 				delete address;
1140 				return B_BAD_ADDRESS;
1141 			}
1142 
1143 			// Using the unspecifed address to remove the previous setting.
1144 			if (IN6_IS_ADDR_UNSPECIFIED(&address->sin6_addr)) {
1145 				delete address;
1146 				delete protocol->multicast_address;
1147 				protocol->multicast_address = NULL;
1148 				return B_OK;
1149 			}
1150 
1151 			struct net_interface* interface
1152 				= sDatalinkModule->get_interface_with_address(
1153 					(sockaddr*)address);
1154 			if (interface == NULL) {
1155 				delete address;
1156 				return EADDRNOTAVAIL;
1157 			}
1158 
1159 			delete protocol->multicast_address;
1160 			protocol->multicast_address = (struct sockaddr*)address;
1161 
1162 			sDatalinkModule->put_interface(interface);
1163 			return B_OK;
1164 		}
1165 		if (option == IPV6_MULTICAST_HOPS) {
1166 			return set_int_option(protocol->multicast_time_to_live,
1167 				value, length);
1168 		}
1169 		if (option == IPV6_MULTICAST_LOOP)
1170 			return EOPNOTSUPP;
1171 		if (option == IPV6_UNICAST_HOPS)
1172 			return set_int_option(protocol->time_to_live, value, length);
1173 		if (option == IPV6_V6ONLY)
1174 			return EOPNOTSUPP;
1175 		if (option == IPV6_RECVPKTINFO)
1176 			return set_int_option(protocol->receive_pktinfo, value, length);
1177 		if (option == IPV6_RECVHOPLIMIT)
1178 			return set_int_option(protocol->receive_hoplimit, value, length);
1179 		if (option == IPV6_JOIN_GROUP || option == IPV6_LEAVE_GROUP) {
1180 			ipv6_mreq mreq;
1181 			if (length != sizeof(ipv6_mreq))
1182 				return B_BAD_VALUE;
1183 			if (user_memcpy(&mreq, value, sizeof(ipv6_mreq)) != B_OK)
1184 				return B_BAD_ADDRESS;
1185 
1186 			return ipv6_delta_membership(protocol, option,
1187 				mreq.ipv6mr_interface, &mreq.ipv6mr_multiaddr, NULL);
1188 		}
1189 
1190 		dprintf("IPv6::setsockopt(): set unknown option: %d\n", option);
1191 		return ENOPROTOOPT;
1192 	}
1193 
1194 	return sSocketModule->set_option(protocol->socket, level, option,
1195 		value, length);
1196 }
1197 
1198 
1199 status_t
1200 ipv6_bind(net_protocol* protocol, const sockaddr* _address)
1201 {
1202 	if (_address->sa_family != AF_INET6)
1203 		return EAFNOSUPPORT;
1204 
1205 	const sockaddr_in6* address = (const sockaddr_in6*)_address;
1206 
1207 	// only INADDR_ANY and addresses of local interfaces are accepted:
1208 	if (IN6_IS_ADDR_UNSPECIFIED(&address->sin6_addr)
1209 		|| IN6_IS_ADDR_MULTICAST(&address->sin6_addr)
1210 		|| sDatalinkModule->is_local_address(sDomain, _address, NULL, NULL)) {
1211 		memcpy(&protocol->socket->address, address, sizeof(sockaddr_in6));
1212 		protocol->socket->address.ss_len = sizeof(sockaddr_in6);
1213 			// explicitly set length, as our callers can't be trusted to
1214 			// always provide the correct length!
1215 		return B_OK;
1216 	}
1217 
1218 	return B_ERROR;
1219 		// address is unknown on this host
1220 }
1221 
1222 
1223 status_t
1224 ipv6_unbind(net_protocol* protocol, struct sockaddr* address)
1225 {
1226 	// nothing to do here
1227 	return B_OK;
1228 }
1229 
1230 
1231 status_t
1232 ipv6_listen(net_protocol* protocol, int count)
1233 {
1234 	return EOPNOTSUPP;
1235 }
1236 
1237 
1238 status_t
1239 ipv6_shutdown(net_protocol* protocol, int direction)
1240 {
1241 	return EOPNOTSUPP;
1242 }
1243 
1244 
1245 static uint8
1246 ip6_select_hoplimit(net_protocol* _protocol, net_buffer* buffer)
1247 {
1248 	// TODO: the precedence should be as follows:
1249 	// 1. Hoplimit value specified via ioctl.
1250 	// 2. (If the outgoing interface is detected) the current
1251 	//     hop limit of the interface specified by router advertisement.
1252 	// 3. The system default hoplimit.
1253 
1254 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1255 	const bool isMulticast = buffer->msg_flags & MSG_MCAST;
1256 
1257 	if (protocol) {
1258 		return isMulticast ? protocol->multicast_time_to_live
1259 			: protocol->time_to_live;
1260 	}
1261 	return isMulticast ? kDefaultMulticastTTL : kDefaultTTL;
1262 }
1263 
1264 
1265 status_t
1266 ipv6_send_routed_data(net_protocol* _protocol, struct net_route* route,
1267 	net_buffer* buffer)
1268 {
1269 	if (route == NULL)
1270 		return B_BAD_VALUE;
1271 
1272 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1273 	net_interface* interface = route->interface_address->interface;
1274 	uint8 protocolNumber;
1275 	if (protocol != NULL && protocol->socket != NULL)
1276 		protocolNumber = protocol->socket->protocol;
1277 	else
1278 		protocolNumber = buffer->protocol;
1279 
1280 	TRACE_SK(protocol, "SendRoutedData(%p, %p [%" B_PRIu32 " bytes])", route,
1281 		buffer, buffer->size);
1282 
1283 	sockaddr_in6& source = *(sockaddr_in6*)buffer->source;
1284 	sockaddr_in6& destination = *(sockaddr_in6*)buffer->destination;
1285 
1286 	buffer->msg_flags &= ~(MSG_BCAST | MSG_MCAST);
1287 
1288 	if (IN6_IS_ADDR_UNSPECIFIED(&destination.sin6_addr))
1289 		return EDESTADDRREQ;
1290 
1291 	if (IN6_IS_ADDR_MULTICAST(&destination.sin6_addr))
1292 		buffer->msg_flags |= MSG_MCAST;
1293 
1294 	uint16 dataLength = buffer->size;
1295 
1296 	// Add IPv6 header
1297 
1298 	NetBufferPrepend<ip6_hdr> header(buffer);
1299 	if (header.Status() != B_OK)
1300 		return header.Status();
1301 
1302 	if (buffer->size > 0xffff)
1303 		return EMSGSIZE;
1304 
1305 	uint32 flowinfo = 0;
1306 		// TODO: fill in the flow id from somewhere
1307 	if (protocol) {
1308 		// fill in traffic class
1309 		flowinfo |= htonl(protocol->service_type << 20);
1310 	}
1311 	// set lower 28 bits
1312 	header->ip6_flow = htonl(flowinfo) & IPV6_FLOWINFO_MASK;
1313 	// set upper 4 bits
1314 	header->ip6_vfc |= IPV6_VERSION;
1315 	header->ip6_plen = htons(dataLength);
1316 	header->ip6_nxt = protocolNumber;
1317 	header->ip6_hlim = ip6_select_hoplimit(protocol, buffer);
1318 	memcpy(&header->ip6_src, &source.sin6_addr, sizeof(in6_addr));
1319 	memcpy(&header->ip6_dst, &destination.sin6_addr, sizeof(in6_addr));
1320 
1321 	header.Sync();
1322 
1323 	// write the checksum for ICMPv6 sockets
1324 	if (protocolNumber == IPPROTO_ICMPV6
1325 		&& dataLength >= sizeof(struct icmp6_hdr)) {
1326 		NetBufferField<uint16, sizeof(ip6_hdr)
1327 			+ offsetof(icmp6_hdr, icmp6_cksum)>
1328 			icmpChecksum(buffer);
1329 		// first make sure the existing checksum is zero
1330 		*icmpChecksum = 0;
1331 		icmpChecksum.Sync();
1332 
1333 		uint16 checksum = gBufferModule->checksum(buffer, sizeof(ip6_hdr),
1334 			buffer->size - sizeof(ip6_hdr), false);
1335 		checksum = ipv6_checksum(&header->ip6_src,
1336 			&header->ip6_dst, dataLength, protocolNumber,
1337 			checksum);
1338 		*icmpChecksum = checksum;
1339 	}
1340 
1341 	char addrbuf[INET6_ADDRSTRLEN];
1342 	ip6_sprintf(&destination.sin6_addr, addrbuf);
1343 	TRACE_SK(protocol, "  SendRoutedData(): destination: %s", addrbuf);
1344 
1345 	uint32 mtu = route->mtu ? route->mtu : interface->device->mtu;
1346 	if (buffer->size > mtu) {
1347 		// we need to fragment the packet
1348 		return send_fragments(protocol, route, buffer, mtu);
1349 	}
1350 
1351 	return sDatalinkModule->send_routed_data(route, buffer);
1352 }
1353 
1354 
1355 status_t
1356 ipv6_send_data(net_protocol* _protocol, net_buffer* buffer)
1357 {
1358 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1359 
1360 	TRACE_SK(protocol, "SendData(%p [%" B_PRIu32 " bytes])", buffer,
1361 		buffer->size);
1362 
1363 	sockaddr_in6* destination = (sockaddr_in6*)buffer->destination;
1364 
1365 	// handle IPV6_MULTICAST_IF
1366 	if (IN6_IS_ADDR_MULTICAST(&destination->sin6_addr)
1367 		&& protocol->multicast_address != NULL) {
1368 		net_interface_address* address = sDatalinkModule->get_interface_address(
1369 			protocol->multicast_address);
1370 		if (address == NULL || (address->interface->flags & IFF_UP) == 0) {
1371 			sDatalinkModule->put_interface_address(address);
1372 			return EADDRNOTAVAIL;
1373 		}
1374 
1375 		sDatalinkModule->put_interface_address(buffer->interface_address);
1376 		buffer->interface_address = address;
1377 			// the buffer takes over ownership of the address
1378 
1379 		net_route* route = sDatalinkModule->get_route(sDomain, address->local);
1380 		if (route == NULL)
1381 			return ENETUNREACH;
1382 
1383 		return sDatalinkModule->send_routed_data(route, buffer);
1384 	}
1385 
1386 	return sDatalinkModule->send_data(protocol, sDomain, buffer);
1387 }
1388 
1389 
1390 ssize_t
1391 ipv6_send_avail(net_protocol* protocol)
1392 {
1393 	return B_ERROR;
1394 }
1395 
1396 
1397 status_t
1398 ipv6_read_data(net_protocol* _protocol, size_t numBytes, uint32 flags,
1399 	net_buffer** _buffer)
1400 {
1401 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1402 	RawSocket* raw = protocol->raw;
1403 	if (raw == NULL)
1404 		return B_ERROR;
1405 
1406 	TRACE_SK(protocol, "ReadData(%" B_PRIuSIZE ", 0x%" B_PRIu32 ")", numBytes,
1407 		flags);
1408 
1409 	return raw->Dequeue(flags, _buffer);
1410 }
1411 
1412 
1413 ssize_t
1414 ipv6_read_avail(net_protocol* _protocol)
1415 {
1416 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1417 	RawSocket* raw = protocol->raw;
1418 	if (raw == NULL)
1419 		return B_ERROR;
1420 
1421 	return raw->AvailableData();
1422 }
1423 
1424 
1425 struct net_domain*
1426 ipv6_get_domain(net_protocol* protocol)
1427 {
1428 	return sDomain;
1429 }
1430 
1431 
1432 size_t
1433 ipv6_get_mtu(net_protocol* protocol, const struct sockaddr* address)
1434 {
1435 	net_route* route = sDatalinkModule->get_route(sDomain, address);
1436 	if (route == NULL)
1437 		return 0;
1438 
1439 	size_t mtu;
1440 	if (route->mtu != 0)
1441 		mtu = route->mtu;
1442 	else
1443 		mtu = route->interface_address->interface->device->mtu;
1444 
1445 	sDatalinkModule->put_route(sDomain, route);
1446 	// TODO: what about extension headers?
1447 	// this function probably shoud be changed in calling places, not here
1448 	return mtu - sizeof(ip6_hdr);
1449 }
1450 
1451 
1452 status_t
1453 ipv6_receive_data(net_buffer* buffer)
1454 {
1455 	TRACE("ReceiveData(%p [%" B_PRIu32 " bytes])", buffer, buffer->size);
1456 
1457 	NetBufferHeaderReader<IPv6Header> bufferHeader(buffer);
1458 	if (bufferHeader.Status() != B_OK)
1459 		return bufferHeader.Status();
1460 
1461 	IPv6Header &header = bufferHeader.Data();
1462 	// dump_ipv6_header(header);
1463 
1464 	if (header.ProtocolVersion() != IPV6_VERSION)
1465 		return B_BAD_TYPE;
1466 
1467 	uint16 packetLength = header.PayloadLength() + sizeof(ip6_hdr);
1468 	if (packetLength > buffer->size)
1469 		return B_BAD_DATA;
1470 
1471 	// lower layers notion of Broadcast or Multicast have no relevance to us
1472 	buffer->msg_flags &= ~(MSG_BCAST | MSG_MCAST);
1473 
1474 	sockaddr_in6 destination;
1475 	fill_sockaddr_in6(&destination, header.Dst());
1476 
1477 	if (IN6_IS_ADDR_MULTICAST(&destination.sin6_addr)) {
1478 		buffer->msg_flags |= MSG_MCAST;
1479 	} else {
1480 		uint32 matchedAddressType = 0;
1481 
1482 		// test if the packet is really for us
1483 		if (!sDatalinkModule->is_local_address(sDomain, (sockaddr*)&destination,
1484 				&buffer->interface_address, &matchedAddressType)
1485 			&& !sDatalinkModule->is_local_link_address(sDomain, true,
1486 				buffer->destination, &buffer->interface_address)) {
1487 
1488 			char srcbuf[INET6_ADDRSTRLEN];
1489 			char dstbuf[INET6_ADDRSTRLEN];
1490 			ip6_sprintf(&header.Src(), srcbuf);
1491 			ip6_sprintf(&header.Dst(), dstbuf);
1492 			TRACE("  ipv6_receive_data(): packet was not for us %s -> %s",
1493 				srcbuf, dstbuf);
1494 
1495 			// TODO: Send ICMPv6 error: Host unreachable
1496 			return B_ERROR;
1497 		}
1498 
1499 		// copy over special address types (MSG_BCAST or MSG_MCAST):
1500 		buffer->msg_flags |= matchedAddressType;
1501 	}
1502 
1503 	// set net_buffer's source/destination address
1504 	fill_sockaddr_in6((struct sockaddr_in6*)buffer->source, header.Src());
1505 	memcpy(buffer->destination, &destination, sizeof(sockaddr_in6));
1506 
1507 	// get the transport protocol and transport header offset
1508 	uint16 transportHeaderOffset = header.GetHeaderOffset(buffer);
1509 	uint8 protocol = buffer->protocol;
1510 
1511 	// remove any trailing/padding data
1512 	status_t status = gBufferModule->trim(buffer, packetLength);
1513 	if (status != B_OK)
1514 		return status;
1515 
1516 	// check for fragmentation
1517 	uint16 fragmentHeaderOffset
1518 		= header.GetHeaderOffset(buffer, IPPROTO_FRAGMENT);
1519 
1520 	if (fragmentHeaderOffset != 0) {
1521 		// this is a fragment
1522 		TRACE("  ipv6_receive_data(): Found a Fragment!");
1523 		status = reassemble_fragments(header, &buffer, fragmentHeaderOffset);
1524 		TRACE("  ipv6_receive_data():  -> %s", strerror(status));
1525 		if (status != B_OK)
1526 			return status;
1527 
1528 		if (buffer == NULL) {
1529 			// buffer was put into fragment packet
1530 			TRACE("  ipv6_receive_data(): Not yet assembled.");
1531 			return B_OK;
1532 		}
1533 	}
1534 
1535 	// tell the buffer to preserve removed ipv6 header - may need it later
1536 	gBufferModule->store_header(buffer);
1537 
1538 	// remove ipv6 headers for now
1539 	gBufferModule->remove_header(buffer, transportHeaderOffset);
1540 
1541 	// deliver the data to raw sockets
1542 	raw_receive_data(buffer);
1543 
1544 	net_protocol_module_info* module = receiving_protocol(protocol);
1545 	if (module == NULL) {
1546 		// no handler for this packet
1547 		return EAFNOSUPPORT;
1548 	}
1549 
1550 	if ((buffer->msg_flags & MSG_MCAST) != 0) {
1551 		// Unfortunately historical reasons dictate that the IP multicast
1552 		// model be a little different from the unicast one. We deliver
1553 		// this frame directly to all sockets registered with interest
1554 		// for this multicast group.
1555 		return deliver_multicast(module, buffer, false);
1556 	}
1557 
1558 	return module->receive_data(buffer);
1559 }
1560 
1561 
1562 status_t
1563 ipv6_deliver_data(net_protocol* _protocol, net_buffer* buffer)
1564 {
1565 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1566 
1567 	if (protocol->raw == NULL)
1568 		return B_ERROR;
1569 
1570 	return protocol->raw->EnqueueClone(buffer);
1571 }
1572 
1573 
1574 status_t
1575 ipv6_error_received(net_error error, net_buffer* data)
1576 {
1577 	return B_ERROR;
1578 }
1579 
1580 
1581 status_t
1582 ipv6_error_reply(net_protocol* protocol, net_buffer* cause, net_error error,
1583 	net_error_data* errorData)
1584 {
1585 	return B_ERROR;
1586 }
1587 
1588 
1589 ssize_t
1590 ipv6_process_ancillary_data_no_container(net_protocol* _protocol,
1591 	net_buffer* buffer, void* msgControl, size_t msgControlLen)
1592 {
1593 	ipv6_protocol* protocol = (ipv6_protocol*)_protocol;
1594 	ssize_t bytesWritten = 0;
1595 
1596 	if (protocol->receive_hoplimit != 0) {
1597 		TRACE("receive_hoplimit");
1598 
1599 		if (msgControlLen < CMSG_SPACE(sizeof(int)))
1600 			return B_NO_MEMORY;
1601 
1602 		// use some default value (64 at the moment) when the real one fails
1603 		int hopLimit = IPV6_DEFHLIM;
1604 
1605 		if (gBufferModule->stored_header_length(buffer)
1606 				>= (int)sizeof(ip6_hdr)) {
1607 			IPv6Header header;
1608 			if (gBufferModule->restore_header(buffer, 0,
1609 					&header, sizeof(ip6_hdr)) == B_OK
1610 				&& header.ProtocolVersion() != IPV6_VERSION) {
1611 				// header is OK, take hoplimit from it
1612 				hopLimit = header.header.ip6_hlim;
1613 			}
1614 		}
1615 
1616 		cmsghdr* messageHeader = (cmsghdr*)((char*)msgControl + bytesWritten);
1617 		messageHeader->cmsg_len = CMSG_LEN(sizeof(int));
1618 		messageHeader->cmsg_level = IPPROTO_IPV6;
1619 		messageHeader->cmsg_type = IPV6_HOPLIMIT;
1620 
1621 		memcpy(CMSG_DATA(messageHeader), &hopLimit, sizeof(int));
1622 
1623 		bytesWritten += CMSG_SPACE(sizeof(int));
1624 		msgControlLen -= CMSG_SPACE(sizeof(int));
1625 	}
1626 
1627 	if (protocol->receive_pktinfo != 0) {
1628 		TRACE("receive_pktinfo");
1629 
1630 		if (msgControlLen < CMSG_SPACE(sizeof(struct in6_pktinfo)))
1631 			return B_NO_MEMORY;
1632 
1633 		cmsghdr* messageHeader = (cmsghdr*)((char*)msgControl + bytesWritten);
1634 		messageHeader->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
1635 		messageHeader->cmsg_level = IPPROTO_IPV6;
1636 		messageHeader->cmsg_type = IPV6_PKTINFO;
1637 
1638 		struct in6_pktinfo pi;
1639 		memcpy(&pi.ipi6_addr,
1640 			&((struct sockaddr_in6*)buffer->destination)->sin6_addr,
1641 			sizeof(struct in6_addr));
1642 		if (buffer->interface_address != NULL
1643 			&& buffer->interface_address->interface != NULL)
1644 			pi.ipi6_ifindex = buffer->interface_address->interface->index;
1645 		else
1646 			pi.ipi6_ifindex = 0;
1647 		memcpy(CMSG_DATA(messageHeader), &pi, sizeof(struct in6_pktinfo));
1648 
1649 		bytesWritten += CMSG_SPACE(sizeof(struct in6_pktinfo));
1650 		msgControlLen -= CMSG_SPACE(sizeof(struct in6_pktinfo));
1651 	}
1652 
1653 	return bytesWritten;
1654 }
1655 
1656 
1657 //	#pragma mark -
1658 
1659 
1660 status_t
1661 init_ipv6()
1662 {
1663 	mutex_init(&sRawSocketsLock, "raw sockets");
1664 	mutex_init(&sFragmentLock, "IPv4 Fragments");
1665 	mutex_init(&sMulticastGroupsLock, "IPv6 multicast groups");
1666 	mutex_init(&sReceivingProtocolLock, "IPv6 receiving protocols");
1667 
1668 	status_t status;
1669 
1670 	sMulticastState = new MulticastState();
1671 	if (sMulticastState == NULL) {
1672 		status = B_NO_MEMORY;
1673 		goto err1;
1674 	}
1675 
1676 	status = sMulticastState->Init();
1677 	if (status != B_OK)
1678 		goto err2;
1679 
1680 	new (&sFragmentHash) FragmentTable();
1681 	status = sFragmentHash.Init(256);
1682 	if (status != B_OK)
1683 		goto err3;
1684 
1685 	new (&sRawSockets) RawSocketList;
1686 		// static initializers do not work in the kernel,
1687 		// so we have to do it here, manually
1688 		// TODO: for modules, this shouldn't be required
1689 
1690 	status = gStackModule->register_domain_protocols(AF_INET6, SOCK_RAW, 0,
1691 		NET_IPV6_MODULE_NAME, NULL);
1692 	if (status != B_OK)
1693 		goto err3;
1694 
1695 	status = gStackModule->register_domain(AF_INET6, "internet6", &gIPv6Module,
1696 		&gIPv6AddressModule, &sDomain);
1697 	if (status != B_OK)
1698 		goto err3;
1699 
1700 	TRACE("init_ipv6: OK");
1701 	return B_OK;
1702 
1703 err3:
1704 	sFragmentHash.~FragmentTable();
1705 err2:
1706 	delete sMulticastState;
1707 err1:
1708 	mutex_destroy(&sReceivingProtocolLock);
1709 	mutex_destroy(&sMulticastGroupsLock);
1710 	mutex_destroy(&sFragmentLock);
1711 	mutex_destroy(&sRawSocketsLock);
1712 	TRACE("init_ipv6: error %s", strerror(status));
1713 	return status;
1714 }
1715 
1716 
1717 status_t
1718 uninit_ipv6()
1719 {
1720 	mutex_lock(&sReceivingProtocolLock);
1721 
1722 	// put all the domain receiving protocols we gathered so far
1723 	for (uint32 i = 0; i < 256; i++) {
1724 		if (sReceivingProtocol[i] != NULL)
1725 			gStackModule->put_domain_receiving_protocol(sDomain, i);
1726 	}
1727 
1728 	sFragmentHash.~FragmentTable();
1729 	delete sMulticastState;
1730 
1731 	gStackModule->unregister_domain(sDomain);
1732 	mutex_unlock(&sReceivingProtocolLock);
1733 
1734 	mutex_destroy(&sMulticastGroupsLock);
1735 	mutex_destroy(&sFragmentLock);
1736 	mutex_destroy(&sRawSocketsLock);
1737 	mutex_destroy(&sReceivingProtocolLock);
1738 
1739 	return B_OK;
1740 }
1741 
1742 
1743 static status_t
1744 ipv6_std_ops(int32 op, ...)
1745 {
1746 	switch (op) {
1747 		case B_MODULE_INIT:
1748 			return init_ipv6();
1749 		case B_MODULE_UNINIT:
1750 			return uninit_ipv6();
1751 		default:
1752 			return B_ERROR;
1753 	}
1754 }
1755 
1756 
1757 net_protocol_module_info gIPv6Module = {
1758 	{
1759 		NET_IPV6_MODULE_NAME,
1760 		0,
1761 		ipv6_std_ops
1762 	},
1763 	NET_PROTOCOL_ATOMIC_MESSAGES,
1764 
1765 	ipv6_init_protocol,
1766 	ipv6_uninit_protocol,
1767 	ipv6_open,
1768 	ipv6_close,
1769 	ipv6_free,
1770 	ipv6_connect,
1771 	ipv6_accept,
1772 	ipv6_control,
1773 	ipv6_getsockopt,
1774 	ipv6_setsockopt,
1775 	ipv6_bind,
1776 	ipv6_unbind,
1777 	ipv6_listen,
1778 	ipv6_shutdown,
1779 	ipv6_send_data,
1780 	ipv6_send_routed_data,
1781 	ipv6_send_avail,
1782 	ipv6_read_data,
1783 	ipv6_read_avail,
1784 	ipv6_get_domain,
1785 	ipv6_get_mtu,
1786 	ipv6_receive_data,
1787 	ipv6_deliver_data,
1788 	ipv6_error_received,
1789 	ipv6_error_reply,
1790 	NULL,		// add_ancillary_data()
1791 	NULL,		// process_ancillary_data()
1792 	ipv6_process_ancillary_data_no_container,
1793 	NULL,		// send_data_no_buffer()
1794 	NULL		// read_data_no_buffer()
1795 };
1796 
1797 module_dependency module_dependencies[] = {
1798 	{NET_STACK_MODULE_NAME, (module_info**)&gStackModule},
1799 	{NET_BUFFER_MODULE_NAME, (module_info**)&gBufferModule},
1800 	{NET_DATALINK_MODULE_NAME, (module_info**)&sDatalinkModule},
1801 	{NET_SOCKET_MODULE_NAME, (module_info**)&sSocketModule},
1802 	{}
1803 };
1804 
1805 module_info* modules[] = {
1806 	(module_info*)&gIPv6Module,
1807 	NULL
1808 };
1809