xref: /haiku/src/add-ons/kernel/network/stack/net_buffer.cpp (revision 9760dcae2038d47442f4658c2575844c6cf92c40)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  *		Ingo Weinhold, ingo_weinhold@gmx.de
8  */
9 
10 
11 #include "utility.h"
12 
13 #include <net_buffer.h>
14 #include <slab/Slab.h>
15 #include <tracing.h>
16 #include <util/list.h>
17 
18 #include <ByteOrder.h>
19 #include <debug.h>
20 #include <kernel.h>
21 #include <KernelExport.h>
22 #include <util/DoublyLinkedList.h>
23 
24 #include <algorithm>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <sys/param.h>
28 #include <sys/uio.h>
29 
30 #include "ancillary_data.h"
31 
32 #include "paranoia_config.h"
33 
34 
35 //#define TRACE_BUFFER
36 #ifdef TRACE_BUFFER
37 #	define TRACE(x) dprintf x
38 #else
39 #	define TRACE(x) ;
40 #endif
41 
42 #define BUFFER_SIZE 2048
43 	// the maximum buffer size the implementation can support is 65536
44 
45 #define ENABLE_DEBUGGER_COMMANDS	1
46 //#define ENABLE_STATS				1
47 #define PARANOID_BUFFER_CHECK		NET_BUFFER_PARANOIA
48 
49 #define COMPONENT_PARANOIA_LEVEL	NET_BUFFER_PARANOIA
50 #include <debug_paranoia.h>
51 
52 #define DATA_NODE_READ_ONLY		0x1
53 
54 struct header_space {
55 	uint16	size;
56 	uint16	free;
57 };
58 
59 struct free_data {
60 	struct free_data* next;
61 	uint16			size;
62 };
63 
64 struct data_header {
65 	int32			ref_count;
66 	addr_t			physical_address;
67 	free_data*		first_free;
68 	uint8*			data_end;
69 	header_space	space;
70 	uint16			tail_space;
71 };
72 
73 struct data_node {
74 	struct list_link link;
75 	struct data_header* header;
76 	struct data_header* located;
77 	size_t			offset;		// the net_buffer-wide offset of this node
78 	uint8*			start;		// points to the start of the data
79 	uint16			flags;
80 	uint16			used;		// defines how much memory is used by this node
81 
82 	uint16 HeaderSpace() const
83 	{
84 		if ((flags & DATA_NODE_READ_ONLY) != 0)
85 			return 0;
86 		return header->space.free;
87 	}
88 
89 	void AddHeaderSpace(uint16 toAdd)
90 	{
91 		if ((flags & DATA_NODE_READ_ONLY) == 0) {
92 			header->space.size += toAdd;
93 			header->space.free += toAdd;
94 		}
95 	}
96 
97 	void SubtractHeaderSpace(uint16 toSubtract)
98 	{
99 		if ((flags & DATA_NODE_READ_ONLY) == 0) {
100 			header->space.size -= toSubtract;
101 			header->space.free -= toSubtract;
102 		}
103 	}
104 
105 	uint16 TailSpace() const
106 	{
107 		if ((flags & DATA_NODE_READ_ONLY) != 0)
108 			return 0;
109 		return header->tail_space;
110 	}
111 
112 	void SetTailSpace(uint16 space)
113 	{
114 		if ((flags & DATA_NODE_READ_ONLY) == 0)
115 			header->tail_space = space;
116 	}
117 
118 	void FreeSpace()
119 	{
120 		if ((flags & DATA_NODE_READ_ONLY) == 0) {
121 			uint16 space = used + header->tail_space;
122 			header->space.size += space;
123 			header->space.free += space;
124 			header->tail_space = 0;
125 		}
126 	}
127 };
128 
129 struct net_buffer_private : net_buffer {
130 	struct list					buffers;
131 	data_header*				allocation_header;
132 		// the current place where we allocate header space (nodes, ...)
133 	ancillary_data_container*	ancillary_data;
134 
135 	struct {
136 		struct sockaddr_storage	source;
137 		struct sockaddr_storage	destination;
138 	} storage;
139 };
140 
141 
142 #define DATA_HEADER_SIZE				_ALIGN(sizeof(data_header))
143 #define DATA_NODE_SIZE					_ALIGN(sizeof(data_node))
144 #define MAX_FREE_BUFFER_SIZE			(BUFFER_SIZE - DATA_HEADER_SIZE)
145 
146 
147 static object_cache* sNetBufferCache;
148 static object_cache* sDataNodeCache;
149 
150 
151 static status_t append_data(net_buffer* buffer, const void* data, size_t size);
152 static status_t trim_data(net_buffer* _buffer, size_t newSize);
153 static status_t remove_header(net_buffer* _buffer, size_t bytes);
154 static status_t remove_trailer(net_buffer* _buffer, size_t bytes);
155 static status_t append_cloned_data(net_buffer* _buffer, net_buffer* _source,
156 					uint32 offset, size_t bytes);
157 static status_t read_data(net_buffer* _buffer, size_t offset, void* data,
158 					size_t size);
159 
160 
161 #if ENABLE_STATS
162 static vint32 sAllocatedDataHeaderCount = 0;
163 static vint32 sAllocatedNetBufferCount = 0;
164 static vint32 sEverAllocatedDataHeaderCount = 0;
165 static vint32 sEverAllocatedNetBufferCount = 0;
166 #endif
167 
168 
169 #if NET_BUFFER_TRACING
170 
171 
172 namespace NetBufferTracing {
173 
174 
175 class NetBufferTraceEntry : public AbstractTraceEntry {
176 public:
177 	NetBufferTraceEntry(net_buffer* buffer)
178 		:
179 		fBuffer(buffer)
180 	{
181 #if NET_BUFFER_TRACING_STACK_TRACE
182 	fStackTrace = capture_tracing_stack_trace(
183 		NET_BUFFER_TRACING_STACK_TRACE, 0, false);
184 #endif
185 	}
186 
187 #if NET_BUFFER_TRACING_STACK_TRACE
188 	virtual void DumpStackTrace(TraceOutput& out)
189 	{
190 		out.PrintStackTrace(fStackTrace);
191 	}
192 #endif
193 
194 protected:
195 	net_buffer*	fBuffer;
196 #if NET_BUFFER_TRACING_STACK_TRACE
197 	tracing_stack_trace* fStackTrace;
198 #endif
199 };
200 
201 
202 class Create : public NetBufferTraceEntry {
203 public:
204 	Create(size_t headerSpace, net_buffer* buffer)
205 		:
206 		NetBufferTraceEntry(buffer),
207 		fHeaderSpace(headerSpace)
208 	{
209 		Initialized();
210 	}
211 
212 	virtual void AddDump(TraceOutput& out)
213 	{
214 		out.Print("net buffer create: header space: %lu -> buffer: %p",
215 			fHeaderSpace, fBuffer);
216 	}
217 
218 private:
219 	size_t		fHeaderSpace;
220 };
221 
222 
223 class Free : public NetBufferTraceEntry {
224 public:
225 	Free(net_buffer* buffer)
226 		:
227 		NetBufferTraceEntry(buffer)
228 	{
229 		Initialized();
230 	}
231 
232 	virtual void AddDump(TraceOutput& out)
233 	{
234 		out.Print("net buffer free: buffer: %p", fBuffer);
235 	}
236 };
237 
238 
239 class Duplicate : public NetBufferTraceEntry {
240 public:
241 	Duplicate(net_buffer* buffer, net_buffer* clone)
242 		:
243 		NetBufferTraceEntry(buffer),
244 		fClone(clone)
245 	{
246 		Initialized();
247 	}
248 
249 	virtual void AddDump(TraceOutput& out)
250 	{
251 		out.Print("net buffer dup: buffer: %p -> %p", fBuffer, fClone);
252 	}
253 
254 private:
255 	net_buffer*		fClone;
256 };
257 
258 
259 class Clone : public NetBufferTraceEntry {
260 public:
261 	Clone(net_buffer* buffer, bool shareFreeSpace, net_buffer* clone)
262 		:
263 		NetBufferTraceEntry(buffer),
264 		fClone(clone),
265 		fShareFreeSpace(shareFreeSpace)
266 	{
267 		Initialized();
268 	}
269 
270 	virtual void AddDump(TraceOutput& out)
271 	{
272 		out.Print("net buffer clone: buffer: %p, share free space: %s "
273 			"-> %p", fBuffer, fShareFreeSpace ? "true" : "false", fClone);
274 	}
275 
276 private:
277 	net_buffer*		fClone;
278 	bool			fShareFreeSpace;
279 };
280 
281 
282 class Split : public NetBufferTraceEntry {
283 public:
284 	Split(net_buffer* buffer, uint32 offset, net_buffer* newBuffer)
285 		:
286 		NetBufferTraceEntry(buffer),
287 		fNewBuffer(newBuffer),
288 		fOffset(offset)
289 	{
290 		Initialized();
291 	}
292 
293 	virtual void AddDump(TraceOutput& out)
294 	{
295 		out.Print("net buffer split: buffer: %p, offset: %lu "
296 			"-> %p", fBuffer, fOffset, fNewBuffer);
297 	}
298 
299 private:
300 	net_buffer*		fNewBuffer;
301 	uint32			fOffset;
302 };
303 
304 
305 class Merge : public NetBufferTraceEntry {
306 public:
307 	Merge(net_buffer* buffer, net_buffer* otherBuffer, bool after)
308 		:
309 		NetBufferTraceEntry(buffer),
310 		fOtherBuffer(otherBuffer),
311 		fAfter(after)
312 	{
313 		Initialized();
314 	}
315 
316 	virtual void AddDump(TraceOutput& out)
317 	{
318 		out.Print("net buffer merge: buffers: %p + %p, after: %s "
319 			"-> %p", fBuffer, fOtherBuffer, fAfter ? "true" : "false",
320 			fOtherBuffer);
321 	}
322 
323 private:
324 	net_buffer*		fOtherBuffer;
325 	bool			fAfter;
326 };
327 
328 
329 class AppendCloned : public NetBufferTraceEntry {
330 public:
331 	AppendCloned(net_buffer* buffer, net_buffer* source, uint32 offset,
332 		size_t size)
333 		:
334 		NetBufferTraceEntry(buffer),
335 		fSource(source),
336 		fOffset(offset),
337 		fSize(size)
338 	{
339 		Initialized();
340 	}
341 
342 	virtual void AddDump(TraceOutput& out)
343 	{
344 		out.Print("net buffer append cloned: buffer: %p, from: %p, "
345 			"offset: %lu, size: %lu", fBuffer, fSource, fOffset, fSize);
346 	}
347 
348 private:
349 	net_buffer*		fSource;
350 	uint32			fOffset;
351 	size_t			fSize;
352 };
353 
354 
355 class PrependSize : public NetBufferTraceEntry {
356 public:
357 	PrependSize(net_buffer* buffer, size_t size)
358 		:
359 		NetBufferTraceEntry(buffer),
360 		fSize(size)
361 	{
362 		Initialized();
363 	}
364 
365 	virtual void AddDump(TraceOutput& out)
366 	{
367 		out.Print("net buffer prepend size: buffer: %p, size: %lu", fBuffer,
368 			fSize);
369 	}
370 
371 private:
372 	size_t			fSize;
373 };
374 
375 
376 class AppendSize : public NetBufferTraceEntry {
377 public:
378 	AppendSize(net_buffer* buffer, size_t size)
379 		:
380 		NetBufferTraceEntry(buffer),
381 		fSize(size)
382 	{
383 		Initialized();
384 	}
385 
386 	virtual void AddDump(TraceOutput& out)
387 	{
388 		out.Print("net buffer append size: buffer: %p, size: %lu", fBuffer,
389 			fSize);
390 	}
391 
392 private:
393 	size_t			fSize;
394 };
395 
396 
397 class RemoveHeader : public NetBufferTraceEntry {
398 public:
399 	RemoveHeader(net_buffer* buffer, size_t size)
400 		:
401 		NetBufferTraceEntry(buffer),
402 		fSize(size)
403 	{
404 		Initialized();
405 	}
406 
407 	virtual void AddDump(TraceOutput& out)
408 	{
409 		out.Print("net buffer remove header: buffer: %p, size: %lu",
410 			fBuffer, fSize);
411 	}
412 
413 private:
414 	size_t			fSize;
415 };
416 
417 
418 class Trim : public NetBufferTraceEntry {
419 public:
420 	Trim(net_buffer* buffer, size_t size)
421 		:
422 		NetBufferTraceEntry(buffer),
423 		fSize(size)
424 	{
425 		Initialized();
426 	}
427 
428 	virtual void AddDump(TraceOutput& out)
429 	{
430 		out.Print("net buffer trim: buffer: %p, size: %lu",
431 			fBuffer, fSize);
432 	}
433 
434 private:
435 	size_t			fSize;
436 };
437 
438 
439 class Read : public NetBufferTraceEntry {
440 public:
441 	Read(net_buffer* buffer, uint32 offset, void* data, size_t size)
442 		:
443 		NetBufferTraceEntry(buffer),
444 		fData(data),
445 		fOffset(offset),
446 		fSize(size)
447 	{
448 		Initialized();
449 	}
450 
451 	virtual void AddDump(TraceOutput& out)
452 	{
453 		out.Print("net buffer read: buffer: %p, offset: %lu, size: %lu, "
454 			"data: %p", fBuffer, fOffset, fSize, fData);
455 	}
456 
457 private:
458 	void*			fData;
459 	uint32			fOffset;
460 	size_t			fSize;
461 };
462 
463 
464 class Write : public NetBufferTraceEntry {
465 public:
466 	Write(net_buffer* buffer, uint32 offset, const void* data, size_t size)
467 		:
468 		NetBufferTraceEntry(buffer),
469 		fData(data),
470 		fOffset(offset),
471 		fSize(size)
472 	{
473 		Initialized();
474 	}
475 
476 	virtual void AddDump(TraceOutput& out)
477 	{
478 		out.Print("net buffer write: buffer: %p, offset: %lu, size: %lu, "
479 			"data: %p", fBuffer, fOffset, fSize, fData);
480 	}
481 
482 private:
483 	const void*		fData;
484 	uint32			fOffset;
485 	size_t			fSize;
486 };
487 
488 
489 #if NET_BUFFER_TRACING >= 2
490 
491 class DataHeaderTraceEntry : public AbstractTraceEntry {
492 public:
493 	DataHeaderTraceEntry(data_header* header)
494 		:
495 		fHeader(header)
496 	{
497 	}
498 
499 protected:
500 	data_header*	fHeader;
501 };
502 
503 
504 class CreateDataHeader : public DataHeaderTraceEntry {
505 public:
506 	CreateDataHeader(data_header* header)
507 		:
508 		DataHeaderTraceEntry(header)
509 	{
510 		Initialized();
511 	}
512 
513 	virtual void AddDump(TraceOutput& out)
514 	{
515 		out.Print("net buffer data header create:  header: %p", fHeader);
516 	}
517 };
518 
519 
520 class AcquireDataHeader : public DataHeaderTraceEntry {
521 public:
522 	AcquireDataHeader(data_header* header, int32 refCount)
523 		:
524 		DataHeaderTraceEntry(header),
525 		fRefCount(refCount)
526 	{
527 		Initialized();
528 	}
529 
530 	virtual void AddDump(TraceOutput& out)
531 	{
532 		out.Print("net buffer data header acquire: header: %p "
533 			"-> ref count: %ld", fHeader, fRefCount);
534 	}
535 
536 private:
537 	int32			fRefCount;
538 };
539 
540 
541 class ReleaseDataHeader : public DataHeaderTraceEntry {
542 public:
543 	ReleaseDataHeader(data_header* header, int32 refCount)
544 		:
545 		DataHeaderTraceEntry(header),
546 		fRefCount(refCount)
547 	{
548 		Initialized();
549 	}
550 
551 	virtual void AddDump(TraceOutput& out)
552 	{
553 		out.Print("net buffer data header release: header: %p "
554 			"-> ref count: %ld", fHeader, fRefCount);
555 	}
556 
557 private:
558 	int32			fRefCount;
559 };
560 
561 #	define T2(x)	new(std::nothrow) NetBufferTracing::x
562 #else
563 #	define T2(x)
564 #endif	// NET_BUFFER_TRACING >= 2
565 
566 }	// namespace NetBufferTracing
567 
568 #	define T(x)	new(std::nothrow) NetBufferTracing::x
569 
570 #else
571 #	define T(x)
572 #	define T2(x)
573 #endif	// NET_BUFFER_TRACING
574 
575 
576 static void
577 dump_buffer(net_buffer* _buffer)
578 {
579 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
580 
581 	dprintf("buffer %p, size %ld\n", buffer, buffer->size);
582 	data_node* node = NULL;
583 	while ((node = (data_node*)list_get_next_item(&buffer->buffers, node))
584 			!= NULL) {
585 		dprintf("  node %p, offset %lu, used %u, header %u, tail %u, "
586 			"header %p\n", node, node->offset, node->used, node->HeaderSpace(),
587 			node->TailSpace(), node->header);
588 		//dump_block((char*)node->start, node->used, "    ");
589 		dump_block((char*)node->start, min_c(node->used, 32), "    ");
590 	}
591 }
592 
593 #if ENABLE_DEBUGGER_COMMANDS
594 
595 static int
596 dump_net_buffer(int argc, char** argv)
597 {
598 	if (argc != 2) {
599 		kprintf("usage: %s [address]\n", argv[0]);
600 		return 0;
601 	}
602 
603 	dump_buffer((net_buffer*)parse_expression(argv[1]));
604 	return 0;
605 }
606 
607 #endif	// ENABLE_DEBUGGER_COMMANDS
608 
609 #if ENABLE_STATS
610 
611 static int
612 dump_net_buffer_stats(int argc, char** argv)
613 {
614 	kprintf("allocated data headers: %7ld / %7ld\n", sAllocatedDataHeaderCount,
615 		sEverAllocatedDataHeaderCount);
616 	kprintf("allocated net buffers:  %7ld / %7ld\n", sAllocatedNetBufferCount,
617 		sEverAllocatedNetBufferCount);
618 	return 0;
619 }
620 
621 #endif	// ENABLE_STATS
622 
623 #if PARANOID_BUFFER_CHECK
624 
625 static void
626 check_buffer(net_buffer* _buffer)
627 {
628 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
629 
630 	// sum up the size of all nodes
631 	size_t size = 0;
632 
633 	data_node* node = (data_node*)list_get_first_item(&buffer->buffers);
634 	while (node != NULL) {
635 		if (node->offset != size) {
636 			panic("net_buffer %p: bad node %p offset (%lu vs. %lu)",
637 				buffer, node, node->offset, size);
638 			return;
639 		}
640 		size += node->used;
641 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
642 	}
643 
644 	if (size != buffer->size) {
645 		panic("net_buffer %p size != sum of its data node sizes (%lu vs. %lu)",
646 			buffer, buffer->size, size);
647 		return;
648 	}
649 }
650 
651 
652 #if 0
653 static void
654 check_buffer_contents(net_buffer* buffer, size_t offset, const void* data,
655 	size_t size)
656 {
657 	void* bufferData = malloc(size);
658 	if (bufferData == NULL)
659 		return;
660 
661 	if (read_data(buffer, offset, bufferData, size) == B_OK) {
662 		if (memcmp(bufferData, data, size) != 0) {
663 			int32 index = 0;
664 			while (((uint8*)data)[index] == ((uint8*)bufferData)[index])
665 				index++;
666 			panic("check_buffer_contents(): contents check failed at index "
667 				"%ld, buffer: %p, offset: %lu, size: %lu", index, buffer,
668 				offset, size);
669 		}
670 	} else {
671 		panic("failed to read from buffer %p, offset: %lu, size: %lu",
672 			buffer, offset, size);
673 	}
674 
675 	free(bufferData);
676 }
677 
678 
679 static void
680 check_buffer_contents(net_buffer* buffer, size_t offset, net_buffer* source,
681 	size_t sourceOffset, size_t size)
682 {
683 	void* bufferData = malloc(size);
684 	if (bufferData == NULL)
685 		return;
686 
687 	if (read_data(source, sourceOffset, bufferData, size) == B_OK) {
688 		check_buffer_contents(buffer, offset, bufferData, size);
689 	} else {
690 		panic("failed to read from source buffer %p, offset: %lu, size: %lu",
691 			source, sourceOffset, size);
692 	}
693 
694 	free(bufferData);
695 }
696 #endif
697 
698 
699 # 	define CHECK_BUFFER(buffer)	check_buffer(buffer)
700 #else
701 # 	define CHECK_BUFFER(buffer)	do {} while (false)
702 #endif	// PARANOID_BUFFER_CHECK
703 
704 
705 static inline data_header*
706 allocate_data_header()
707 {
708 #if ENABLE_STATS
709 	atomic_add(&sAllocatedDataHeaderCount, 1);
710 	atomic_add(&sEverAllocatedDataHeaderCount, 1);
711 #endif
712 	return (data_header*)object_cache_alloc(sDataNodeCache, CACHE_DONT_SLEEP);
713 }
714 
715 
716 static inline net_buffer_private*
717 allocate_net_buffer()
718 {
719 #if ENABLE_STATS
720 	atomic_add(&sAllocatedNetBufferCount, 1);
721 	atomic_add(&sEverAllocatedNetBufferCount, 1);
722 #endif
723 	return (net_buffer_private*)object_cache_alloc(sNetBufferCache,
724 		CACHE_DONT_SLEEP);
725 }
726 
727 
728 static inline void
729 free_data_header(data_header* header)
730 {
731 #if ENABLE_STATS
732 	if (header != NULL)
733 		atomic_add(&sAllocatedDataHeaderCount, -1);
734 #endif
735 	object_cache_free(sDataNodeCache, header);
736 }
737 
738 
739 static inline void
740 free_net_buffer(net_buffer_private* buffer)
741 {
742 #if ENABLE_STATS
743 	if (buffer != NULL)
744 		atomic_add(&sAllocatedNetBufferCount, -1);
745 #endif
746 	object_cache_free(sNetBufferCache, buffer);
747 }
748 
749 
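/*!	Allocates a new data header (a single BUFFER_SIZE slab object) with one
	reference and \a headerSpace bytes reserved for allocating nodes and
	prepending data. Returns \c NULL if the allocation fails.
*/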
750 static data_header*
751 create_data_header(size_t headerSpace)
752 {
753 	data_header* header = allocate_data_header();
754 	if (header == NULL)
755 		return NULL;
756 
757 	header->ref_count = 1;
758 	header->physical_address = 0;
759 		// TODO: initialize this correctly
760 	header->space.size = headerSpace;
761 	header->space.free = headerSpace;
762 	header->data_end = (uint8*)header + DATA_HEADER_SIZE;
763 	header->tail_space = (uint8*)header + BUFFER_SIZE - header->data_end
764 		- headerSpace;
765 	header->first_free = NULL;
766 
767 	TRACE(("%ld:   create new data header %p\n", find_thread(NULL), header));
768 	T2(CreateDataHeader(header));
769 	return header;
770 }
771 
772 
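/*!	Releases one reference to \a header and frees it once the last reference
	has been released.
*/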
773 static void
774 release_data_header(data_header* header)
775 {
776 	int32 refCount = atomic_add(&header->ref_count, -1);
777 	T2(ReleaseDataHeader(header, refCount - 1));
778 	if (refCount != 1)
779 		return;
780 
781 	TRACE(("%ld:   free header %p\n", find_thread(NULL), header));
782 	free_data_header(header);
783 }
784 
785 
786 inline void
787 acquire_data_header(data_header* header)
788 {
789 	int32 refCount = atomic_add(&header->ref_count, 1);
790 	(void)refCount;
791 	T2(AcquireDataHeader(header, refCount + 1));
792 }
793 
794 
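/*!	Returns a previously allocated chunk of header space to the header's free
	list, so that alloc_data_header_space() can reuse it.
*/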
795 static void
796 free_data_header_space(data_header* header, uint8* data, size_t size)
797 {
798 	if (size < sizeof(free_data))
799 		size = sizeof(free_data);
800 
801 	free_data* freeData = (free_data*)data;
802 	freeData->next = header->first_free;
803 	freeData->size = size;
804 
805 	header->first_free = freeData;
806 }
807 
808 
809 /*!	Tries to allocate \a size bytes from the free space in the header.
810 */
811 static uint8*
812 alloc_data_header_space(data_header* header, size_t size)
813 {
814 	if (size < sizeof(free_data))
815 		size = sizeof(free_data);
816 	size = _ALIGN(size);
817 
818 	if (header->first_free != NULL && header->first_free->size >= size) {
819 		// the first entry of the header space matches the allocation's needs
820 
821 		// TODO: If the free space is greater than what shall be allocated, we
822 		// leak the remainder of the space. We should only allocate multiples of
823 		// _ALIGN(sizeof(free_data)) and split free space in this case. It's not
824 		// that pressing, since the only thing allocated ATM are data_nodes, and
825 		// thus the free space entries will always have the right size.
826 		uint8* data = (uint8*)header->first_free;
827 		header->first_free = header->first_free->next;
828 		return data;
829 	}
830 
831 	if (header->space.free < size) {
832 		// not enough contiguous header space left -- search the free list
833 		free_data* freeData = header->first_free;
834 		free_data* last = NULL;
835 		while (freeData != NULL) {
836 			if (last != NULL && freeData->size >= size) {
837 				// take this one
838 				last->next = freeData->next;
839 				return (uint8*)freeData;
840 			}
841 
842 			last = freeData;
843 			freeData = freeData->next;
844 		}
845 
846 		return NULL;
847 	}
848 
849 	// allocate new space
850 
851 	uint8* data = header->data_end;
852 	header->data_end += size;
853 	header->space.free -= size;
854 
855 	return data;
856 }
857 
858 
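/*!	Tries to allocate \a size bytes from the buffer's current allocation
	header. If that header is exhausted, a fresh one is created and becomes
	the buffer's new allocation header. If \a _header is given, it is set to
	the header the space was taken from.
*/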
859 static uint8*
860 alloc_data_header_space(net_buffer_private* buffer, size_t size,
861 	data_header** _header = NULL)
862 {
863 	// try to allocate in our current allocation header
864 	uint8* allocated = alloc_data_header_space(buffer->allocation_header, size);
865 	if (allocated == NULL) {
866 		// not enough header space left -- create a fresh buffer for headers
867 		data_header* header = create_data_header(MAX_FREE_BUFFER_SIZE);
868 		if (header == NULL)
869 			return NULL;
870 
871 		// release our reference to the old header -- it will stay around
872 		// until the last reference to it is released
873 		release_data_header(buffer->allocation_header);
874 		buffer->allocation_header = header;
875 			// We keep the initial reference.
876 
877 		// now the allocation can only fail if size is too big
878 		allocated = alloc_data_header_space(buffer->allocation_header, size);
879 	}
880 
881 	if (_header != NULL)
882 		*_header = buffer->allocation_header;
883 
884 	return allocated;
885 }
886 
887 
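/*!	Allocates a data_node from \a header's own header space and initializes
	it as the (empty) first node of that header, with its start pointing at
	the beginning of the data area. A reference to the header is acquired.
*/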
888 static data_node*
889 add_first_data_node(data_header* header)
890 {
891 	data_node* node = (data_node*)alloc_data_header_space(header,
892 		sizeof(data_node));
893 	if (node == NULL)
894 		return NULL;
895 
896 	TRACE(("%ld:   add first data node %p to header %p\n", find_thread(NULL),
897 		node, header));
898 
899 	acquire_data_header(header);
900 
901 	memset(node, 0, sizeof(struct data_node));
902 	node->located = header;
903 	node->header = header;
904 	node->offset = 0;
905 	node->start = header->data_end + header->space.free;
906 	node->used = 0;
907 	node->flags = 0;
908 
909 	return node;
910 }
911 
912 
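/*!	Allocates an empty data_node for \a header from the buffer's allocation
	header (which may be a different header) and acquires references to both
	headers as needed.
*/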
913 static data_node*
914 add_data_node(net_buffer_private* buffer, data_header* header)
915 {
916 	data_header* located;
917 	data_node* node = (data_node*)alloc_data_header_space(buffer,
918 		sizeof(data_node), &located);
919 	if (node == NULL)
920 		return NULL;
921 
922 	TRACE(("%ld:   add data node %p to header %p\n", find_thread(NULL), node,
923 		header));
924 
925 	acquire_data_header(header);
926 	if (located != header)
927 		acquire_data_header(located);
928 
929 	memset(node, 0, sizeof(struct data_node));
930 	node->located = located;
931 	node->header = header;
932 	node->flags = 0;
933 	return node;
934 }
935 
936 
937 void
938 remove_data_node(data_node* node)
939 {
940 	data_header* located = node->located;
941 
942 	TRACE(("%ld:   remove data node %p from header %p (located %p)\n",
943 		find_thread(NULL), node, node->header, located));
944 
945 	// Move all used and tail space to the header space, which is useful in case
946 	// this is the first node of a buffer (i.e. the header is an allocation
947 	// header).
948 	node->FreeSpace();
949 
950 	if (located != node->header)
951 		release_data_header(node->header);
952 
953 	if (located == NULL)
954 		return;
955 
956 	free_data_header_space(located, (uint8*)node, sizeof(data_node));
957 
958 	release_data_header(located);
959 }
960 
961 
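/*!	Returns the data_node that contains the byte at \a offset, or \c NULL if
	the offset lies beyond the buffer's data.
*/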
962 static inline data_node*
963 get_node_at_offset(net_buffer_private* buffer, size_t offset)
964 {
965 	data_node* node = (data_node*)list_get_first_item(&buffer->buffers);
966 	while (node->offset + node->used <= offset) {
967 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
968 		if (node == NULL)
969 			return NULL;
970 	}
971 
972 	return node;
973 }
974 
975 
976 /*!	Appends the first \a size bytes of the \a from net_buffer's data to the
977 	\a to net_buffer. The source buffer remains unchanged.
978 */
979 static status_t
980 append_data_from_buffer(net_buffer* to, const net_buffer* from, size_t size)
981 {
982 	net_buffer_private* source = (net_buffer_private*)from;
983 	net_buffer_private* dest = (net_buffer_private*)to;
984 
985 	if (size > from->size)
986 		return B_BAD_VALUE;
987 	if (size == 0)
988 		return B_OK;
989 
990 	data_node* nodeTo = get_node_at_offset(source, size);
991 	if (nodeTo == NULL)
992 		return B_BAD_VALUE;
993 
994 	data_node* node = (data_node*)list_get_first_item(&source->buffers);
995 	if (node == NULL) {
996 		CHECK_BUFFER(source);
997 		return B_ERROR;
998 	}
999 
1000 	while (node != nodeTo) {
1001 		if (append_data(dest, node->start, node->used) < B_OK) {
1002 			CHECK_BUFFER(dest);
1003 			return B_ERROR;
1004 		}
1005 
1006 		node = (data_node*)list_get_next_item(&source->buffers, node);
1007 	}
1008 
1009 	int32 diff = node->offset + node->used - size;
1010 	if (append_data(dest, node->start, node->used - diff) < B_OK) {
1011 		CHECK_BUFFER(dest);
1012 		return B_ERROR;
1013 	}
1014 
1015 	CHECK_BUFFER(dest);
1016 
1017 	return B_OK;
1018 }
1019 
1020 
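/*!	Copies the source and destination addresses as well as the flags,
	interface, offset, protocol, and type from \a source to \a destination.
	The buffer data itself is not touched.
*/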
1021 static void
1022 copy_metadata(net_buffer* destination, const net_buffer* source)
1023 {
1024 	memcpy(destination->source, source->source,
1025 		min_c(source->source->sa_len, sizeof(sockaddr_storage)));
1026 	memcpy(destination->destination, source->destination,
1027 		min_c(source->destination->sa_len, sizeof(sockaddr_storage)));
1028 
1029 	destination->flags = source->flags;
1030 	destination->interface = source->interface;
1031 	destination->offset = source->offset;
1032 	destination->protocol = source->protocol;
1033 	destination->type = source->type;
1034 }
1035 
1036 
1037 //	#pragma mark - module API
1038 
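// A minimal usage sketch of this API (hypothetical payload, protocolHeader,
// and sizes; error handling omitted):
//
//	net_buffer* buffer = create_buffer(128);
//	if (buffer != NULL) {
//		append_data(buffer, payload, payloadLength);
//		prepend_data(buffer, &protocolHeader, sizeof(protocolHeader));
//		...
//		free_buffer(buffer);
//	}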
1039 
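/*!	Creates a new, empty net_buffer with at least \a headerSpace bytes
	(aligned and clamped to the supported range) reserved for prepending
	data. Returns \c NULL if out of memory.
*/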
1040 static net_buffer*
1041 create_buffer(size_t headerSpace)
1042 {
1043 	net_buffer_private* buffer = allocate_net_buffer();
1044 	if (buffer == NULL)
1045 		return NULL;
1046 
1047 	TRACE(("%ld: create buffer %p\n", find_thread(NULL), buffer));
1048 
1049 	// Make sure headerSpace is valid and at least the initial node fits.
1050 	headerSpace = _ALIGN(headerSpace);
1051 	if (headerSpace < DATA_NODE_SIZE)
1052 		headerSpace = DATA_NODE_SIZE;
1053 	else if (headerSpace > MAX_FREE_BUFFER_SIZE)
1054 		headerSpace = MAX_FREE_BUFFER_SIZE;
1055 
1056 	data_header* header = create_data_header(headerSpace);
1057 	if (header == NULL) {
1058 		free_net_buffer(buffer);
1059 		return NULL;
1060 	}
1061 	buffer->allocation_header = header;
1062 
1063 	data_node* node = add_first_data_node(header);
1064 
1065 	list_init(&buffer->buffers);
1066 	list_add_item(&buffer->buffers, node);
1067 
1068 	buffer->ancillary_data = NULL;
1069 
1070 	buffer->source = (sockaddr*)&buffer->storage.source;
1071 	buffer->destination = (sockaddr*)&buffer->storage.destination;
1072 
1073 	buffer->storage.source.ss_len = 0;
1074 	buffer->storage.destination.ss_len = 0;
1075 
1076 	buffer->interface = NULL;
1077 	buffer->offset = 0;
1078 	buffer->flags = 0;
1079 	buffer->size = 0;
1080 
1081 	buffer->type = -1;
1082 
1083 	CHECK_BUFFER(buffer);
1084 	CREATE_PARANOIA_CHECK_SET(buffer, "net_buffer");
1085 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1086 		sizeof(buffer->size));
1087 
1088 	T(Create(headerSpace, buffer));
1089 
1090 	return buffer;
1091 }
1092 
1093 
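/*!	Frees the \a buffer with all of its data nodes, its ancillary data, and
	the buffer structure itself.
*/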
1094 static void
1095 free_buffer(net_buffer* _buffer)
1096 {
1097 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1098 
1099 	TRACE(("%ld: free buffer %p\n", find_thread(NULL), buffer));
1100 	T(Free(buffer));
1101 
1102 	CHECK_BUFFER(buffer);
1103 	DELETE_PARANOIA_CHECK_SET(buffer);
1104 
1105 	while (data_node* node
1106 			= (data_node*)list_remove_head_item(&buffer->buffers)) {
1107 		remove_data_node(node);
1108 	}
1109 
1110 	delete_ancillary_data_container(buffer->ancillary_data);
1111 
1112 	release_data_header(buffer->allocation_header);
1113 
1114 	free_net_buffer(buffer);
1115 }
1116 
1117 
1118 /*!	Creates a duplicate of the \a buffer. The new buffer does not share internal
1119 	storage; the two buffers are completely independent of each other.
1120 */
1121 static net_buffer*
1122 duplicate_buffer(net_buffer* _buffer)
1123 {
1124 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1125 
1126 	ParanoiaChecker _(buffer);
1127 
1128 	TRACE(("%ld: duplicate_buffer(buffer %p)\n", find_thread(NULL), buffer));
1129 
1130 	// TODO: We might want to choose a better header space. The minimal
1131 	// one doesn't allow prepending any data without allocating a new header.
1132 	// The same holds for appending cloned data.
1133 	net_buffer* duplicate = create_buffer(DATA_NODE_SIZE);
1134 	if (duplicate == NULL)
1135 		return NULL;
1136 
1137 	TRACE(("%ld:   duplicate: %p\n", find_thread(NULL), duplicate));
1138 
1139 	// copy the data from the source buffer
1140 
1141 	data_node* node = (data_node*)list_get_first_item(&buffer->buffers);
1142 	while (node != NULL) {
1143 		if (append_data(duplicate, node->start, node->used) < B_OK) {
1144 			free_buffer(duplicate);
1145 			CHECK_BUFFER(buffer);
1146 			return NULL;
1147 		}
1148 
1149 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
1150 	}
1151 
1152 	copy_metadata(duplicate, buffer);
1153 
1154 	ASSERT(duplicate->size == buffer->size);
1155 	CHECK_BUFFER(buffer);
1156 	CHECK_BUFFER(duplicate);
1157 	RUN_PARANOIA_CHECKS(duplicate);
1158 
1159 	T(Duplicate(buffer, duplicate));
1160 
1161 	return duplicate;
1162 }
1163 
1164 
1165 /*!	Clones the buffer by grabbing another reference to the underlying data.
1166 	If that data changes, it will be changed in the clone as well.
1167 
1168 	If \a shareFreeSpace is \c true, the cloned buffer may claim the free
1169 	space in the original buffer, just as the original buffer still can. If
1170 	you use this option, it is your responsibility to ensure that only one
1171 	of the buffers actually does so.
1172 */
1173 static net_buffer*
1174 clone_buffer(net_buffer* _buffer, bool shareFreeSpace)
1175 {
1176 	// TODO: See if the commented-out code can be fixed in a safe way. We could
1177 	// probably place cloned nodes on a header not belonging to our buffer, if
1178 	// we don't free the header space for the node when removing it. Otherwise we
1179 	// mess with the header's free list which might at the same time be accessed
1180 	// by another thread.
1181 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1182 
1183 	net_buffer* clone = create_buffer(MAX_FREE_BUFFER_SIZE);
1184 	if (clone == NULL)
1185 		return NULL;
1186 
1187 	if (append_cloned_data(clone, buffer, 0, buffer->size) != B_OK) {
1188 		free_buffer(clone);
1189 		return NULL;
1190 	}
1191 
1192 	copy_metadata(clone, buffer);
1193 	ASSERT(clone->size == buffer->size);
1194 
1195 	return clone;
1196 
1197 #if 0
1198 	ParanoiaChecker _(buffer);
1199 
1200 	TRACE(("%ld: clone_buffer(buffer %p)\n", find_thread(NULL), buffer));
1201 
1202 	net_buffer_private* clone = allocate_net_buffer();
1203 	if (clone == NULL)
1204 		return NULL;
1205 
1206 	TRACE(("%ld:   clone: %p\n", find_thread(NULL), buffer));
1207 
1208 	data_node* sourceNode = (data_node*)list_get_first_item(&buffer->buffers);
1209 	if (sourceNode == NULL) {
1210 		free_net_buffer(clone);
1211 		return NULL;
1212 	}
1213 
1214 	clone->source = (sockaddr*)&clone->storage.source;
1215 	clone->destination = (sockaddr*)&clone->storage.destination;
1216 
1217 	list_init(&clone->buffers);
1218 
1219 	// grab reference to this buffer - all additional nodes will get
1220 	// theirs in add_data_node()
1221 	acquire_data_header(sourceNode->header);
1222 	data_node* node = &clone->first_node;
1223 	node->header = sourceNode->header;
1224 	node->located = NULL;
1225 	node->used_header_space = &node->own_header_space;
1226 
1227 	while (sourceNode != NULL) {
1228 		node->start = sourceNode->start;
1229 		node->used = sourceNode->used;
1230 		node->offset = sourceNode->offset;
1231 
1232 		if (shareFreeSpace) {
1233 			// both buffers could claim the free space - note that this option
1234 			// has to be used carefully
1235 			node->used_header_space = &sourceNode->header->space;
1236 			node->tail_space = sourceNode->tail_space;
1237 		} else {
1238 			// the free space stays with the original buffer
1239 			node->used_header_space->size = 0;
1240 			node->used_header_space->free = 0;
1241 			node->tail_space = 0;
1242 		}
1243 
1244 		// add node to clone's list of buffers
1245 		list_add_item(&clone->buffers, node);
1246 
1247 		sourceNode = (data_node*)list_get_next_item(&buffer->buffers,
1248 			sourceNode);
1249 		if (sourceNode == NULL)
1250 			break;
1251 
1252 		node = add_data_node(sourceNode->header);
1253 		if (node == NULL) {
1254 			// There was not enough space left for another node in this buffer
1255 			// TODO: handle this case!
1256 			panic("clone buffer hits size limit... (fix me)");
1257 			free_net_buffer(clone);
1258 			return NULL;
1259 		}
1260 	}
1261 
1262 	copy_metadata(clone, buffer);
1263 
1264 	ASSERT(clone->size == buffer->size);
1265 	CREATE_PARANOIA_CHECK_SET(clone, "net_buffer");
1266 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, clone, &clone->size,
1267 		sizeof(clone->size));
1268 	CHECK_BUFFER(buffer);
1269 	CHECK_BUFFER(clone);
1270 
1271 	T(Clone(buffer, shareFreeSpace, clone));
1272 
1273 	return clone;
1274 #endif
1275 }
1276 
1277 
1278 /*!	Splits the buffer at the given offset; the data preceding the offset
1279 	(the header data) is returned as a new buffer.
1280 */
1281 static net_buffer*
1282 split_buffer(net_buffer* from, uint32 offset)
1283 {
1284 	net_buffer* buffer = create_buffer(DATA_NODE_SIZE);
1285 	if (buffer == NULL)
1286 		return NULL;
1287 
1288 	copy_metadata(buffer, from);
1289 
1290 	ParanoiaChecker _(from);
1291 	ParanoiaChecker _2(buffer);
1292 
1293 	TRACE(("%ld: split_buffer(buffer %p -> %p, offset %ld)\n",
1294 		find_thread(NULL), from, buffer, offset));
1295 
1296 	if (append_data_from_buffer(buffer, from, offset) == B_OK) {
1297 		if (remove_header(from, offset) == B_OK) {
1298 			CHECK_BUFFER(from);
1299 			CHECK_BUFFER(buffer);
1300 			T(Split(from, offset, buffer));
1301 			return buffer;
1302 		}
1303 	}
1304 
1305 	free_buffer(buffer);
1306 	CHECK_BUFFER(from);
1307 	return NULL;
1308 }
1309 
1310 
1311 /*!	Merges the second buffer with the first. If \a after is \c true, the
1312 	second buffer's contents will be appended to the first one's; otherwise
1313 	they will be prepended.
1314 	The second buffer will be freed if this function succeeds.
1315 */
1316 static status_t
1317 merge_buffer(net_buffer* _buffer, net_buffer* _with, bool after)
1318 {
1319 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1320 	net_buffer_private* with = (net_buffer_private*)_with;
1321 	if (with == NULL)
1322 		return B_BAD_VALUE;
1323 
1324 	TRACE(("%ld: merge buffer %p with %p (%s)\n", find_thread(NULL), buffer,
1325 		with, after ? "after" : "before"));
1326 	T(Merge(buffer, with, after));
1327 	//dump_buffer(buffer);
1328 	//dprintf("with:\n");
1329 	//dump_buffer(with);
1330 
1331 	ParanoiaChecker _(buffer);
1332 	CHECK_BUFFER(buffer);
1333 	CHECK_BUFFER(with);
1334 
1335 	// TODO: this is currently very simplistic, I really need to finish the
1336 	//	harder part of this implementation (data_node management per header)
1337 
1338 	data_node* before = NULL;
1339 
1340 	// TODO: Allocate the nodes (the only part that can fail) upfront. Put them
1341 	// in a list, so we can clean up easily if necessary.
1342 
1343 	if (!after) {
1344 		// change offset of all nodes already in the buffer
1345 		data_node* node = NULL;
1346 		while (true) {
1347 			node = (data_node*)list_get_next_item(&buffer->buffers, node);
1348 			if (node == NULL)
1349 				break;
1350 
1351 			node->offset += with->size;
1352 			if (before == NULL)
1353 				before = node;
1354 		}
1355 	}
1356 
1357 	data_node* last = NULL;
1358 
1359 	while (true) {
1360 		data_node* node = (data_node*)list_get_next_item(&with->buffers, last);
1361 		if (node == NULL)
1362 			break;
1363 
1364 		if ((uint8*)node > (uint8*)node->header
1365 			&& (uint8*)node < (uint8*)node->header + BUFFER_SIZE) {
1366 			// The node is already in the buffer, we can just move it
1367 			// over to the new owner
1368 			list_remove_item(&with->buffers, node);
1369 			with->size -= node->used;
1370 		} else {
1371 			// we need a new place for this node
1372 			data_node* newNode = add_data_node(buffer, node->header);
1373 			if (newNode == NULL) {
1374 				// TODO: try to revert buffers to their initial state!!
1375 				return ENOBUFS;
1376 			}
1377 
1378 			last = node;
1379 			*newNode = *node;
1380 			node = newNode;
1381 				// the old node will get freed with its buffer
1382 		}
1383 
1384 		if (after) {
1385 			list_add_item(&buffer->buffers, node);
1386 			node->offset = buffer->size;
1387 		} else
1388 			list_insert_item_before(&buffer->buffers, before, node);
1389 
1390 		buffer->size += node->used;
1391 	}
1392 
1393 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1394 		sizeof(buffer->size));
1395 
1396 	// the data has been merged completely at this point
1397 	free_buffer(with);
1398 
1399 	//dprintf(" merge result:\n");
1400 	//dump_buffer(buffer);
1401 	CHECK_BUFFER(buffer);
1402 
1403 	return B_OK;
1404 }
1405 
1406 
1407 /*!	Writes into existing allocated memory.
1408 	\return B_BAD_VALUE if you write outside of the buffer's current
1409 		bounds.
1410 */
1411 static status_t
1412 write_data(net_buffer* _buffer, size_t offset, const void* data, size_t size)
1413 {
1414 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1415 
1416 	T(Write(buffer, offset, data, size));
1417 
1418 	ParanoiaChecker _(buffer);
1419 
1420 	if (offset + size > buffer->size)
1421 		return B_BAD_VALUE;
1422 	if (size == 0)
1423 		return B_OK;
1424 
1425 	// find first node to write into
1426 	data_node* node = get_node_at_offset(buffer, offset);
1427 	if (node == NULL)
1428 		return B_BAD_VALUE;
1429 
1430 	offset -= node->offset;
1431 
1432 	while (true) {
1433 		size_t written = min_c(size, node->used - offset);
1434 		if (IS_USER_ADDRESS(data)) {
1435 			if (user_memcpy(node->start + offset, data, written) != B_OK)
1436 				return B_BAD_ADDRESS;
1437 		} else
1438 			memcpy(node->start + offset, data, written);
1439 
1440 		size -= written;
1441 		if (size == 0)
1442 			break;
1443 
1444 		offset = 0;
1445 		data = (void*)((uint8*)data + written);
1446 
1447 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
1448 		if (node == NULL)
1449 			return B_BAD_VALUE;
1450 	}
1451 
1452 	CHECK_BUFFER(buffer);
1453 
1454 	return B_OK;
1455 }
1456 
1457 
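/*!	Copies \a size bytes starting at \a offset from the buffer into \a data,
	which may be a userland address.
	\return B_BAD_VALUE if the requested range lies outside the buffer.
*/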
1458 static status_t
1459 read_data(net_buffer* _buffer, size_t offset, void* data, size_t size)
1460 {
1461 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1462 
1463 	T(Read(buffer, offset, data, size));
1464 
1465 	ParanoiaChecker _(buffer);
1466 
1467 	if (offset + size > buffer->size)
1468 		return B_BAD_VALUE;
1469 	if (size == 0)
1470 		return B_OK;
1471 
1472 	// find first node to read from
1473 	data_node* node = get_node_at_offset(buffer, offset);
1474 	if (node == NULL)
1475 		return B_BAD_VALUE;
1476 
1477 	offset -= node->offset;
1478 
1479 	while (true) {
1480 		size_t bytesRead = min_c(size, node->used - offset);
1481 		if (IS_USER_ADDRESS(data)) {
1482 			if (user_memcpy(data, node->start + offset, bytesRead) != B_OK)
1483 				return B_BAD_ADDRESS;
1484 		} else
1485 			memcpy(data, node->start + offset, bytesRead);
1486 
1487 		size -= bytesRead;
1488 		if (size == 0)
1489 			break;
1490 
1491 		offset = 0;
1492 		data = (void*)((uint8*)data + bytesRead);
1493 
1494 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
1495 		if (node == NULL)
1496 			return B_BAD_VALUE;
1497 	}
1498 
1499 	CHECK_BUFFER(buffer);
1500 
1501 	return B_OK;
1502 }
1503 
1504 
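/*!	Reserves \a size bytes at the beginning of the buffer, prepending new
	data headers if the existing header space does not suffice. If the
	reserved space is contiguous, a pointer to it is stored in
	\a _contiguousBuffer, otherwise \c NULL is stored there.
*/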
1505 static status_t
1506 prepend_size(net_buffer* _buffer, size_t size, void** _contiguousBuffer)
1507 {
1508 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1509 	data_node* node = (data_node*)list_get_first_item(&buffer->buffers);
1510 
1511 	T(PrependSize(buffer, size));
1512 
1513 	ParanoiaChecker _(buffer);
1514 
1515 	TRACE(("%ld: prepend_size(buffer %p, size %ld) [has %u]\n",
1516 		find_thread(NULL), buffer, size, node->HeaderSpace()));
1517 	//dump_buffer(buffer);
1518 
1519 	if (node->HeaderSpace() < size) {
1520 		// we need to prepend new buffers
1521 
1522 		size_t bytesLeft = size;
1523 		size_t sizePrepended = 0;
1524 		do {
1525 			if (node->HeaderSpace() == 0) {
1526 				size_t headerSpace = MAX_FREE_BUFFER_SIZE;
1527 				data_header* header = create_data_header(headerSpace);
1528 				if (header == NULL) {
1529 					remove_header(buffer, sizePrepended);
1530 					return B_NO_MEMORY;
1531 				}
1532 
1533 				data_node* previous = node;
1534 
1535 				node = (data_node*)add_first_data_node(header);
1536 
1537 				list_insert_item_before(&buffer->buffers, previous, node);
1538 
1539 				// Release the initial reference to the header, so that it will
1540 				// be deleted when the node is removed.
1541 				release_data_header(header);
1542 			}
1543 
1544 			size_t willConsume = min_c(bytesLeft, node->HeaderSpace());
1545 
1546 			node->SubtractHeaderSpace(willConsume);
1547 			node->start -= willConsume;
1548 			node->used += willConsume;
1549 			bytesLeft -= willConsume;
1550 			sizePrepended += willConsume;
1551 		} while (bytesLeft > 0);
1552 
1553 		// correct data offset in all nodes
1554 
1555 		size_t offset = 0;
1556 		node = NULL;
1557 		while ((node = (data_node*)list_get_next_item(&buffer->buffers,
1558 				node)) != NULL) {
1559 			node->offset = offset;
1560 			offset += node->used;
1561 		}
1562 
1563 		if (_contiguousBuffer)
1564 			*_contiguousBuffer = NULL;
1565 	} else {
1566 		// the data fits into this buffer
1567 		node->SubtractHeaderSpace(size);
1568 		node->start -= size;
1569 		node->used += size;
1570 
1571 		if (_contiguousBuffer)
1572 			*_contiguousBuffer = node->start;
1573 
1574 		// adjust offset of following nodes
1575 		while ((node = (data_node*)list_get_next_item(&buffer->buffers, node))
1576 				!= NULL) {
1577 			node->offset += size;
1578 		}
1579 	}
1580 
1581 	buffer->size += size;
1582 
1583 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1584 		sizeof(buffer->size));
1585 
1586 	//dprintf(" prepend_size result:\n");
1587 	//dump_buffer(buffer);
1588 	CHECK_BUFFER(buffer);
1589 	return B_OK;
1590 }
1591 
1592 
1593 static status_t
1594 prepend_data(net_buffer* buffer, const void* data, size_t size)
1595 {
1596 	void* contiguousBuffer;
1597 	status_t status = prepend_size(buffer, size, &contiguousBuffer);
1598 	if (status < B_OK)
1599 		return status;
1600 
1601 	if (contiguousBuffer) {
1602 		if (IS_USER_ADDRESS(data)) {
1603 			if (user_memcpy(contiguousBuffer, data, size) != B_OK)
1604 				return B_BAD_ADDRESS;
1605 		} else
1606 			memcpy(contiguousBuffer, data, size);
1607 	} else
1608 		write_data(buffer, 0, data, size);
1609 
1610 	//dprintf(" prepend result:\n");
1611 	//dump_buffer(buffer);
1612 
1613 	return B_OK;
1614 }
1615 
1616 
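/*!	Reserves \a size bytes at the end of the buffer, appending new data
	headers if the last node's tail space does not suffice. If the reserved
	space is contiguous, a pointer to it is stored in \a _contiguousBuffer,
	otherwise \c NULL is stored there.
*/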
1617 static status_t
1618 append_size(net_buffer* _buffer, size_t size, void** _contiguousBuffer)
1619 {
1620 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1621 	data_node* node = (data_node*)list_get_last_item(&buffer->buffers);
1622 
1623 	T(AppendSize(buffer, size));
1624 
1625 	ParanoiaChecker _(buffer);
1626 
1627 	TRACE(("%ld: append_size(buffer %p, size %ld)\n", find_thread(NULL),
1628 		buffer, size));
1629 	//dump_buffer(buffer);
1630 
1631 	if (node->TailSpace() < size) {
1632 		// we need to append at least one new buffer
1633 		uint32 previousTailSpace = node->TailSpace();
1634 		uint32 headerSpace = DATA_NODE_SIZE;
1635 		uint32 sizeUsed = MAX_FREE_BUFFER_SIZE - headerSpace;
1636 
1637 		// allocate space left in the node
1638 		node->SetTailSpace(0);
1639 		node->used += previousTailSpace;
1640 		buffer->size += previousTailSpace;
1641 		uint32 sizeAdded = previousTailSpace;
1642 		SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1643 			sizeof(buffer->size));
1644 
1645 		// allocate all buffers
1646 
1647 		while (sizeAdded < size) {
1648 			if (sizeAdded + sizeUsed > size) {
1649 				// last data_header and not all available space is used
1650 				sizeUsed = size - sizeAdded;
1651 			}
1652 
1653 			data_header* header = create_data_header(headerSpace);
1654 			if (header == NULL) {
1655 				remove_trailer(buffer, sizeAdded);
1656 				return B_NO_MEMORY;
1657 			}
1658 
1659 			node = add_first_data_node(header);
1660 			if (node == NULL) {
1661 				release_data_header(header);
1662 				return B_NO_MEMORY;
1663 			}
1664 
1665 			node->SetTailSpace(node->TailSpace() - sizeUsed);
1666 			node->used = sizeUsed;
1667 			node->offset = buffer->size;
1668 
1669 			buffer->size += sizeUsed;
1670 			sizeAdded += sizeUsed;
1671 			SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1672 				sizeof(buffer->size));
1673 
1674 			list_add_item(&buffer->buffers, node);
1675 
1676 			// Release the initial reference to the header, so that it will
1677 			// be deleted when the node is removed.
1678 			release_data_header(header);
1679 		}
1680 
1681 		if (_contiguousBuffer)
1682 			*_contiguousBuffer = NULL;
1683 
1684 		//dprintf(" append result 1:\n");
1685 		//dump_buffer(buffer);
1686 		CHECK_BUFFER(buffer);
1687 
1688 		return B_OK;
1689 	}
1690 
1691 	// the data fits into this buffer
1692 	node->SetTailSpace(node->TailSpace() - size);
1693 
1694 	if (_contiguousBuffer)
1695 		*_contiguousBuffer = node->start + node->used;
1696 
1697 	node->used += size;
1698 	buffer->size += size;
1699 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1700 		sizeof(buffer->size));
1701 
1702 	//dprintf(" append result 2:\n");
1703 	//dump_buffer(buffer);
1704 	CHECK_BUFFER(buffer);
1705 
1706 	return B_OK;
1707 }
1708 
1709 
1710 static status_t
1711 append_data(net_buffer* buffer, const void* data, size_t size)
1712 {
1713 	size_t used = buffer->size;
1714 
1715 	void* contiguousBuffer;
1716 	status_t status = append_size(buffer, size, &contiguousBuffer);
1717 	if (status < B_OK)
1718 		return status;
1719 
1720 	if (contiguousBuffer) {
1721 		if (IS_USER_ADDRESS(data)) {
1722 			if (user_memcpy(contiguousBuffer, data, size) != B_OK)
1723 				return B_BAD_ADDRESS;
1724 		} else
1725 			memcpy(contiguousBuffer, data, size);
1726 	} else
1727 		write_data(buffer, used, data, size);
1728 
1729 	return B_OK;
1730 }
1731 
1732 
1733 /*!	Removes bytes from the beginning of the buffer.
1734 */
1735 static status_t
1736 remove_header(net_buffer* _buffer, size_t bytes)
1737 {
1738 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1739 
1740 	T(RemoveHeader(buffer, bytes));
1741 
1742 	ParanoiaChecker _(buffer);
1743 
1744 	if (bytes > buffer->size)
1745 		return B_BAD_VALUE;
1746 
1747 	TRACE(("%ld: remove_header(buffer %p, %ld bytes)\n", find_thread(NULL),
1748 		buffer, bytes));
1749 	//dump_buffer(buffer);
1750 
1751 	size_t left = bytes;
1752 	data_node* node = NULL;
1753 
1754 	while (left >= 0) {
1755 		node = (data_node*)list_get_first_item(&buffer->buffers);
1756 		if (node == NULL) {
1757 			if (left == 0)
1758 				break;
1759 			CHECK_BUFFER(buffer);
1760 			return B_ERROR;
1761 		}
1762 
1763 		if (node->used > left)
1764 			break;
1765 
1766 		// node will be removed completely
1767 		list_remove_item(&buffer->buffers, node);
1768 		left -= node->used;
1769 		remove_data_node(node);
1770 		node = NULL;
1771 	}
1772 
1773 	// cut remaining node, if any
1774 
1775 	if (node != NULL) {
1776 		size_t cut = min_c(node->used, left);
1777 		node->offset = 0;
1778 		node->start += cut;
1779 		node->AddHeaderSpace(cut);
1780 		node->used -= cut;
1781 
1782 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
1783 	}
1784 
1785 	// adjust offset of following nodes
1786 	while (node != NULL) {
1787 		node->offset -= bytes;
1788 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
1789 	}
1790 
1791 	buffer->size -= bytes;
1792 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1793 		sizeof(buffer->size));
1794 
1795 	//dprintf(" remove result:\n");
1796 	//dump_buffer(buffer);
1797 	CHECK_BUFFER(buffer);
1798 
1799 	return B_OK;
1800 }
1801 
1802 
1803 /*!	Removes bytes from the end of the buffer.
1804 */
1805 static status_t
1806 remove_trailer(net_buffer* buffer, size_t bytes)
1807 {
1808 	return trim_data(buffer, buffer->size - bytes);
1809 }
1810 
1811 
1812 /*!	Trims the buffer to the specified \a newSize by removing space from
1813 	the end of the buffer.
1814 */
1815 static status_t
1816 trim_data(net_buffer* _buffer, size_t newSize)
1817 {
1818 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1819 	TRACE(("%ld: trim_data(buffer %p, newSize = %ld, buffer size = %ld)\n",
1820 		find_thread(NULL), buffer, newSize, buffer->size));
1821 	T(Trim(buffer, newSize));
1822 	//dump_buffer(buffer);
1823 
1824 	ParanoiaChecker _(buffer);
1825 
1826 	if (newSize > buffer->size)
1827 		return B_BAD_VALUE;
1828 	if (newSize == buffer->size)
1829 		return B_OK;
1830 
1831 	data_node* node = get_node_at_offset(buffer, newSize);
1832 	if (node == NULL) {
1833 		// trim size greater than buffer size
1834 		return B_BAD_VALUE;
1835 	}
1836 
1837 	int32 diff = node->used + node->offset - newSize;
1838 	node->SetTailSpace(node->TailSpace() + diff);
1839 	node->used -= diff;
1840 
1841 	if (node->used > 0)
1842 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
1843 
1844 	while (node != NULL) {
1845 		data_node* next = (data_node*)list_get_next_item(&buffer->buffers, node);
1846 		list_remove_item(&buffer->buffers, node);
1847 		remove_data_node(node);
1848 
1849 		node = next;
1850 	}
1851 
1852 	buffer->size = newSize;
1853 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1854 		sizeof(buffer->size));
1855 
1856 	//dprintf(" trim result:\n");
1857 	//dump_buffer(buffer);
1858 	CHECK_BUFFER(buffer);
1859 
1860 	return B_OK;
1861 }
1862 
1863 
1864 /*!	Appends data coming from buffer \a source to the buffer \a buffer. It only
1865 	clones the data, though; that is, the data is not copied, just referenced.
1866 */
1867 static status_t
1868 append_cloned_data(net_buffer* _buffer, net_buffer* _source, uint32 offset,
1869 	size_t bytes)
1870 {
1871 	if (bytes == 0)
1872 		return B_OK;
1873 
1874 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1875 	net_buffer_private* source = (net_buffer_private*)_source;
1876 	TRACE(("%ld: append_cloned_data(buffer %p, source %p, offset = %ld, "
1877 		"bytes = %ld)\n", find_thread(NULL), buffer, source, offset, bytes));
1878 	T(AppendCloned(buffer, source, offset, bytes));
1879 
1880 	ParanoiaChecker _(buffer);
1881 	ParanoiaChecker _2(source);
1882 
1883 	if (source->size < offset + bytes || source->size < offset)
1884 		return B_BAD_VALUE;
1885 
1886 	// find data_node to start with from the source buffer
1887 	data_node* node = get_node_at_offset(source, offset);
1888 	if (node == NULL) {
1889 		// the offset lies beyond the source buffer's data
1890 		return B_BAD_VALUE;
1891 	}
1892 
1893 	size_t sizeAppended = 0;
1894 
1895 	while (node != NULL && bytes > 0) {
1896 		data_node* clone = add_data_node(buffer, node->header);
1897 		if (clone == NULL) {
1898 			remove_trailer(buffer, sizeAppended);
1899 			return ENOBUFS;
1900 		}
1901 
1902 		if (offset)
1903 			offset -= node->offset;
1904 
1905 		clone->offset = buffer->size;
1906 		clone->start = node->start + offset;
1907 		clone->used = min_c(bytes, node->used - offset);
1908 		clone->flags |= DATA_NODE_READ_ONLY;
1909 
1910 		list_add_item(&buffer->buffers, clone);
1911 
1912 		offset = 0;
1913 		bytes -= clone->used;
1914 		buffer->size += clone->used;
1915 		sizeAppended += clone->used;
1916 		node = (data_node*)list_get_next_item(&source->buffers, node);
1917 	}
1918 
1919 	if (bytes != 0)
1920 		panic("append_cloned_data() failed, bytes != 0!\n");
1921 
1922 	//dprintf(" append cloned result:\n");
1923 	//dump_buffer(buffer);
1924 	CHECK_BUFFER(source);
1925 	CHECK_BUFFER(buffer);
1926 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1927 		sizeof(buffer->size));
1928 
1929 	return B_OK;
1930 }
1931 
1932 
1933 void
1934 set_ancillary_data(net_buffer* buffer, ancillary_data_container* container)
1935 {
1936 	((net_buffer_private*)buffer)->ancillary_data = container;
1937 }
1938 
1939 
1940 ancillary_data_container*
1941 get_ancillary_data(net_buffer* buffer)
1942 {
1943 	return ((net_buffer_private*)buffer)->ancillary_data;
1944 }
1945 
1946 
1947 /*!	Moves all ancillary data from buffer \c from to the end of the list of
1948 	ancillary data of buffer \c to. Note that this is the only function that
1949 	transfers or copies ancillary data from one buffer to another.
1950 
1951 	\param from The buffer from which to remove the ancillary data.
1952 	\param to The buffer to which to add the ancillary data.
1953 	\return A pointer to the first of the moved ancillary data, if any, \c NULL
1954 		otherwise.
1955 */
1956 static void*
1957 transfer_ancillary_data(net_buffer* _from, net_buffer* _to)
1958 {
1959 	net_buffer_private* from = (net_buffer_private*)_from;
1960 	net_buffer_private* to = (net_buffer_private*)_to;
1961 
1962 	if (from == NULL || to == NULL)
1963 		return NULL;
1964 
1965 	if (from->ancillary_data == NULL)
1966 		return NULL;
1967 
1968 	if (to->ancillary_data == NULL) {
1969 		// no ancillary data in the target buffer
1970 		to->ancillary_data = from->ancillary_data;
1971 		from->ancillary_data = NULL;
1972 		return next_ancillary_data(to->ancillary_data, NULL, NULL);
1973 	}
1974 
1975 	// both have ancillary data
1976 	void* data = move_ancillary_data(from->ancillary_data,
1977 		to->ancillary_data);
1978 	delete_ancillary_data_container(from->ancillary_data);
1979 	from->ancillary_data = NULL;
1980 
1981 	return data;
1982 }
1983 
1984 
1985 /*!	Tries to directly access the requested space in the buffer.
1986 	If the space is contiguous, the function will succeed and place a pointer
1987 	to that space into \a _contiguousBuffer.
1988 
1989 	\return B_BAD_VALUE if the offset is outside of the buffer's bounds.
1990 	\return B_ERROR in case the buffer is not contiguous at that location.
1991 */
1992 static status_t
1993 direct_access(net_buffer* _buffer, uint32 offset, size_t size,
1994 	void** _contiguousBuffer)
1995 {
1996 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1997 
1998 	ParanoiaChecker _(buffer);
1999 
2000 	//TRACE(("direct_access(buffer %p, offset %ld, size %ld)\n", buffer, offset,
2001 	//	size));
2002 
2003 	if (offset + size > buffer->size)
2004 		return B_BAD_VALUE;
2005 
2006 	// find node to access
2007 	data_node* node = get_node_at_offset(buffer, offset);
2008 	if (node == NULL)
2009 		return B_BAD_VALUE;
2010 
2011 	offset -= node->offset;
2012 
2013 	if (size > node->used - offset)
2014 		return B_ERROR;
2015 
2016 	*_contiguousBuffer = node->start + offset;
2017 	return B_OK;
2018 }
2019 
2020 
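/*!	Computes the 16 bit one's complement (Internet) checksum over \a size
	bytes of the buffer starting at \a offset. If \a finalize is \c true,
	the bit-inverted sum is returned, otherwise the folded intermediate sum.
	\return a negative error code if the requested range is invalid.
*/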
2021 static int32
2022 checksum_data(net_buffer* _buffer, uint32 offset, size_t size, bool finalize)
2023 {
2024 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
2025 
2026 	if (offset + size > buffer->size || size == 0)
2027 		return B_BAD_VALUE;
2028 
2029 	// find first node to read from
2030 	data_node* node = get_node_at_offset(buffer, offset);
2031 	if (node == NULL)
2032 		return B_ERROR;
2033 
2034 	offset -= node->offset;
2035 
2036 	// Since the maximum buffer size is 65536 bytes, it's impossible
2037 	// to overflow the 32 bit sum - we don't need to fold the carries in
2038 	// the loop, we can safely do it afterwards
2039 	uint32 sum = 0;
2040 
2041 	while (true) {
2042 		size_t bytes = min_c(size, node->used - offset);
2043 		if ((offset + node->offset) & 1) {
2044 			// if we're at an odd offset, we have to byte swap the partial checksum
2045 			sum += __swap_int16(compute_checksum(node->start + offset, bytes));
2046 		} else
2047 			sum += compute_checksum(node->start + offset, bytes);
2048 
2049 		size -= bytes;
2050 		if (size == 0)
2051 			break;
2052 
2053 		offset = 0;
2054 
2055 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
2056 		if (node == NULL)
2057 			return B_ERROR;
2058 	}
2059 
2060 	while (sum >> 16) {
2061 		sum = (sum & 0xffff) + (sum >> 16);
2062 	}
2063 
2064 	if (!finalize)
2065 		return (uint16)sum;
2066 
2067 	return (uint16)~sum;
2068 }
2069 
2070 
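/*!	Fills up to \a vecCount entries of \a iovecs with the buffer's non-empty
	data nodes and returns the number of entries used.
*/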
2071 static uint32
2072 get_iovecs(net_buffer* _buffer, struct iovec* iovecs, uint32 vecCount)
2073 {
2074 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
2075 	data_node* node = (data_node*)list_get_first_item(&buffer->buffers);
2076 	uint32 count = 0;
2077 
2078 	while (node != NULL && count < vecCount) {
2079 		if (node->used > 0) {
2080 			iovecs[count].iov_base = node->start;
2081 			iovecs[count].iov_len = node->used;
2082 			count++;
2083 		}
2084 
2085 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
2086 	}
2087 
2088 	return count;
2089 }
2090 
2091 
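/*!	Returns the number of non-empty data nodes in the buffer, i.e. the number
	of iovecs that get_iovecs() would fill in.
*/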
2092 static uint32
2093 count_iovecs(net_buffer* _buffer)
2094 {
2095 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
2096 	data_node* node = (data_node*)list_get_first_item(&buffer->buffers);
2097 	uint32 count = 0;
2098 
2099 	while (node != NULL) {
2100 		if (node->used > 0)
2101 			count++;
2102 
2103 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
2104 	}
2105 
2106 	return count;
2107 }
2108 
2109 
2110 static void
2111 swap_addresses(net_buffer* buffer)
2112 {
2113 	std::swap(buffer->source, buffer->destination);
2114 }
2115 
2116 
2117 static status_t
2118 std_ops(int32 op, ...)
2119 {
2120 	switch (op) {
2121 		case B_MODULE_INIT:
2122 			// TODO: improve our code a bit so we can add constructors
2123 			//	and keep around half-constructed buffers in the slab
2124 
2125 			sNetBufferCache = create_object_cache("net buffer cache",
2126 				sizeof(net_buffer_private), 8, NULL, NULL, NULL);
2127 			if (sNetBufferCache == NULL)
2128 				return B_NO_MEMORY;
2129 
2130 			sDataNodeCache = create_object_cache("data node cache", BUFFER_SIZE,
2131 				0, NULL, NULL, NULL);
2132 			if (sDataNodeCache == NULL) {
2133 				delete_object_cache(sNetBufferCache);
2134 				return B_NO_MEMORY;
2135 			}
2136 
2137 #if ENABLE_STATS
2138 			add_debugger_command_etc("net_buffer_stats", &dump_net_buffer_stats,
2139 				"Print net buffer statistics",
2140 				"\nPrint net buffer statistics.\n", 0);
2141 #endif
2142 #if ENABLE_DEBUGGER_COMMANDS
2143 			add_debugger_command_etc("net_buffer", &dump_net_buffer,
2144 				"Dump net buffer",
2145 				"\nDump the net buffer's internal structures.\n", 0);
2146 #endif
2147 			return B_OK;
2148 
2149 		case B_MODULE_UNINIT:
2150 #if ENABLE_STATS
2151 			remove_debugger_command("net_buffer_stats", &dump_net_buffer_stats);
2152 #endif
2153 #if ENABLE_DEBUGGER_COMMANDS
2154 			remove_debugger_command("net_buffer", &dump_net_buffer);
2155 #endif
2156 			delete_object_cache(sNetBufferCache);
2157 			delete_object_cache(sDataNodeCache);
2158 			return B_OK;
2159 
2160 		default:
2161 			return B_ERROR;
2162 	}
2163 }
2164 
2165 
2166 net_buffer_module_info gNetBufferModule = {
2167 	{
2168 		NET_BUFFER_MODULE_NAME,
2169 		0,
2170 		std_ops
2171 	},
2172 	create_buffer,
2173 	free_buffer,
2174 
2175 	duplicate_buffer,
2176 	clone_buffer,
2177 	split_buffer,
2178 	merge_buffer,
2179 
2180 	prepend_size,
2181 	prepend_data,
2182 	append_size,
2183 	append_data,
2184 	NULL,	// insert
2185 	NULL,	// remove
2186 	remove_header,
2187 	remove_trailer,
2188 	trim_data,
2189 	append_cloned_data,
2190 
2191 	NULL,	// associate_data
2192 
2193 	set_ancillary_data,
2194 	get_ancillary_data,
2195 	transfer_ancillary_data,
2196 
2197 	direct_access,
2198 	read_data,
2199 	write_data,
2200 
2201 	checksum_data,
2202 
2203 	NULL,	// get_memory_map
2204 	get_iovecs,
2205 	count_iovecs,
2206 
2207 	swap_addresses,
2208 
2209 	dump_buffer,	// dump
2210 };
2211 
2212