xref: /haiku/src/add-ons/kernel/network/stack/net_buffer.cpp (revision 2069f565e8ed3556b30cf2f5e3aa54450128b8c2)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  *		Ingo Weinhold, ingo_weinhold@gmx.de
8  */
9 
10 
11 #include "utility.h"
12 
13 #include <net_buffer.h>
14 #include <slab/Slab.h>
15 #include <tracing.h>
16 #include <util/list.h>
17 
18 #include <ByteOrder.h>
19 #include <debug.h>
20 #include <kernel.h>
21 #include <KernelExport.h>
22 #include <util/DoublyLinkedList.h>
23 
24 #include <algorithm>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <sys/param.h>
28 #include <sys/uio.h>
29 
30 #include "ancillary_data.h"
31 
32 #include "paranoia_config.h"
33 
34 
35 //#define TRACE_BUFFER
36 #ifdef TRACE_BUFFER
37 #	define TRACE(x) dprintf x
38 #else
39 #	define TRACE(x) ;
40 #endif
41 
42 #define BUFFER_SIZE 2048
43 	// the maximum buffer size supported by the implementation is 65536
44 
45 #define ENABLE_DEBUGGER_COMMANDS	1
46 //#define ENABLE_STATS				1
47 #define PARANOID_BUFFER_CHECK		NET_BUFFER_PARANOIA
48 
49 #define COMPONENT_PARANOIA_LEVEL	NET_BUFFER_PARANOIA
50 #include <debug_paranoia.h>
51 
52 #define DATA_NODE_READ_ONLY		0x1
53 
54 struct header_space {
55 	uint16	size;
56 	uint16	free;
57 };
58 
59 struct free_data {
60 	struct free_data* next;
61 	uint16			size;
62 };
63 
64 struct data_header {
65 	int32			ref_count;
66 	addr_t			physical_address;
67 	free_data*		first_free;
68 	uint8*			data_end;
69 	header_space	space;
70 	uint16			tail_space;
71 };
72 
73 struct data_node {
74 	struct list_link link;
75 	struct data_header* header;
76 	struct data_header* located;
77 	size_t			offset;		// the net_buffer-wide offset of this node
78 	uint8*			start;		// points to the start of the data
79 	uint16			flags;
80 	uint16			used;		// defines how much memory is used by this node
81 
82 	uint16 HeaderSpace() const
83 	{
84 		if ((flags & DATA_NODE_READ_ONLY) != 0)
85 			return 0;
86 		return header->space.free;
87 	}
88 
89 	void AddHeaderSpace(uint16 toAdd)
90 	{
91 		if ((flags & DATA_NODE_READ_ONLY) == 0) {
92 			header->space.size += toAdd;
93 			header->space.free += toAdd;
94 		}
95 	}
96 
97 	void SubtractHeaderSpace(uint16 toSubtract)
98 	{
99 		if ((flags & DATA_NODE_READ_ONLY) == 0) {
100 			header->space.size -= toSubtract;
101 			header->space.free -= toSubtract;
102 		}
103 	}
104 
105 	uint16 TailSpace() const
106 	{
107 		if ((flags & DATA_NODE_READ_ONLY) != 0)
108 			return 0;
109 		return header->tail_space;
110 	}
111 
112 	void SetTailSpace(uint16 space)
113 	{
114 		if ((flags & DATA_NODE_READ_ONLY) == 0)
115 			header->tail_space = space;
116 	}
117 
118 	void FreeSpace()
119 	{
120 		if ((flags & DATA_NODE_READ_ONLY) == 0) {
121 			uint16 space = used + header->tail_space;
122 			header->space.size += space;
123 			header->space.free += space;
124 			header->tail_space = 0;
125 		}
126 	}
127 };
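
// Rough sketch of how a single BUFFER_SIZE chunk is laid out, as set up by
// create_data_header() and add_first_data_node() below: the data_header and
// the node structures allocated behind it occupy the start of the chunk (up
// to data_end), followed by the remaining free header space, the data
// itself, and finally the tail space. The data grows downwards into the free
// header space when data is prepended, and upwards into the tail space when
// data is appended:
//
//   +-------------+-------------------+-------------+------------+
//   | data_header | free header space | data (used) | tail space |
//   | and nodes   | (space.free)      |             |            |
//   +-------------+-------------------+-------------+------------+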
128 
129 struct net_buffer_private : net_buffer {
130 	struct list					buffers;
131 	data_header*				allocation_header;
132 		// the current place where we allocate header space (nodes, ...)
133 	ancillary_data_container*	ancillary_data;
134 
135 	struct {
136 		struct sockaddr_storage	source;
137 		struct sockaddr_storage	destination;
138 	} storage;
139 };
140 
141 
142 #define DATA_HEADER_SIZE				_ALIGN(sizeof(data_header))
143 #define DATA_NODE_SIZE					_ALIGN(sizeof(data_node))
144 #define MAX_FREE_BUFFER_SIZE			(BUFFER_SIZE - DATA_HEADER_SIZE)
145 
146 
147 static object_cache* sNetBufferCache;
148 static object_cache* sDataNodeCache;
149 
150 
151 static status_t append_data(net_buffer* buffer, const void* data, size_t size);
152 static status_t trim_data(net_buffer* _buffer, size_t newSize);
153 static status_t remove_header(net_buffer* _buffer, size_t bytes);
154 static status_t remove_trailer(net_buffer* _buffer, size_t bytes);
155 static status_t append_cloned_data(net_buffer* _buffer, net_buffer* _source,
156 					uint32 offset, size_t bytes);
157 static status_t read_data(net_buffer* _buffer, size_t offset, void* data,
158 					size_t size);
159 
160 
161 #if ENABLE_STATS
162 static vint32 sAllocatedDataHeaderCount = 0;
163 static vint32 sAllocatedNetBufferCount = 0;
164 static vint32 sEverAllocatedDataHeaderCount = 0;
165 static vint32 sEverAllocatedNetBufferCount = 0;
166 #endif
167 
168 
169 #if NET_BUFFER_TRACING
170 
171 
172 namespace NetBufferTracing {
173 
174 
175 class NetBufferTraceEntry : public AbstractTraceEntry {
176 public:
177 	NetBufferTraceEntry(net_buffer* buffer)
178 		:
179 		fBuffer(buffer)
180 	{
181 #if NET_BUFFER_TRACING_STACK_TRACE
182 	fStackTrace = capture_tracing_stack_trace(
183 		NET_BUFFER_TRACING_STACK_TRACE, 0, false);
184 #endif
185 	}
186 
187 #if NET_BUFFER_TRACING_STACK_TRACE
188 	virtual void DumpStackTrace(TraceOutput& out)
189 	{
190 		out.PrintStackTrace(fStackTrace);
191 	}
192 #endif
193 
194 protected:
195 	net_buffer*	fBuffer;
196 #if NET_BUFFER_TRACING_STACK_TRACE
197 	tracing_stack_trace* fStackTrace;
198 #endif
199 };
200 
201 
202 class Create : public NetBufferTraceEntry {
203 public:
204 	Create(size_t headerSpace, net_buffer* buffer)
205 		:
206 		NetBufferTraceEntry(buffer),
207 		fHeaderSpace(headerSpace)
208 	{
209 		Initialized();
210 	}
211 
212 	virtual void AddDump(TraceOutput& out)
213 	{
214 		out.Print("net buffer create: header space: %lu -> buffer: %p",
215 			fHeaderSpace, fBuffer);
216 	}
217 
218 private:
219 	size_t		fHeaderSpace;
220 };
221 
222 
223 class Free : public NetBufferTraceEntry {
224 public:
225 	Free(net_buffer* buffer)
226 		:
227 		NetBufferTraceEntry(buffer)
228 	{
229 		Initialized();
230 	}
231 
232 	virtual void AddDump(TraceOutput& out)
233 	{
234 		out.Print("net buffer free: buffer: %p", fBuffer);
235 	}
236 };
237 
238 
239 class Duplicate : public NetBufferTraceEntry {
240 public:
241 	Duplicate(net_buffer* buffer, net_buffer* clone)
242 		:
243 		NetBufferTraceEntry(buffer),
244 		fClone(clone)
245 	{
246 		Initialized();
247 	}
248 
249 	virtual void AddDump(TraceOutput& out)
250 	{
251 		out.Print("net buffer dup: buffer: %p -> %p", fBuffer, fClone);
252 	}
253 
254 private:
255 	net_buffer*		fClone;
256 };
257 
258 
259 class Clone : public NetBufferTraceEntry {
260 public:
261 	Clone(net_buffer* buffer, bool shareFreeSpace, net_buffer* clone)
262 		:
263 		NetBufferTraceEntry(buffer),
264 		fClone(clone),
265 		fShareFreeSpace(shareFreeSpace)
266 	{
267 		Initialized();
268 	}
269 
270 	virtual void AddDump(TraceOutput& out)
271 	{
272 		out.Print("net buffer clone: buffer: %p, share free space: %s "
273 			"-> %p", fBuffer, fShareFreeSpace ? "true" : "false", fClone);
274 	}
275 
276 private:
277 	net_buffer*		fClone;
278 	bool			fShareFreeSpace;
279 };
280 
281 
282 class Split : public NetBufferTraceEntry {
283 public:
284 	Split(net_buffer* buffer, uint32 offset, net_buffer* newBuffer)
285 		:
286 		NetBufferTraceEntry(buffer),
287 		fNewBuffer(newBuffer),
288 		fOffset(offset)
289 	{
290 		Initialized();
291 	}
292 
293 	virtual void AddDump(TraceOutput& out)
294 	{
295 		out.Print("net buffer split: buffer: %p, offset: %lu "
296 			"-> %p", fBuffer, fOffset, fNewBuffer);
297 	}
298 
299 private:
300 	net_buffer*		fNewBuffer;
301 	uint32			fOffset;
302 };
303 
304 
305 class Merge : public NetBufferTraceEntry {
306 public:
307 	Merge(net_buffer* buffer, net_buffer* otherBuffer, bool after)
308 		:
309 		NetBufferTraceEntry(buffer),
310 		fOtherBuffer(otherBuffer),
311 		fAfter(after)
312 	{
313 		Initialized();
314 	}
315 
316 	virtual void AddDump(TraceOutput& out)
317 	{
318 		out.Print("net buffer merge: buffers: %p + %p, after: %s "
319 			"-> %p", fBuffer, fOtherBuffer, fAfter ? "true" : "false",
320 			fOtherBuffer);
321 	}
322 
323 private:
324 	net_buffer*		fOtherBuffer;
325 	bool			fAfter;
326 };
327 
328 
329 class AppendCloned : public NetBufferTraceEntry {
330 public:
331 	AppendCloned(net_buffer* buffer, net_buffer* source, uint32 offset,
332 		size_t size)
333 		:
334 		NetBufferTraceEntry(buffer),
335 		fSource(source),
336 		fOffset(offset),
337 		fSize(size)
338 	{
339 		Initialized();
340 	}
341 
342 	virtual void AddDump(TraceOutput& out)
343 	{
344 		out.Print("net buffer append cloned: buffer: %p, from: %p, "
345 			"offset: %lu, size: %lu", fBuffer, fSource, fOffset, fSize);
346 	}
347 
348 private:
349 	net_buffer*		fSource;
350 	uint32			fOffset;
351 	size_t			fSize;
352 };
353 
354 
355 class PrependSize : public NetBufferTraceEntry {
356 public:
357 	PrependSize(net_buffer* buffer, size_t size)
358 		:
359 		NetBufferTraceEntry(buffer),
360 		fSize(size)
361 	{
362 		Initialized();
363 	}
364 
365 	virtual void AddDump(TraceOutput& out)
366 	{
367 		out.Print("net buffer prepend size: buffer: %p, size: %lu", fBuffer,
368 			fSize);
369 	}
370 
371 private:
372 	size_t			fSize;
373 };
374 
375 
376 class AppendSize : public NetBufferTraceEntry {
377 public:
378 	AppendSize(net_buffer* buffer, size_t size)
379 		:
380 		NetBufferTraceEntry(buffer),
381 		fSize(size)
382 	{
383 		Initialized();
384 	}
385 
386 	virtual void AddDump(TraceOutput& out)
387 	{
388 		out.Print("net buffer append size: buffer: %p, size: %lu", fBuffer,
389 			fSize);
390 	}
391 
392 private:
393 	size_t			fSize;
394 };
395 
396 
397 class RemoveHeader : public NetBufferTraceEntry {
398 public:
399 	RemoveHeader(net_buffer* buffer, size_t size)
400 		:
401 		NetBufferTraceEntry(buffer),
402 		fSize(size)
403 	{
404 		Initialized();
405 	}
406 
407 	virtual void AddDump(TraceOutput& out)
408 	{
409 		out.Print("net buffer remove header: buffer: %p, size: %lu",
410 			fBuffer, fSize);
411 	}
412 
413 private:
414 	size_t			fSize;
415 };
416 
417 
418 class Trim : public NetBufferTraceEntry {
419 public:
420 	Trim(net_buffer* buffer, size_t size)
421 		:
422 		NetBufferTraceEntry(buffer),
423 		fSize(size)
424 	{
425 		Initialized();
426 	}
427 
428 	virtual void AddDump(TraceOutput& out)
429 	{
430 		out.Print("net buffer trim: buffer: %p, size: %lu",
431 			fBuffer, fSize);
432 	}
433 
434 private:
435 	size_t			fSize;
436 };
437 
438 
439 class Read : public NetBufferTraceEntry {
440 public:
441 	Read(net_buffer* buffer, uint32 offset, void* data, size_t size)
442 		:
443 		NetBufferTraceEntry(buffer),
444 		fData(data),
445 		fOffset(offset),
446 		fSize(size)
447 	{
448 		Initialized();
449 	}
450 
451 	virtual void AddDump(TraceOutput& out)
452 	{
453 		out.Print("net buffer read: buffer: %p, offset: %lu, size: %lu, "
454 			"data: %p", fBuffer, fOffset, fSize, fData);
455 	}
456 
457 private:
458 	void*			fData;
459 	uint32			fOffset;
460 	size_t			fSize;
461 };
462 
463 
464 class Write : public NetBufferTraceEntry {
465 public:
466 	Write(net_buffer* buffer, uint32 offset, const void* data, size_t size)
467 		:
468 		NetBufferTraceEntry(buffer),
469 		fData(data),
470 		fOffset(offset),
471 		fSize(size)
472 	{
473 		Initialized();
474 	}
475 
476 	virtual void AddDump(TraceOutput& out)
477 	{
478 		out.Print("net buffer write: buffer: %p, offset: %lu, size: %lu, "
479 			"data: %p", fBuffer, fOffset, fSize, fData);
480 	}
481 
482 private:
483 	const void*		fData;
484 	uint32			fOffset;
485 	size_t			fSize;
486 };
487 
488 
489 #if NET_BUFFER_TRACING >= 2
490 
491 class DataHeaderTraceEntry : public AbstractTraceEntry {
492 public:
493 	DataHeaderTraceEntry(data_header* header)
494 		:
495 		fHeader(header)
496 	{
497 	}
498 
499 protected:
500 	data_header*	fHeader;
501 };
502 
503 
504 class CreateDataHeader : public DataHeaderTraceEntry {
505 public:
506 	CreateDataHeader(data_header* header)
507 		:
508 		DataHeaderTraceEntry(header)
509 	{
510 		Initialized();
511 	}
512 
513 	virtual void AddDump(TraceOutput& out)
514 	{
515 		out.Print("net buffer data header create:  header: %p", fHeader);
516 	}
517 };
518 
519 
520 class AcquireDataHeader : public DataHeaderTraceEntry {
521 public:
522 	AcquireDataHeader(data_header* header, int32 refCount)
523 		:
524 		DataHeaderTraceEntry(header),
525 		fRefCount(refCount)
526 	{
527 		Initialized();
528 	}
529 
530 	virtual void AddDump(TraceOutput& out)
531 	{
532 		out.Print("net buffer data header acquire: header: %p "
533 			"-> ref count: %ld", fHeader, fRefCount);
534 	}
535 
536 private:
537 	int32			fRefCount;
538 };
539 
540 
541 class ReleaseDataHeader : public DataHeaderTraceEntry {
542 public:
543 	ReleaseDataHeader(data_header* header, int32 refCount)
544 		:
545 		DataHeaderTraceEntry(header),
546 		fRefCount(refCount)
547 	{
548 		Initialized();
549 	}
550 
551 	virtual void AddDump(TraceOutput& out)
552 	{
553 		out.Print("net buffer data header release: header: %p "
554 			"-> ref count: %ld", fHeader, fRefCount);
555 	}
556 
557 private:
558 	int32			fRefCount;
559 };
560 
561 #	define T2(x)	new(std::nothrow) NetBufferTracing::x
562 #else
563 #	define T2(x)
564 #endif	// NET_BUFFER_TRACING >= 2
565 
566 }	// namespace NetBufferTracing
567 
568 #	define T(x)	new(std::nothrow) NetBufferTracing::x
569 
570 #else
571 #	define T(x)
572 #	define T2(x)
573 #endif	// NET_BUFFER_TRACING
574 
575 
576 static void
577 dump_buffer(net_buffer* _buffer)
578 {
579 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
580 
581 	dprintf("buffer %p, size %ld\n", buffer, buffer->size);
582 	data_node* node = NULL;
583 	while ((node = (data_node*)list_get_next_item(&buffer->buffers, node))
584 			!= NULL) {
585 		dprintf("  node %p, offset %lu, used %u, header %u, tail %u, "
586 			"header %p\n", node, node->offset, node->used, node->HeaderSpace(),
587 			node->TailSpace(), node->header);
588 		//dump_block((char*)node->start, node->used, "    ");
589 		dump_block((char*)node->start, min_c(node->used, 32), "    ");
590 	}
591 }
592 
593 #if ENABLE_DEBUGGER_COMMANDS
594 
595 static int
596 dump_net_buffer(int argc, char** argv)
597 {
598 	if (argc != 2) {
599 		kprintf("usage: %s [address]\n", argv[0]);
600 		return 0;
601 	}
602 
603 	dump_buffer((net_buffer*)parse_expression(argv[1]));
604 	return 0;
605 }
606 
607 #endif	// ENABLE_DEBUGGER_COMMANDS
608 
609 #if ENABLE_STATS
610 
611 static int
612 dump_net_buffer_stats(int argc, char** argv)
613 {
614 	kprintf("allocated data headers: %7ld / %7ld\n", sAllocatedDataHeaderCount,
615 		sEverAllocatedDataHeaderCount);
616 	kprintf("allocated net buffers:  %7ld / %7ld\n", sAllocatedNetBufferCount,
617 		sEverAllocatedNetBufferCount);
618 	return 0;
619 }
620 
621 #endif	// ENABLE_STATS
622 
623 #if PARANOID_BUFFER_CHECK
624 
625 static void
626 check_buffer(net_buffer* _buffer)
627 {
628 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
629 
630 	// sum up the size of all nodes
631 	size_t size = 0;
632 
633 	data_node* node = (data_node*)list_get_first_item(&buffer->buffers);
634 	while (node != NULL) {
635 		if (node->offset != size) {
636 			panic("net_buffer %p: bad node %p offset (%lu vs. %lu)",
637 				buffer, node, node->offset, size);
638 			return;
639 		}
640 		size += node->used;
641 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
642 	}
643 
644 	if (size != buffer->size) {
645 		panic("net_buffer %p size != sum of its data node sizes (%lu vs. %lu)",
646 			buffer, buffer->size, size);
647 		return;
648 	}
649 }
650 
651 
652 #if 0
653 static void
654 check_buffer_contents(net_buffer* buffer, size_t offset, const void* data,
655 	size_t size)
656 {
657 	void* bufferData = malloc(size);
658 	if (bufferData == NULL)
659 		return;
660 
661 	if (read_data(buffer, offset, bufferData, size) == B_OK) {
662 		if (memcmp(bufferData, data, size) != 0) {
663 			int32 index = 0;
664 			while (((uint8*)data)[index] == ((uint8*)bufferData)[index])
665 				index++;
666 			panic("check_buffer_contents(): contents check failed at index "
667 				"%ld, buffer: %p, offset: %lu, size: %lu", index, buffer,
668 				offset, size);
669 		}
670 	} else {
671 		panic("failed to read from buffer %p, offset: %lu, size: %lu",
672 			buffer, offset, size);
673 	}
674 
675 	free(bufferData);
676 }
677 
678 
679 static void
680 check_buffer_contents(net_buffer* buffer, size_t offset, net_buffer* source,
681 	size_t sourceOffset, size_t size)
682 {
683 	void* bufferData = malloc(size);
684 	if (bufferData == NULL)
685 		return;
686 
687 	if (read_data(source, sourceOffset, bufferData, size) == B_OK) {
688 		check_buffer_contents(buffer, offset, bufferData, size);
689 	} else {
690 		panic("failed to read from source buffer %p, offset: %lu, size: %lu",
691 			source, sourceOffset, size);
692 	}
693 
694 	free(bufferData);
695 }
696 #endif
697 
698 
699 # 	define CHECK_BUFFER(buffer)	check_buffer(buffer)
700 #else
701 # 	define CHECK_BUFFER(buffer)	do {} while (false)
702 #endif	// !PARANOID_BUFFER_CHECK
703 
704 
705 static inline data_header*
706 allocate_data_header()
707 {
708 #if ENABLE_STATS
709 	atomic_add(&sAllocatedDataHeaderCount, 1);
710 	atomic_add(&sEverAllocatedDataHeaderCount, 1);
711 #endif
712 	return (data_header*)object_cache_alloc(sDataNodeCache, 0);
713 }
714 
715 
716 static inline net_buffer_private*
717 allocate_net_buffer()
718 {
719 #if ENABLE_STATS
720 	atomic_add(&sAllocatedNetBufferCount, 1);
721 	atomic_add(&sEverAllocatedNetBufferCount, 1);
722 #endif
723 	return (net_buffer_private*)object_cache_alloc(sNetBufferCache, 0);
724 }
725 
726 
727 static inline void
728 free_data_header(data_header* header)
729 {
730 #if ENABLE_STATS
731 	if (header != NULL)
732 		atomic_add(&sAllocatedDataHeaderCount, -1);
733 #endif
734 	object_cache_free(sDataNodeCache, header, 0);
735 }
736 
737 
738 static inline void
739 free_net_buffer(net_buffer_private* buffer)
740 {
741 #if ENABLE_STATS
742 	if (buffer != NULL)
743 		atomic_add(&sAllocatedNetBufferCount, -1);
744 #endif
745 	object_cache_free(sNetBufferCache, buffer, 0);
746 }
747 
748 
749 static data_header*
750 create_data_header(size_t headerSpace)
751 {
752 	data_header* header = allocate_data_header();
753 	if (header == NULL)
754 		return NULL;
755 
756 	header->ref_count = 1;
757 	header->physical_address = 0;
758 		// TODO: initialize this correctly
759 	header->space.size = headerSpace;
760 	header->space.free = headerSpace;
761 	header->data_end = (uint8*)header + DATA_HEADER_SIZE;
762 	header->tail_space = (uint8*)header + BUFFER_SIZE - header->data_end
763 		- headerSpace;
764 	header->first_free = NULL;
765 
766 	TRACE(("%ld:   create new data header %p\n", find_thread(NULL), header));
767 	T2(CreateDataHeader(header));
768 	return header;
769 }
770 
771 
772 static void
773 release_data_header(data_header* header)
774 {
775 	int32 refCount = atomic_add(&header->ref_count, -1);
776 	T2(ReleaseDataHeader(header, refCount - 1));
777 	if (refCount != 1)
778 		return;
779 
780 	TRACE(("%ld:   free header %p\n", find_thread(NULL), header));
781 	free_data_header(header);
782 }
783 
784 
785 inline void
786 acquire_data_header(data_header* header)
787 {
788 	int32 refCount = atomic_add(&header->ref_count, 1);
789 	(void)refCount;
790 	T2(AcquireDataHeader(header, refCount + 1));
791 }
792 
793 
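/*!	Puts the \a size bytes at \a data back onto the free list of the given
	\a header, so that alloc_data_header_space() can hand them out again.
*/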
794 static void
795 free_data_header_space(data_header* header, uint8* data, size_t size)
796 {
797 	if (size < sizeof(free_data))
798 		size = sizeof(free_data);
799 
800 	free_data* freeData = (free_data*)data;
801 	freeData->next = header->first_free;
802 	freeData->size = size;
803 
804 	header->first_free = freeData;
805 }
806 
807 
808 /*!	Tries to allocate \a size bytes from the free space in the header.
809 */
810 static uint8*
811 alloc_data_header_space(data_header* header, size_t size)
812 {
813 	if (size < sizeof(free_data))
814 		size = sizeof(free_data);
815 	size = _ALIGN(size);
816 
817 	if (header->first_free != NULL && header->first_free->size >= size) {
818 		// the first entry of the free list matches the allocation's needs
819 
820 		// TODO: If the free space is greater than what shall be allocated, we
821 		// leak the remainder of the space. We should only allocate multiples of
822 		// _ALIGN(sizeof(free_data)) and split free space in this case. It's not
823 		// that pressing, since the only thing allocated ATM are data_nodes, and
824 		// thus the free space entries will always have the right size.
825 		uint8* data = (uint8*)header->first_free;
826 		header->first_free = header->first_free->next;
827 		return data;
828 	}
829 
830 	if (header->space.free < size) {
831 		// not enough free header space left, search the free list
832 		free_data* freeData = header->first_free;
833 		free_data* last = NULL;
834 		while (freeData != NULL) {
835 			if (last != NULL && freeData->size >= size) {
836 				// take this one
837 				last->next = freeData->next;
838 				return (uint8*)freeData;
839 			}
840 
841 			last = freeData;
842 			freeData = freeData->next;
843 		}
844 
845 		return NULL;
846 	}
847 
848 	// allocate new space
849 
850 	uint8* data = header->data_end;
851 	header->data_end += size;
852 	header->space.free -= size;
853 
854 	return data;
855 }
856 
857 
858 static uint8*
859 alloc_data_header_space(net_buffer_private* buffer, size_t size,
860 	data_header** _header = NULL)
861 {
862 	// try to allocate in our current allocation header
863 	uint8* allocated = alloc_data_header_space(buffer->allocation_header, size);
864 	if (allocated == NULL) {
865 		// not enough header space left -- create a fresh buffer for headers
866 		data_header* header = create_data_header(MAX_FREE_BUFFER_SIZE);
867 		if (header == NULL)
868 			return NULL;
869 
870 		// release our reference to the old header -- it will stay around
871 		// until the last reference to it is released
872 		release_data_header(buffer->allocation_header);
873 		buffer->allocation_header = header;
874 			// We keep the initial reference.
875 
876 		// now the allocation can only fail if size is too big
877 		allocated = alloc_data_header_space(buffer->allocation_header, size);
878 	}
879 
880 	if (_header != NULL)
881 		*_header = buffer->allocation_header;
882 
883 	return allocated;
884 }
885 
886 
887 static data_node*
888 add_first_data_node(data_header* header)
889 {
890 	data_node* node = (data_node*)alloc_data_header_space(header,
891 		sizeof(data_node));
892 	if (node == NULL)
893 		return NULL;
894 
895 	TRACE(("%ld:   add first data node %p to header %p\n", find_thread(NULL),
896 		node, header));
897 
898 	acquire_data_header(header);
899 
900 	memset(node, 0, sizeof(struct data_node));
901 	node->located = header;
902 	node->header = header;
903 	node->offset = 0;
904 	node->start = header->data_end + header->space.free;
905 	node->used = 0;
906 	node->flags = 0;
907 
908 	return node;
909 }
910 
911 
912 static data_node*
913 add_data_node(net_buffer_private* buffer, data_header* header)
914 {
915 	data_header* located;
916 	data_node* node = (data_node*)alloc_data_header_space(buffer,
917 		sizeof(data_node), &located);
918 	if (node == NULL)
919 		return NULL;
920 
921 	TRACE(("%ld:   add data node %p to header %p\n", find_thread(NULL), node,
922 		header));
923 
924 	acquire_data_header(header);
925 	if (located != header)
926 		acquire_data_header(located);
927 
928 	memset(node, 0, sizeof(struct data_node));
929 	node->located = located;
930 	node->header = header;
931 	node->flags = 0;
932 	return node;
933 }
934 
935 
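/*!	Detaches \a node from its headers: the used and tail space the node owns
	is returned to its data header, the node structure itself goes back onto
	the free list of the header it was allocated on, and the node's header
	references are released.
*/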
936 void
937 remove_data_node(data_node* node)
938 {
939 	data_header* located = node->located;
940 
941 	TRACE(("%ld:   remove data node %p from header %p (located %p)\n",
942 		find_thread(NULL), node, node->header, located));
943 
944 	// Move all used and tail space to the header space, which is useful in case
945 	// this is the first node of a buffer (i.e. the header is an allocation
946 	// header).
947 	node->FreeSpace();
948 
949 	if (located != node->header)
950 		release_data_header(node->header);
951 
952 	if (located == NULL)
953 		return;
954 
955 	free_data_header_space(located, (uint8*)node, sizeof(data_node));
956 
957 	release_data_header(located);
958 }
959 
960 
961 static inline data_node*
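/*!	Returns the node that contains the data at the given buffer-wide
	\a offset, or \c NULL if the offset lies beyond the buffer's data.
*/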
962 get_node_at_offset(net_buffer_private* buffer, size_t offset)
963 {
964 	data_node* node = (data_node*)list_get_first_item(&buffer->buffers);
965 	while (node->offset + node->used <= offset) {
966 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
967 		if (node == NULL)
968 			return NULL;
969 	}
970 
971 	return node;
972 }
973 
974 
975 /*!	Appends the first \a size bytes of the \a from net_buffer's data to the
976 	\a to net_buffer. The source buffer remains unchanged.
977 */
978 static status_t
979 append_data_from_buffer(net_buffer* to, const net_buffer* from, size_t size)
980 {
981 	net_buffer_private* source = (net_buffer_private*)from;
982 	net_buffer_private* dest = (net_buffer_private*)to;
983 
984 	if (size > from->size)
985 		return B_BAD_VALUE;
986 	if (size == 0)
987 		return B_OK;
988 
989 	data_node* nodeTo = get_node_at_offset(source, size);
990 	if (nodeTo == NULL)
991 		return B_BAD_VALUE;
992 
993 	data_node* node = (data_node*)list_get_first_item(&source->buffers);
994 	if (node == NULL) {
995 		CHECK_BUFFER(source);
996 		return B_ERROR;
997 	}
998 
999 	while (node != nodeTo) {
1000 		if (append_data(dest, node->start, node->used) < B_OK) {
1001 			CHECK_BUFFER(dest);
1002 			return B_ERROR;
1003 		}
1004 
1005 		node = (data_node*)list_get_next_item(&source->buffers, node);
1006 	}
1007 
1008 	int32 diff = node->offset + node->used - size;
1009 	if (append_data(dest, node->start, node->used - diff) < B_OK) {
1010 		CHECK_BUFFER(dest);
1011 		return B_ERROR;
1012 	}
1013 
1014 	CHECK_BUFFER(dest);
1015 
1016 	return B_OK;
1017 }
1018 
1019 
1020 static void
1021 copy_metadata(net_buffer* destination, const net_buffer* source)
1022 {
1023 	memcpy(destination->source, source->source,
1024 		min_c(source->source->sa_len, sizeof(sockaddr_storage)));
1025 	memcpy(destination->destination, source->destination,
1026 		min_c(source->destination->sa_len, sizeof(sockaddr_storage)));
1027 
1028 	destination->flags = source->flags;
1029 	destination->interface = source->interface;
1030 	destination->offset = source->offset;
1031 	destination->protocol = source->protocol;
1032 	destination->type = source->type;
1033 }
1034 
1035 
1036 //	#pragma mark - module API
1037 
1038 
1039 static net_buffer*
1040 create_buffer(size_t headerSpace)
1041 {
1042 	net_buffer_private* buffer = allocate_net_buffer();
1043 	if (buffer == NULL)
1044 		return NULL;
1045 
1046 	TRACE(("%ld: create buffer %p\n", find_thread(NULL), buffer));
1047 
1048 	// Make sure headerSpace is valid and at least the initial node fits.
1049 	headerSpace = _ALIGN(headerSpace);
1050 	if (headerSpace < DATA_NODE_SIZE)
1051 		headerSpace = DATA_NODE_SIZE;
1052 	else if (headerSpace > MAX_FREE_BUFFER_SIZE)
1053 		headerSpace = MAX_FREE_BUFFER_SIZE;
1054 
1055 	data_header* header = create_data_header(headerSpace);
1056 	if (header == NULL) {
1057 		free_net_buffer(buffer);
1058 		return NULL;
1059 	}
1060 	buffer->allocation_header = header;
1061 
1062 	data_node* node = add_first_data_node(header);
1063 
1064 	list_init(&buffer->buffers);
1065 	list_add_item(&buffer->buffers, node);
1066 
1067 	buffer->ancillary_data = NULL;
1068 
1069 	buffer->source = (sockaddr*)&buffer->storage.source;
1070 	buffer->destination = (sockaddr*)&buffer->storage.destination;
1071 
1072 	buffer->storage.source.ss_len = 0;
1073 	buffer->storage.destination.ss_len = 0;
1074 
1075 	buffer->interface = NULL;
1076 	buffer->offset = 0;
1077 	buffer->flags = 0;
1078 	buffer->size = 0;
1079 
1080 	buffer->type = -1;
1081 
1082 	CHECK_BUFFER(buffer);
1083 	CREATE_PARANOIA_CHECK_SET(buffer, "net_buffer");
1084 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1085 		sizeof(buffer->size));
1086 
1087 	T(Create(headerSpace, buffer));
1088 
1089 	return buffer;
1090 }
1091 
1092 
1093 static void
1094 free_buffer(net_buffer* _buffer)
1095 {
1096 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1097 
1098 	TRACE(("%ld: free buffer %p\n", find_thread(NULL), buffer));
1099 	T(Free(buffer));
1100 
1101 	CHECK_BUFFER(buffer);
1102 	DELETE_PARANOIA_CHECK_SET(buffer);
1103 
1104 	while (data_node* node
1105 			= (data_node*)list_remove_head_item(&buffer->buffers)) {
1106 		remove_data_node(node);
1107 	}
1108 
1109 	delete_ancillary_data_container(buffer->ancillary_data);
1110 
1111 	release_data_header(buffer->allocation_header);
1112 
1113 	free_net_buffer(buffer);
1114 }
1115 
1116 
1117 /*!	Creates a duplicate of the \a buffer. The new buffer does not share internal
1118 	storage; they are completely independent of each other.
1119 */
1120 static net_buffer*
1121 duplicate_buffer(net_buffer* _buffer)
1122 {
1123 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1124 
1125 	ParanoiaChecker _(buffer);
1126 
1127 	TRACE(("%ld: duplicate_buffer(buffer %p)\n", find_thread(NULL), buffer));
1128 
1129 	// TODO: We might want to choose a better header space. The minimal
1130 	// one doesn't allow prepending any data without allocating a new header.
1131 	// The same holds for appending cloned data.
1132 	net_buffer* duplicate = create_buffer(DATA_NODE_SIZE);
1133 	if (duplicate == NULL)
1134 		return NULL;
1135 
1136 	TRACE(("%ld:   duplicate: %p)\n", find_thread(NULL), duplicate));
1137 
1138 	// copy the data from the source buffer
1139 
1140 	data_node* node = (data_node*)list_get_first_item(&buffer->buffers);
1141 	while (node != NULL) {
1142 		if (append_data(duplicate, node->start, node->used) < B_OK) {
1143 			free_buffer(duplicate);
1144 			CHECK_BUFFER(buffer);
1145 			return NULL;
1146 		}
1147 
1148 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
1149 	}
1150 
1151 	copy_metadata(duplicate, buffer);
1152 
1153 	ASSERT(duplicate->size == buffer->size);
1154 	CHECK_BUFFER(buffer);
1155 	CHECK_BUFFER(duplicate);
1156 	RUN_PARANOIA_CHECKS(duplicate);
1157 
1158 	T(Duplicate(buffer, duplicate));
1159 
1160 	return duplicate;
1161 }
1162 
1163 
1164 /*!	Clones the buffer by grabbing another reference to the underlying data.
1165 	If that data changes, it will be changed in the clone as well.
1166 
1167 	If \a shareFreeSpace is \c true, the cloned buffer may claim the free
1168 	space of the original buffer, just as the original buffer itself still
1169 	can. If you use this option, it is your responsibility to ensure that
1170 	only one of the two buffers actually does so.
1171 */
1172 static net_buffer*
1173 clone_buffer(net_buffer* _buffer, bool shareFreeSpace)
1174 {
1175 	// TODO: See if the commented-out code can be fixed in a safe way. We could
1176 	// probably place cloned nodes on a header not belonging to our buffer, if
1177 	// we don't free the node's header space when removing it. Otherwise we
1178 	// mess with the header's free list, which might be accessed by another
1179 	// thread at the same time.
1180 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1181 
1182 	net_buffer* clone = create_buffer(MAX_FREE_BUFFER_SIZE);
1183 	if (clone == NULL)
1184 		return NULL;
1185 
1186 	if (append_cloned_data(clone, buffer, 0, buffer->size) != B_OK) {
1187 		free_buffer(clone);
1188 		return NULL;
1189 	}
1190 
1191 	copy_metadata(clone, buffer);
1192 	ASSERT(clone->size == buffer->size);
1193 
1194 	return clone;
1195 
1196 #if 0
1197 	ParanoiaChecker _(buffer);
1198 
1199 	TRACE(("%ld: clone_buffer(buffer %p)\n", find_thread(NULL), buffer));
1200 
1201 	net_buffer_private* clone = allocate_net_buffer();
1202 	if (clone == NULL)
1203 		return NULL;
1204 
1205 	TRACE(("%ld:   clone: %p\n", find_thread(NULL), buffer));
1206 
1207 	data_node* sourceNode = (data_node*)list_get_first_item(&buffer->buffers);
1208 	if (sourceNode == NULL) {
1209 		free_net_buffer(clone);
1210 		return NULL;
1211 	}
1212 
1213 	clone->source = (sockaddr*)&clone->storage.source;
1214 	clone->destination = (sockaddr*)&clone->storage.destination;
1215 
1216 	list_init(&clone->buffers);
1217 
1218 	// grab reference to this buffer - all additional nodes will get
1219 	// theirs in add_data_node()
1220 	acquire_data_header(sourceNode->header);
1221 	data_node* node = &clone->first_node;
1222 	node->header = sourceNode->header;
1223 	node->located = NULL;
1224 	node->used_header_space = &node->own_header_space;
1225 
1226 	while (sourceNode != NULL) {
1227 		node->start = sourceNode->start;
1228 		node->used = sourceNode->used;
1229 		node->offset = sourceNode->offset;
1230 
1231 		if (shareFreeSpace) {
1232 			// both buffers could claim the free space - note that this option
1233 			// has to be used carefully
1234 			node->used_header_space = &sourceNode->header->space;
1235 			node->tail_space = sourceNode->tail_space;
1236 		} else {
1237 			// the free space stays with the original buffer
1238 			node->used_header_space->size = 0;
1239 			node->used_header_space->free = 0;
1240 			node->tail_space = 0;
1241 		}
1242 
1243 		// add node to clone's list of buffers
1244 		list_add_item(&clone->buffers, node);
1245 
1246 		sourceNode = (data_node*)list_get_next_item(&buffer->buffers,
1247 			sourceNode);
1248 		if (sourceNode == NULL)
1249 			break;
1250 
1251 		node = add_data_node(sourceNode->header);
1252 		if (node == NULL) {
1253 			// There was not enough space left for another node in this buffer
1254 			// TODO: handle this case!
1255 			panic("clone buffer hits size limit... (fix me)");
1256 			free_net_buffer(clone);
1257 			return NULL;
1258 		}
1259 	}
1260 
1261 	copy_metadata(clone, buffer);
1262 
1263 	ASSERT(clone->size == buffer->size);
1264 	CREATE_PARANOIA_CHECK_SET(clone, "net_buffer");
1265 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, clone, &clone->size,
1266 		sizeof(clone->size));
1267 	CHECK_BUFFER(buffer);
1268 	CHECK_BUFFER(clone);
1269 
1270 	T(Clone(buffer, shareFreeSpace, clone));
1271 
1272 	return clone;
1273 #endif
1274 }
1275 
1276 
1277 /*!	Splits the buffer at the given offset; the data in front of the offset
1278 	is returned as a new buffer.
1279 */
1280 static net_buffer*
1281 split_buffer(net_buffer* from, uint32 offset)
1282 {
1283 	net_buffer* buffer = create_buffer(DATA_NODE_SIZE);
1284 	if (buffer == NULL)
1285 		return NULL;
1286 
1287 	copy_metadata(buffer, from);
1288 
1289 	ParanoiaChecker _(from);
1290 	ParanoiaChecker _2(buffer);
1291 
1292 	TRACE(("%ld: split_buffer(buffer %p -> %p, offset %ld)\n",
1293 		find_thread(NULL), from, buffer, offset));
1294 
1295 	if (append_data_from_buffer(buffer, from, offset) == B_OK) {
1296 		if (remove_header(from, offset) == B_OK) {
1297 			CHECK_BUFFER(from);
1298 			CHECK_BUFFER(buffer);
1299 			T(Split(from, offset, buffer));
1300 			return buffer;
1301 		}
1302 	}
1303 
1304 	free_buffer(buffer);
1305 	CHECK_BUFFER(from);
1306 	return NULL;
1307 }
1308 
1309 
1310 /*!	Merges the second buffer with the first. If \a after is \c true, the
1311 	second buffer's contents will be appended to those of the first, else they
1312 	will be prepended.
1313 	The second buffer will be freed if this function succeeds.
1314 */
1315 static status_t
1316 merge_buffer(net_buffer* _buffer, net_buffer* _with, bool after)
1317 {
1318 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1319 	net_buffer_private* with = (net_buffer_private*)_with;
1320 	if (with == NULL)
1321 		return B_BAD_VALUE;
1322 
1323 	TRACE(("%ld: merge buffer %p with %p (%s)\n", find_thread(NULL), buffer,
1324 		with, after ? "after" : "before"));
1325 	T(Merge(buffer, with, after));
1326 	//dump_buffer(buffer);
1327 	//dprintf("with:\n");
1328 	//dump_buffer(with);
1329 
1330 	ParanoiaChecker _(buffer);
1331 	CHECK_BUFFER(buffer);
1332 	CHECK_BUFFER(with);
1333 
1334 	// TODO: this is currently very simplistic, I really need to finish the
1335 	//	harder part of this implementation (data_node management per header)
1336 
1337 	data_node* before = NULL;
1338 
1339 	// TODO: Allocate the nodes (the only part that can fail) upfront. Put them
1340 	// in a list, so we can easily clean up if necessary.
1341 
1342 	if (!after) {
1343 		// change offset of all nodes already in the buffer
1344 		data_node* node = NULL;
1345 		while (true) {
1346 			node = (data_node*)list_get_next_item(&buffer->buffers, node);
1347 			if (node == NULL)
1348 				break;
1349 
1350 			node->offset += with->size;
1351 			if (before == NULL)
1352 				before = node;
1353 		}
1354 	}
1355 
1356 	data_node* last = NULL;
1357 
1358 	while (true) {
1359 		data_node* node = (data_node*)list_get_next_item(&with->buffers, last);
1360 		if (node == NULL)
1361 			break;
1362 
1363 		if ((uint8*)node > (uint8*)node->header
1364 			&& (uint8*)node < (uint8*)node->header + BUFFER_SIZE) {
1365 			// The node is already in the buffer, we can just move it
1366 			// over to the new owner
1367 			list_remove_item(&with->buffers, node);
1368 			with->size -= node->used;
1369 		} else {
1370 			// we need a new place for this node
1371 			data_node* newNode = add_data_node(buffer, node->header);
1372 			if (newNode == NULL) {
1373 				// TODO: try to revert buffers to their initial state!!
1374 				return ENOBUFS;
1375 			}
1376 
1377 			last = node;
1378 			*newNode = *node;
1379 			node = newNode;
1380 				// the old node will get freed with its buffer
1381 		}
1382 
1383 		if (after) {
1384 			list_add_item(&buffer->buffers, node);
1385 			node->offset = buffer->size;
1386 		} else
1387 			list_insert_item_before(&buffer->buffers, before, node);
1388 
1389 		buffer->size += node->used;
1390 	}
1391 
1392 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1393 		sizeof(buffer->size));
1394 
1395 	// the data has been merged completely at this point
1396 	free_buffer(with);
1397 
1398 	//dprintf(" merge result:\n");
1399 	//dump_buffer(buffer);
1400 	CHECK_BUFFER(buffer);
1401 
1402 	return B_OK;
1403 }
1404 
1405 
1406 /*!	Writes into existing allocated memory.
1407 	\return B_BAD_VALUE if you write outside of the buffer's current
1408 		bounds.
1409 */
1410 static status_t
1411 write_data(net_buffer* _buffer, size_t offset, const void* data, size_t size)
1412 {
1413 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1414 
1415 	T(Write(buffer, offset, data, size));
1416 
1417 	ParanoiaChecker _(buffer);
1418 
1419 	if (offset + size > buffer->size)
1420 		return B_BAD_VALUE;
1421 	if (size == 0)
1422 		return B_OK;
1423 
1424 	// find first node to write into
1425 	data_node* node = get_node_at_offset(buffer, offset);
1426 	if (node == NULL)
1427 		return B_BAD_VALUE;
1428 
1429 	offset -= node->offset;
1430 
1431 	while (true) {
1432 		size_t written = min_c(size, node->used - offset);
1433 		if (IS_USER_ADDRESS(data)) {
1434 			if (user_memcpy(node->start + offset, data, written) != B_OK)
1435 				return B_BAD_ADDRESS;
1436 		} else
1437 			memcpy(node->start + offset, data, written);
1438 
1439 		size -= written;
1440 		if (size == 0)
1441 			break;
1442 
1443 		offset = 0;
1444 		data = (void*)((uint8*)data + written);
1445 
1446 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
1447 		if (node == NULL)
1448 			return B_BAD_VALUE;
1449 	}
1450 
1451 	CHECK_BUFFER(buffer);
1452 
1453 	return B_OK;
1454 }
1455 
1456 
1457 static status_t
1458 read_data(net_buffer* _buffer, size_t offset, void* data, size_t size)
1459 {
1460 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1461 
1462 	T(Read(buffer, offset, data, size));
1463 
1464 	ParanoiaChecker _(buffer);
1465 
1466 	if (offset + size > buffer->size)
1467 		return B_BAD_VALUE;
1468 	if (size == 0)
1469 		return B_OK;
1470 
1471 	// find first node to read from
1472 	data_node* node = get_node_at_offset(buffer, offset);
1473 	if (node == NULL)
1474 		return B_BAD_VALUE;
1475 
1476 	offset -= node->offset;
1477 
1478 	while (true) {
1479 		size_t bytesRead = min_c(size, node->used - offset);
1480 		if (IS_USER_ADDRESS(data)) {
1481 			if (user_memcpy(data, node->start + offset, bytesRead) != B_OK)
1482 				return B_BAD_ADDRESS;
1483 		} else
1484 			memcpy(data, node->start + offset, bytesRead);
1485 
1486 		size -= bytesRead;
1487 		if (size == 0)
1488 			break;
1489 
1490 		offset = 0;
1491 		data = (void*)((uint8*)data + bytesRead);
1492 
1493 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
1494 		if (node == NULL)
1495 			return B_BAD_VALUE;
1496 	}
1497 
1498 	CHECK_BUFFER(buffer);
1499 
1500 	return B_OK;
1501 }
1502 
1503 
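/*!	Reserves \a size bytes at the beginning of the buffer's data. If the
	reserved space is contiguous, a pointer to it is stored in
	\a _contiguousBuffer; otherwise \c NULL is stored there, and the caller
	has to fill the space via write_data().
*/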
1504 static status_t
1505 prepend_size(net_buffer* _buffer, size_t size, void** _contiguousBuffer)
1506 {
1507 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1508 	data_node* node = (data_node*)list_get_first_item(&buffer->buffers);
1509 
1510 	T(PrependSize(buffer, size));
1511 
1512 	ParanoiaChecker _(buffer);
1513 
1514 	TRACE(("%ld: prepend_size(buffer %p, size %ld) [has %u]\n",
1515 		find_thread(NULL), buffer, size, node->HeaderSpace()));
1516 	//dump_buffer(buffer);
1517 
1518 	if (node->HeaderSpace() < size) {
1519 		// we need to prepend new buffers
1520 
1521 		size_t bytesLeft = size;
1522 		size_t sizePrepended = 0;
1523 		do {
1524 			if (node->HeaderSpace() == 0) {
1525 				size_t headerSpace = MAX_FREE_BUFFER_SIZE;
1526 				data_header* header = create_data_header(headerSpace);
1527 				if (header == NULL) {
1528 					remove_header(buffer, sizePrepended);
1529 					return B_NO_MEMORY;
1530 				}
1531 
1532 				data_node* previous = node;
1533 
1534 				node = (data_node*)add_first_data_node(header);
1535 
1536 				list_insert_item_before(&buffer->buffers, previous, node);
1537 
1538 				// Release the initial reference to the header, so that it will
1539 				// be deleted when the node is removed.
1540 				release_data_header(header);
1541 			}
1542 
1543 			size_t willConsume = min_c(bytesLeft, node->HeaderSpace());
1544 
1545 			node->SubtractHeaderSpace(willConsume);
1546 			node->start -= willConsume;
1547 			node->used += willConsume;
1548 			bytesLeft -= willConsume;
1549 			sizePrepended += willConsume;
1550 		} while (bytesLeft > 0);
1551 
1552 		// correct data offset in all nodes
1553 
1554 		size_t offset = 0;
1555 		node = NULL;
1556 		while ((node = (data_node*)list_get_next_item(&buffer->buffers,
1557 				node)) != NULL) {
1558 			node->offset = offset;
1559 			offset += node->used;
1560 		}
1561 
1562 		if (_contiguousBuffer)
1563 			*_contiguousBuffer = NULL;
1564 	} else {
1565 		// the data fits into this buffer
1566 		node->SubtractHeaderSpace(size);
1567 		node->start -= size;
1568 		node->used += size;
1569 
1570 		if (_contiguousBuffer)
1571 			*_contiguousBuffer = node->start;
1572 
1573 		// adjust offset of following nodes
1574 		while ((node = (data_node*)list_get_next_item(&buffer->buffers, node))
1575 				!= NULL) {
1576 			node->offset += size;
1577 		}
1578 	}
1579 
1580 	buffer->size += size;
1581 
1582 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1583 		sizeof(buffer->size));
1584 
1585 	//dprintf(" prepend_size result:\n");
1586 	//dump_buffer(buffer);
1587 	CHECK_BUFFER(buffer);
1588 	return B_OK;
1589 }
1590 
1591 
1592 static status_t
1593 prepend_data(net_buffer* buffer, const void* data, size_t size)
1594 {
1595 	void* contiguousBuffer;
1596 	status_t status = prepend_size(buffer, size, &contiguousBuffer);
1597 	if (status < B_OK)
1598 		return status;
1599 
1600 	if (contiguousBuffer) {
1601 		if (IS_USER_ADDRESS(data)) {
1602 			if (user_memcpy(contiguousBuffer, data, size) != B_OK)
1603 				return B_BAD_ADDRESS;
1604 		} else
1605 			memcpy(contiguousBuffer, data, size);
1606 	} else
1607 		write_data(buffer, 0, data, size);
1608 
1609 	//dprintf(" prepend result:\n");
1610 	//dump_buffer(buffer);
1611 
1612 	return B_OK;
1613 }
1614 
1615 
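/*!	Reserves \a size bytes at the end of the buffer's data. If the reserved
	space is contiguous, a pointer to it is stored in \a _contiguousBuffer;
	otherwise \c NULL is stored there, and the caller has to fill the space
	via write_data().
*/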
1616 static status_t
1617 append_size(net_buffer* _buffer, size_t size, void** _contiguousBuffer)
1618 {
1619 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1620 	data_node* node = (data_node*)list_get_last_item(&buffer->buffers);
1621 
1622 	T(AppendSize(buffer, size));
1623 
1624 	ParanoiaChecker _(buffer);
1625 
1626 	TRACE(("%ld: append_size(buffer %p, size %ld)\n", find_thread(NULL),
1627 		buffer, size));
1628 	//dump_buffer(buffer);
1629 
1630 	if (node->TailSpace() < size) {
1631 		// we need to append at least one new buffer
1632 		uint32 previousTailSpace = node->TailSpace();
1633 		uint32 headerSpace = DATA_NODE_SIZE;
1634 		uint32 sizeUsed = MAX_FREE_BUFFER_SIZE - headerSpace;
1635 
1636 		// allocate space left in the node
1637 		node->SetTailSpace(0);
1638 		node->used += previousTailSpace;
1639 		buffer->size += previousTailSpace;
1640 		uint32 sizeAdded = previousTailSpace;
1641 		SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1642 			sizeof(buffer->size));
1643 
1644 		// allocate all buffers
1645 
1646 		while (sizeAdded < size) {
1647 			if (sizeAdded + sizeUsed > size) {
1648 				// last data_header and not all available space is used
1649 				sizeUsed = size - sizeAdded;
1650 			}
1651 
1652 			data_header* header = create_data_header(headerSpace);
1653 			if (header == NULL) {
1654 				remove_trailer(buffer, sizeAdded);
1655 				return B_NO_MEMORY;
1656 			}
1657 
1658 			node = add_first_data_node(header);
1659 			if (node == NULL) {
1660 				release_data_header(header);
1661 				return B_NO_MEMORY;
1662 			}
1663 
1664 			node->SetTailSpace(node->TailSpace() - sizeUsed);
1665 			node->used = sizeUsed;
1666 			node->offset = buffer->size;
1667 
1668 			buffer->size += sizeUsed;
1669 			sizeAdded += sizeUsed;
1670 			SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1671 				sizeof(buffer->size));
1672 
1673 			list_add_item(&buffer->buffers, node);
1674 
1675 			// Release the initial reference to the header, so that it will
1676 			// be deleted when the node is removed.
1677 			release_data_header(header);
1678 		}
1679 
1680 		if (_contiguousBuffer)
1681 			*_contiguousBuffer = NULL;
1682 
1683 		//dprintf(" append result 1:\n");
1684 		//dump_buffer(buffer);
1685 		CHECK_BUFFER(buffer);
1686 
1687 		return B_OK;
1688 	}
1689 
1690 	// the data fits into this buffer
1691 	node->SetTailSpace(node->TailSpace() - size);
1692 
1693 	if (_contiguousBuffer)
1694 		*_contiguousBuffer = node->start + node->used;
1695 
1696 	node->used += size;
1697 	buffer->size += size;
1698 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1699 		sizeof(buffer->size));
1700 
1701 	//dprintf(" append result 2:\n");
1702 	//dump_buffer(buffer);
1703 	CHECK_BUFFER(buffer);
1704 
1705 	return B_OK;
1706 }
1707 
1708 
1709 static status_t
1710 append_data(net_buffer* buffer, const void* data, size_t size)
1711 {
1712 	size_t used = buffer->size;
1713 
1714 	void* contiguousBuffer;
1715 	status_t status = append_size(buffer, size, &contiguousBuffer);
1716 	if (status < B_OK)
1717 		return status;
1718 
1719 	if (contiguousBuffer) {
1720 		if (IS_USER_ADDRESS(data)) {
1721 			if (user_memcpy(contiguousBuffer, data, size) != B_OK)
1722 				return B_BAD_ADDRESS;
1723 		} else
1724 			memcpy(contiguousBuffer, data, size);
1725 	} else
1726 		write_data(buffer, used, data, size);
1727 
1728 	return B_OK;
1729 }
1730 
1731 
1732 /*!	Removes bytes from the beginning of the buffer.
1733 */
1734 static status_t
1735 remove_header(net_buffer* _buffer, size_t bytes)
1736 {
1737 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1738 
1739 	T(RemoveHeader(buffer, bytes));
1740 
1741 	ParanoiaChecker _(buffer);
1742 
1743 	if (bytes > buffer->size)
1744 		return B_BAD_VALUE;
1745 
1746 	TRACE(("%ld: remove_header(buffer %p, %ld bytes)\n", find_thread(NULL),
1747 		buffer, bytes));
1748 	//dump_buffer(buffer);
1749 
1750 	size_t left = bytes;
1751 	data_node* node = NULL;
1752 
1753 	while (true) {
1754 		node = (data_node*)list_get_first_item(&buffer->buffers);
1755 		if (node == NULL) {
1756 			if (left == 0)
1757 				break;
1758 			CHECK_BUFFER(buffer);
1759 			return B_ERROR;
1760 		}
1761 
1762 		if (node->used > left)
1763 			break;
1764 
1765 		// node will be removed completely
1766 		list_remove_item(&buffer->buffers, node);
1767 		left -= node->used;
1768 		remove_data_node(node);
1769 		node = NULL;
1770 	}
1771 
1772 	// cut remaining node, if any
1773 
1774 	if (node != NULL) {
1775 		size_t cut = min_c(node->used, left);
1776 		node->offset = 0;
1777 		node->start += cut;
1778 		node->AddHeaderSpace(cut);
1779 		node->used -= cut;
1780 
1781 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
1782 	}
1783 
1784 	// adjust offset of following nodes
1785 	while (node != NULL) {
1786 		node->offset -= bytes;
1787 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
1788 	}
1789 
1790 	buffer->size -= bytes;
1791 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1792 		sizeof(buffer->size));
1793 
1794 	//dprintf(" remove result:\n");
1795 	//dump_buffer(buffer);
1796 	CHECK_BUFFER(buffer);
1797 
1798 	return B_OK;
1799 }
1800 
1801 
1802 /*!	Removes bytes from the end of the buffer.
1803 */
1804 static status_t
1805 remove_trailer(net_buffer* buffer, size_t bytes)
1806 {
1807 	return trim_data(buffer, buffer->size - bytes);
1808 }
1809 
1810 
1811 /*!	Trims the buffer to the specified \a newSize by removing space from
1812 	the end of the buffer.
1813 */
1814 static status_t
1815 trim_data(net_buffer* _buffer, size_t newSize)
1816 {
1817 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1818 	TRACE(("%ld: trim_data(buffer %p, newSize = %ld, buffer size = %ld)\n",
1819 		find_thread(NULL), buffer, newSize, buffer->size));
1820 	T(Trim(buffer, newSize));
1821 	//dump_buffer(buffer);
1822 
1823 	ParanoiaChecker _(buffer);
1824 
1825 	if (newSize > buffer->size)
1826 		return B_BAD_VALUE;
1827 	if (newSize == buffer->size)
1828 		return B_OK;
1829 
1830 	data_node* node = get_node_at_offset(buffer, newSize);
1831 	if (node == NULL) {
1832 		// trim size greater than buffer size
1833 		return B_BAD_VALUE;
1834 	}
1835 
1836 	int32 diff = node->used + node->offset - newSize;
1837 	node->SetTailSpace(node->TailSpace() + diff);
1838 	node->used -= diff;
1839 
1840 	if (node->used > 0)
1841 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
1842 
1843 	while (node != NULL) {
1844 		data_node* next = (data_node*)list_get_next_item(&buffer->buffers, node);
1845 		list_remove_item(&buffer->buffers, node);
1846 		remove_data_node(node);
1847 
1848 		node = next;
1849 	}
1850 
1851 	buffer->size = newSize;
1852 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1853 		sizeof(buffer->size));
1854 
1855 	//dprintf(" trim result:\n");
1856 	//dump_buffer(buffer);
1857 	CHECK_BUFFER(buffer);
1858 
1859 	return B_OK;
1860 }
1861 
1862 
1863 /*!	Appends data from the \a source buffer to the target \a buffer. It only
1864 	clones the data, that is, the data is not copied, just referenced.
1865 */
1866 static status_t
1867 append_cloned_data(net_buffer* _buffer, net_buffer* _source, uint32 offset,
1868 	size_t bytes)
1869 {
1870 	if (bytes == 0)
1871 		return B_OK;
1872 
1873 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1874 	net_buffer_private* source = (net_buffer_private*)_source;
1875 	TRACE(("%ld: append_cloned_data(buffer %p, source %p, offset = %ld, "
1876 		"bytes = %ld)\n", find_thread(NULL), buffer, source, offset, bytes));
1877 	T(AppendCloned(buffer, source, offset, bytes));
1878 
1879 	ParanoiaChecker _(buffer);
1880 	ParanoiaChecker _2(source);
1881 
1882 	if (source->size < offset + bytes || source->size < offset)
1883 		return B_BAD_VALUE;
1884 
1885 	// find data_node to start with from the source buffer
1886 	data_node* node = get_node_at_offset(source, offset);
1887 	if (node == NULL) {
1888 		// trim size greater than buffer size
1889 		return B_BAD_VALUE;
1890 	}
1891 
1892 	size_t sizeAppended = 0;
1893 
1894 	while (node != NULL && bytes > 0) {
1895 		data_node* clone = add_data_node(buffer, node->header);
1896 		if (clone == NULL) {
1897 			remove_trailer(buffer, sizeAppended);
1898 			return ENOBUFS;
1899 		}
1900 
1901 		if (offset)
1902 			offset -= node->offset;
1903 
1904 		clone->offset = buffer->size;
1905 		clone->start = node->start + offset;
1906 		clone->used = min_c(bytes, node->used - offset);
1907 		clone->flags |= DATA_NODE_READ_ONLY;
1908 
1909 		list_add_item(&buffer->buffers, clone);
1910 
1911 		offset = 0;
1912 		bytes -= clone->used;
1913 		buffer->size += clone->used;
1914 		sizeAppended += clone->used;
1915 		node = (data_node*)list_get_next_item(&source->buffers, node);
1916 	}
1917 
1918 	if (bytes != 0)
1919 		panic("add_cloned_data() failed, bytes != 0!\n");
1920 
1921 	//dprintf(" append cloned result:\n");
1922 	//dump_buffer(buffer);
1923 	CHECK_BUFFER(source);
1924 	CHECK_BUFFER(buffer);
1925 	SET_PARANOIA_CHECK(PARANOIA_SUSPICIOUS, buffer, &buffer->size,
1926 		sizeof(buffer->size));
1927 
1928 	return B_OK;
1929 }
1930 
1931 
1932 void
1933 set_ancillary_data(net_buffer* buffer, ancillary_data_container* container)
1934 {
1935 	((net_buffer_private*)buffer)->ancillary_data = container;
1936 }
1937 
1938 
1939 ancillary_data_container*
1940 get_ancillary_data(net_buffer* buffer)
1941 {
1942 	return ((net_buffer_private*)buffer)->ancillary_data;
1943 }
1944 
1945 
1946 /*!	Moves all ancillary data from buffer \c from to the end of the list of
1947 	ancillary data of buffer \c to. Note that this is the only function that
1948 	transfers or copies ancillary data from one buffer to another.
1949 
1950 	\param from The buffer from which to remove the ancillary data.
1951 	\param to The buffer to which to add the ancillary data.
1952 	\return A pointer to the first of the moved ancillary data, if any, \c NULL
1953 		otherwise.
1954 */
1955 static void*
1956 transfer_ancillary_data(net_buffer* _from, net_buffer* _to)
1957 {
1958 	net_buffer_private* from = (net_buffer_private*)_from;
1959 	net_buffer_private* to = (net_buffer_private*)_to;
1960 
1961 	if (from == NULL || to == NULL)
1962 		return NULL;
1963 
1964 	if (from->ancillary_data == NULL)
1965 		return NULL;
1966 
1967 	if (to->ancillary_data == NULL) {
1968 		// no ancillary data in the target buffer
1969 		to->ancillary_data = from->ancillary_data;
1970 		from->ancillary_data = NULL;
1971 		return next_ancillary_data(to->ancillary_data, NULL, NULL);
1972 	}
1973 
1974 	// both have ancillary data
1975 	void* data = move_ancillary_data(from->ancillary_data,
1976 		to->ancillary_data);
1977 	delete_ancillary_data_container(from->ancillary_data);
1978 	from->ancillary_data = NULL;
1979 
1980 	return data;
1981 }
1982 
1983 
1984 /*!	Tries to directly access the requested space in the buffer.
1985 	If the space is contiguous, the function will succeed and place a pointer
1986 	to that space into \a _contiguousBuffer.
1987 
1988 	\return B_BAD_VALUE if the offset is outside of the buffer's bounds.
1989 	\return B_ERROR in case the buffer is not contiguous at that location.
1990 */
1991 static status_t
1992 direct_access(net_buffer* _buffer, uint32 offset, size_t size,
1993 	void** _contiguousBuffer)
1994 {
1995 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
1996 
1997 	ParanoiaChecker _(buffer);
1998 
1999 	//TRACE(("direct_access(buffer %p, offset %ld, size %ld)\n", buffer, offset,
2000 	//	size));
2001 
2002 	if (offset + size > buffer->size)
2003 		return B_BAD_VALUE;
2004 
2005 	// find node to access
2006 	data_node* node = get_node_at_offset(buffer, offset);
2007 	if (node == NULL)
2008 		return B_BAD_VALUE;
2009 
2010 	offset -= node->offset;
2011 
2012 	if (size > node->used - offset)
2013 		return B_ERROR;
2014 
2015 	*_contiguousBuffer = node->start + offset;
2016 	return B_OK;
2017 }
2018 
2019 
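/*!	Computes a 16 bit one's complement checksum over \a size bytes of the
	buffer, starting at \a offset. If \a finalize is \c true, the inverted
	(final) checksum is returned; otherwise the intermediate sum is returned,
	so that more data can still be added to it.
*/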
2020 static int32
2021 checksum_data(net_buffer* _buffer, uint32 offset, size_t size, bool finalize)
2022 {
2023 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
2024 
2025 	if (offset + size > buffer->size || size == 0)
2026 		return B_BAD_VALUE;
2027 
2028 	// find first node to read from
2029 	data_node* node = get_node_at_offset(buffer, offset);
2030 	if (node == NULL)
2031 		return B_ERROR;
2032 
2033 	offset -= node->offset;
2034 
2035 	// Since the maximum buffer size is 65536 bytes, the 32 bit sum cannot
2036 	// overflow - we don't need to fold the carry inside the loop, we can
2037 	// safely do it afterwards
2038 	uint32 sum = 0;
2039 
2040 	while (true) {
2041 		size_t bytes = min_c(size, node->used - offset);
2042 		if ((offset + node->offset) & 1) {
2043 			// if we're at an odd offset, we have to byte swap the partial sum
2044 			sum += __swap_int16(compute_checksum(node->start + offset, bytes));
2045 		} else
2046 			sum += compute_checksum(node->start + offset, bytes);
2047 
2048 		size -= bytes;
2049 		if (size == 0)
2050 			break;
2051 
2052 		offset = 0;
2053 
2054 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
2055 		if (node == NULL)
2056 			return B_ERROR;
2057 	}
2058 
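	// fold the carry bits back into the lower 16 bits; e.g. a sum of
	// 0x12345 becomes 0x2345 + 0x1 = 0x2346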
2059 	while (sum >> 16) {
2060 		sum = (sum & 0xffff) + (sum >> 16);
2061 	}
2062 
2063 	if (!finalize)
2064 		return (uint16)sum;
2065 
2066 	return (uint16)~sum;
2067 }
2068 
2069 
2070 static uint32
2071 get_iovecs(net_buffer* _buffer, struct iovec* iovecs, uint32 vecCount)
2072 {
2073 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
2074 	data_node* node = (data_node*)list_get_first_item(&buffer->buffers);
2075 	uint32 count = 0;
2076 
2077 	while (node != NULL && count < vecCount) {
2078 		if (node->used > 0) {
2079 			iovecs[count].iov_base = node->start;
2080 			iovecs[count].iov_len = node->used;
2081 			count++;
2082 		}
2083 
2084 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
2085 	}
2086 
2087 	return count;
2088 }
2089 
2090 
2091 static uint32
2092 count_iovecs(net_buffer* _buffer)
2093 {
2094 	net_buffer_private* buffer = (net_buffer_private*)_buffer;
2095 	data_node* node = (data_node*)list_get_first_item(&buffer->buffers);
2096 	uint32 count = 0;
2097 
2098 	while (node != NULL) {
2099 		if (node->used > 0)
2100 			count++;
2101 
2102 		node = (data_node*)list_get_next_item(&buffer->buffers, node);
2103 	}
2104 
2105 	return count;
2106 }
2107 
2108 
2109 static void
2110 swap_addresses(net_buffer* buffer)
2111 {
2112 	std::swap(buffer->source, buffer->destination);
2113 }
2114 
2115 
2116 static status_t
2117 std_ops(int32 op, ...)
2118 {
2119 	switch (op) {
2120 		case B_MODULE_INIT:
2121 			// TODO: improve our code a bit so we can add constructors
2122 			//	and keep around half-constructed buffers in the slab
2123 
2124 			sNetBufferCache = create_object_cache("net buffer cache",
2125 				sizeof(net_buffer_private), 8, NULL, NULL, NULL);
2126 			if (sNetBufferCache == NULL)
2127 				return B_NO_MEMORY;
2128 
2129 			sDataNodeCache = create_object_cache("data node cache", BUFFER_SIZE,
2130 				0, NULL, NULL, NULL);
2131 			if (sDataNodeCache == NULL) {
2132 				delete_object_cache(sNetBufferCache);
2133 				return B_NO_MEMORY;
2134 			}
2135 
2136 #if ENABLE_STATS
2137 			add_debugger_command_etc("net_buffer_stats", &dump_net_buffer_stats,
2138 				"Print net buffer statistics",
2139 				"\nPrint net buffer statistics.\n", 0);
2140 #endif
2141 #if ENABLE_DEBUGGER_COMMANDS
2142 			add_debugger_command_etc("net_buffer", &dump_net_buffer,
2143 				"Dump net buffer",
2144 				"\nDump the net buffer's internal structures.\n", 0);
2145 #endif
2146 			return B_OK;
2147 
2148 		case B_MODULE_UNINIT:
2149 #if ENABLE_STATS
2150 			remove_debugger_command("net_buffer_stats", &dump_net_buffer_stats);
2151 #endif
2152 #if ENABLE_DEBUGGER_COMMANDS
2153 			remove_debugger_command("net_buffer", &dump_net_buffer);
2154 #endif
2155 			delete_object_cache(sNetBufferCache);
2156 			delete_object_cache(sDataNodeCache);
2157 			return B_OK;
2158 
2159 		default:
2160 			return B_ERROR;
2161 	}
2162 }
2163 
2164 
2165 net_buffer_module_info gNetBufferModule = {
2166 	{
2167 		NET_BUFFER_MODULE_NAME,
2168 		0,
2169 		std_ops
2170 	},
2171 	create_buffer,
2172 	free_buffer,
2173 
2174 	duplicate_buffer,
2175 	clone_buffer,
2176 	split_buffer,
2177 	merge_buffer,
2178 
2179 	prepend_size,
2180 	prepend_data,
2181 	append_size,
2182 	append_data,
2183 	NULL,	// insert
2184 	NULL,	// remove
2185 	remove_header,
2186 	remove_trailer,
2187 	trim_data,
2188 	append_cloned_data,
2189 
2190 	NULL,	// associate_data
2191 
2192 	set_ancillary_data,
2193 	get_ancillary_data,
2194 	transfer_ancillary_data,
2195 
2196 	direct_access,
2197 	read_data,
2198 	write_data,
2199 
2200 	checksum_data,
2201 
2202 	NULL,	// get_memory_map
2203 	get_iovecs,
2204 	count_iovecs,
2205 
2206 	swap_addresses,
2207 
2208 	dump_buffer,	// dump
2209 };
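

// A minimal usage sketch of the buffer API implemented above (not compiled;
// the 64 bytes of reserved header space and the dummy 8 byte protocol header
// are made-up example values).
#if 0
static void
example_buffer_usage()
{
	// create a buffer with some space for headers to be prepended later
	net_buffer* buffer = create_buffer(64);
	if (buffer == NULL)
		return;

	static const char kPayload[] = "payload";
	uint8 header[8] = {0};

	// append the payload, prepend a dummy protocol header in front of it,
	// and read the payload back from behind that header
	if (append_data(buffer, kPayload, sizeof(kPayload)) == B_OK
		&& prepend_data(buffer, header, sizeof(header)) == B_OK) {
		char check[sizeof(kPayload)];
		read_data(buffer, sizeof(header), check, sizeof(check));
	}

	free_buffer(buffer);
}
#endif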
2210 
2211