xref: /haiku/src/add-ons/kernel/network/protocols/tcp/TCPEndpoint.cpp (revision 362f1bd35b70e0c904bacb00f4708fbd6bc0173c)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Andrew Galante, haiku.galante@gmail.com
7  *		Axel Dörfler, axeld@pinc-software.de
8  *		Hugo Santos, hugosantos@gmail.com
9  */
10 
11 
12 #include "TCPEndpoint.h"
13 
14 #include <netinet/in.h>
15 #include <netinet/ip.h>
16 #include <netinet/tcp.h>
17 #include <new>
18 #include <signal.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <stdint.h>
22 
23 #include <KernelExport.h>
24 #include <Select.h>
25 
26 #include <net_buffer.h>
27 #include <net_datalink.h>
28 #include <net_stat.h>
29 #include <NetBufferUtilities.h>
30 #include <NetUtilities.h>
31 
32 #include <lock.h>
33 #include <tracing.h>
34 #include <util/AutoLock.h>
35 #include <util/list.h>
36 
37 #include "EndpointManager.h"
38 
39 
40 // References:
41 //	- RFC 793 - Transmission Control Protocol
42 //	- RFC 813 - Window and Acknowledgement Strategy in TCP
43 //	- RFC 1337 - TIME_WAIT Assassination Hazards in TCP
44 //
45 // Things this implementation currently doesn't implement:
46 //	- TCP Slow Start, Congestion Avoidance, Fast Retransmit, and Fast Recovery,
47 //	  RFC 2001, RFC 2581, RFC 3042
48 //	- NewReno Modification to TCP's Fast Recovery, RFC 2582
49 //	- Explicit Congestion Notification (ECN), RFC 3168
50 //	- SYN-Cache
51 //	- SACK, Selective Acknowledgment - RFC 2018, RFC 2883, RFC 3517
52 //	- Forward RTO-Recovery, RFC 4138
53 //	- Time-Wait hash instead of keeping sockets alive
54 //
55 // Things incomplete in this implementation:
56 //	- TCP Extensions for High Performance, RFC 1323 - RTTM, PAWS
57 
// Formats \a address as a C string for trace output. The expansion calls
// Domain(), so the macro can only be used where that accessor is in scope
// (i.e. inside TCPEndpoint member functions). The returned pointer belongs to
// the temporary AddressString and is only valid within the full expression.
#define PrintAddress(address) \
	AddressString(Domain(), address, true).Data()
60 
61 //#define TRACE_TCP
62 //#define PROBE_TCP
63 
// TRACE(): debug output helper. With TRACE_TCP defined it prints the calling
// thread, the current system time, the endpoint pointer and the name of its
// state, followed by the formatted message. It references `this` and fState,
// so it is only usable inside TCPEndpoint member functions. Without TRACE_TCP
// it expands to an empty statement and its arguments are not evaluated.
#ifdef TRACE_TCP
// the space before ', ##args' is important in order for this to work with cpp 2.95
#	define TRACE(format, args...)	dprintf("%" B_PRId32 ": TCP [%" \
		B_PRIdBIGTIME "] %p (%12s) " format "\n", find_thread(NULL), \
		system_time(), this, name_for_state(fState) , ##args)
#else
#	define TRACE(args...)			do { } while (0)
#endif
72 
// PROBE(): with PROBE_TCP defined, dumps one line of send-side state for
// \a buffer: source/destination address, buffer size, fSendNext,
// fSendUnacknowledged, congestion window, slow start threshold, the passed in
// \a window, fSendWindow, the amount of data in flight (fSendMax -
// fSendUnacknowledged), the send queue fill levels and the retransmit
// timeout. Without PROBE_TCP it expands to an empty statement.
#ifdef PROBE_TCP
#	define PROBE(buffer, window) \
	dprintf("TCP PROBE %" B_PRIdBIGTIME " %s %s %" B_PRIu32 " snxt %" B_PRIu32 \
		" suna %" B_PRIu32 " cw %" B_PRIu32 " sst %" B_PRIu32 " win %" \
		B_PRIu32 " swin %" B_PRIu32 " smax-suna %" B_PRIu32 " savail %" \
		B_PRIuSIZE " sqused %" B_PRIuSIZE " rto %" B_PRIdBIGTIME "\n", \
		system_time(), PrintAddress(buffer->source), \
		PrintAddress(buffer->destination), buffer->size, fSendNext.Number(), \
		fSendUnacknowledged.Number(), fCongestionWindow, fSlowStartThreshold, \
		window, fSendWindow, (fSendMax - fSendUnacknowledged).Number(), \
		fSendQueue.Available(fSendNext), fSendQueue.Used(), fRetransmitTimeout)
#else
#	define PROBE(buffer, window)	do { } while (0)
#endif
87 
88 #if TCP_TRACING
89 namespace TCPTracing {
90 
91 class Receive : public AbstractTraceEntry {
92 public:
93 	Receive(TCPEndpoint* endpoint, tcp_segment_header& segment, uint32 window,
94 			net_buffer* buffer)
95 		:
96 		fEndpoint(endpoint),
97 		fBuffer(buffer),
98 		fBufferSize(buffer->size),
99 		fSequence(segment.sequence),
100 		fAcknowledge(segment.acknowledge),
101 		fWindow(window),
102 		fState(endpoint->State()),
103 		fFlags(segment.flags)
104 	{
105 		Initialized();
106 	}
107 
108 	virtual void AddDump(TraceOutput& out)
109 	{
110 		out.Print("tcp:%p (%12s) receive buffer %p (%" B_PRIu32 " bytes), "
111 			"flags %#" B_PRIx8 ", seq %" B_PRIu32 ", ack %" B_PRIu32
112 			", wnd %" B_PRIu32, fEndpoint, name_for_state(fState), fBuffer,
113 			fBufferSize, fFlags, fSequence, fAcknowledge, fWindow);
114 	}
115 
116 protected:
117 	TCPEndpoint*	fEndpoint;
118 	net_buffer*		fBuffer;
119 	uint32			fBufferSize;
120 	uint32			fSequence;
121 	uint32			fAcknowledge;
122 	uint32			fWindow;
123 	tcp_state		fState;
124 	uint8			fFlags;
125 };
126 
127 class Send : public AbstractTraceEntry {
128 public:
129 	Send(TCPEndpoint* endpoint, tcp_segment_header& segment, net_buffer* buffer,
130 			tcp_sequence firstSequence, tcp_sequence lastSequence)
131 		:
132 		fEndpoint(endpoint),
133 		fBuffer(buffer),
134 		fBufferSize(buffer->size),
135 		fSequence(segment.sequence),
136 		fAcknowledge(segment.acknowledge),
137 		fFirstSequence(firstSequence.Number()),
138 		fLastSequence(lastSequence.Number()),
139 		fState(endpoint->State()),
140 		fFlags(segment.flags)
141 	{
142 		Initialized();
143 	}
144 
145 	virtual void AddDump(TraceOutput& out)
146 	{
147 		out.Print("tcp:%p (%12s) send buffer %p (%" B_PRIu32 " bytes), "
148 			"flags %#" B_PRIx8 ", seq %" B_PRIu32 ", ack %" B_PRIu32
149 			", first %" B_PRIu32 ", last %" B_PRIu32, fEndpoint,
150 			name_for_state(fState), fBuffer, fBufferSize, fFlags, fSequence,
151 			fAcknowledge, fFirstSequence, fLastSequence);
152 	}
153 
154 protected:
155 	TCPEndpoint*	fEndpoint;
156 	net_buffer*		fBuffer;
157 	uint32			fBufferSize;
158 	uint32			fSequence;
159 	uint32			fAcknowledge;
160 	uint32			fFirstSequence;
161 	uint32			fLastSequence;
162 	tcp_state		fState;
163 	uint8			fFlags;
164 };
165 
166 class State : public AbstractTraceEntry {
167 public:
168 	State(TCPEndpoint* endpoint)
169 		:
170 		fEndpoint(endpoint),
171 		fState(endpoint->State())
172 	{
173 		Initialized();
174 	}
175 
176 	virtual void AddDump(TraceOutput& out)
177 	{
178 		out.Print("tcp:%p (%12s) state change", fEndpoint,
179 			name_for_state(fState));
180 	}
181 
182 protected:
183 	TCPEndpoint*	fEndpoint;
184 	tcp_state		fState;
185 };
186 
187 class Spawn : public AbstractTraceEntry {
188 public:
189 	Spawn(TCPEndpoint* listeningEndpoint, TCPEndpoint* spawnedEndpoint)
190 		:
191 		fListeningEndpoint(listeningEndpoint),
192 		fSpawnedEndpoint(spawnedEndpoint)
193 	{
194 		Initialized();
195 	}
196 
197 	virtual void AddDump(TraceOutput& out)
198 	{
199 		out.Print("tcp:%p spawns %p", fListeningEndpoint, fSpawnedEndpoint);
200 	}
201 
202 protected:
203 	TCPEndpoint*	fListeningEndpoint;
204 	TCPEndpoint*	fSpawnedEndpoint;
205 };
206 
207 class Error : public AbstractTraceEntry {
208 public:
209 	Error(TCPEndpoint* endpoint, const char* error, int32 line)
210 		:
211 		fEndpoint(endpoint),
212 		fLine(line),
213 		fError(error),
214 		fState(endpoint->State())
215 	{
216 		Initialized();
217 	}
218 
219 	virtual void AddDump(TraceOutput& out)
220 	{
221 		out.Print("tcp:%p (%12s) error at line %" B_PRId32 ": %s", fEndpoint,
222 			name_for_state(fState), fLine, fError);
223 	}
224 
225 protected:
226 	TCPEndpoint*	fEndpoint;
227 	int32			fLine;
228 	const char*		fError;
229 	tcp_state		fState;
230 };
231 
232 class TimerSet : public AbstractTraceEntry {
233 public:
234 	TimerSet(TCPEndpoint* endpoint, const char* which, bigtime_t timeout)
235 		:
236 		fEndpoint(endpoint),
237 		fWhich(which),
238 		fTimeout(timeout),
239 		fState(endpoint->State())
240 	{
241 		Initialized();
242 	}
243 
244 	virtual void AddDump(TraceOutput& out)
245 	{
246 		out.Print("tcp:%p (%12s) %s timer set to %" B_PRIdBIGTIME, fEndpoint,
247 			name_for_state(fState), fWhich, fTimeout);
248 	}
249 
250 protected:
251 	TCPEndpoint*	fEndpoint;
252 	const char*		fWhich;
253 	bigtime_t		fTimeout;
254 	tcp_state		fState;
255 };
256 
257 class TimerTriggered : public AbstractTraceEntry {
258 public:
259 	TimerTriggered(TCPEndpoint* endpoint, const char* which)
260 		:
261 		fEndpoint(endpoint),
262 		fWhich(which),
263 		fState(endpoint->State())
264 	{
265 		Initialized();
266 	}
267 
268 	virtual void AddDump(TraceOutput& out)
269 	{
270 		out.Print("tcp:%p (%12s) %s timer triggered", fEndpoint,
271 			name_for_state(fState), fWhich);
272 	}
273 
274 protected:
275 	TCPEndpoint*	fEndpoint;
276 	const char*		fWhich;
277 	tcp_state		fState;
278 };
279 
280 class APICall : public AbstractTraceEntry {
281 public:
282 	APICall(TCPEndpoint* endpoint, const char* which)
283 		:
284 		fEndpoint(endpoint),
285 		fWhich(which),
286 		fState(endpoint->State())
287 	{
288 		Initialized();
289 	}
290 
291 	virtual void AddDump(TraceOutput& out)
292 	{
293 		out.Print("tcp:%p (%12s) api call: %s", fEndpoint,
294 			name_for_state(fState), fWhich);
295 	}
296 
297 protected:
298 	TCPEndpoint*	fEndpoint;
299 	const char*		fWhich;
300 	tcp_state		fState;
301 };
302 
303 }	// namespace TCPTracing
304 
// T(x) constructs the TCPTracing entry `x` with a non-throwing new when
// TCP_TRACING is enabled, and expands to nothing otherwise (so `T(...);`
// becomes an empty statement).
#	define T(x)	new(std::nothrow) TCPTracing::x
#else
#	define T(x)
308 #endif	// TCP_TRACING
309 
310 
// constants for the fFlags field
enum {
	// Both option flags are set by default in the constructor.
	// NOTE(review): presumably they gate the RFC 1323 window scale and
	// timestamp options -- confirm against the option handling code.
	FLAG_OPTION_WINDOW_SCALE	= 0x01,
	FLAG_OPTION_TIMESTAMP		= 0x02,
	// Set by Shutdown() for SHUT_RD/SHUT_RDWR; ReadData() then returns
	// instead of waiting for more data.
	// TODO: Should FLAG_NO_RECEIVE apply as well to received connections?
	//       That is, what is expected from accept() after a shutdown()
	//       is performed on a listen()ing socket.
	FLAG_NO_RECEIVE				= 0x04,
	// Set by Free() once the socket has been released by its owner.
	FLAG_CLOSED					= 0x08,
	// Set by _Close() and, for local connections, by _EnterTimeWait();
	// Free() only takes an extra socket reference when it is NOT set.
	FLAG_DELETE_ON_CLOSE		= 0x10,
	// Reported by IsLocal().
	FLAG_LOCAL					= 0x20,
	// NOTE(review): not referenced in the part of the file reviewed here;
	// presumably marks loss recovery -- confirm.
	FLAG_RECOVERY				= 0x40
};
324 
325 
static const int kTimestampFactor = 1000;
	// conversion factor between usec system time and msec tcp time;
	// tcp_now() divides system_time() by this value
328 
329 
330 static inline bigtime_t
331 absolute_timeout(bigtime_t timeout)
332 {
333 	if (timeout == 0 || timeout == B_INFINITE_TIMEOUT)
334 		return timeout;
335 
336 	return timeout + system_time();
337 }
338 
339 
340 static inline status_t
341 posix_error(status_t error)
342 {
343 	if (error == B_TIMED_OUT)
344 		return B_WOULD_BLOCK;
345 
346 	return error;
347 }
348 
349 
350 static inline bool
351 in_window(const tcp_sequence& sequence, const tcp_sequence& receiveNext,
352 	uint32 receiveWindow)
353 {
354 	return sequence >= receiveNext && sequence < (receiveNext + receiveWindow);
355 }
356 
357 
358 static inline bool
359 segment_in_sequence(const tcp_segment_header& segment, int size,
360 	const tcp_sequence& receiveNext, uint32 receiveWindow)
361 {
362 	tcp_sequence sequence(segment.sequence);
363 	if (size == 0) {
364 		if (receiveWindow == 0)
365 			return sequence == receiveNext;
366 		return in_window(sequence, receiveNext, receiveWindow);
367 	} else {
368 		if (receiveWindow == 0)
369 			return false;
370 		return in_window(sequence, receiveNext, receiveWindow)
371 			|| in_window(sequence + size - 1, receiveNext, receiveWindow);
372 	}
373 }
374 
375 
376 static inline bool
377 is_writable(tcp_state state)
378 {
379 	return state == ESTABLISHED || state == FINISH_RECEIVED;
380 }
381 
382 
383 static inline bool
384 is_establishing(tcp_state state)
385 {
386 	return state == SYNCHRONIZE_SENT || state == SYNCHRONIZE_RECEIVED;
387 }
388 
389 
390 static inline uint32 tcp_now()
391 {
392 	return system_time() / kTimestampFactor;
393 }
394 
395 
396 static inline uint32 tcp_diff_timestamp(uint32 base)
397 {
398 	uint32 now = tcp_now();
399 
400 	if (now > base)
401 		return now - base;
402 
403 	return now + UINT_MAX - base;
404 }
405 
406 
407 static inline bool
408 state_needs_finish(int32 state)
409 {
410 	return state == WAIT_FOR_FINISH_ACKNOWLEDGE
411 		|| state == FINISH_SENT || state == CLOSING;
412 }
413 
414 
415 //	#pragma mark -
416 
417 
/*!	Creates an endpoint for \a socket in the CLOSED state.
	The send and receive queues as well as the initial receive window are
	sized from the socket's buffer sizes, both maximum segment sizes start at
	TCP_DEFAULT_MAX_SEGMENT_SIZE, and the window scale and timestamp option
	flags are enabled. The endpoint manager is not set here; Open() does that.
*/
TCPEndpoint::TCPEndpoint(net_socket* socket)
	:
	ProtocolSocket(socket),
	fManager(NULL),
	fOptions(0),
	fSendWindowShift(0),
	fReceiveWindowShift(0),
	fSendUnacknowledged(0),
	fSendNext(0),
	fSendMax(0),
	fSendUrgentOffset(0),
	fSendWindow(0),
	fSendMaxWindow(0),
	fSendMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE),
	fSendMaxSegments(0),
	fSendQueue(socket->send.buffer_size),
	fInitialSendSequence(0),
	fPreviousHighestAcknowledge(0),
	fDuplicateAcknowledgeCount(0),
	fPreviousFlightSize(0),
	fRecover(0),
	fRoute(NULL),
	fReceiveNext(0),
	fReceiveMaxAdvertised(0),
	fReceiveWindow(socket->receive.buffer_size),
	fReceiveMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE),
	fReceiveQueue(socket->receive.buffer_size),
	fSmoothedRoundTripTime(0),
	fRoundTripVariation(0),
	fSendTime(0),
	fRoundTripStartSequence(0),
	fRetransmitTimeout(TCP_INITIAL_RTT),
	fReceivedTimestamp(0),
	fCongestionWindow(0),
	fSlowStartThreshold(0),
	fState(CLOSED),
	fFlags(FLAG_OPTION_WINDOW_SCALE | FLAG_OPTION_TIMESTAMP)
{
	// TODO: to be replaced with a real read/write locking strategy!
	mutex_init(&fLock, "tcp lock");

	// condition variables used to block senders/receivers (and connect())
	fReceiveCondition.Init(this, "tcp receive");
	fSendCondition.Init(this, "tcp send");

	// all timers call back into static hooks with this endpoint as argument
	gStackModule->init_timer(&fPersistTimer, TCPEndpoint::_PersistTimer, this);
	gStackModule->init_timer(&fRetransmitTimer, TCPEndpoint::_RetransmitTimer,
		this);
	gStackModule->init_timer(&fDelayedAcknowledgeTimer,
		TCPEndpoint::_DelayedAcknowledgeTimer, this);
	gStackModule->init_timer(&fTimeWaitTimer, TCPEndpoint::_TimeWaitTimer,
		this);

	T(APICall(this, "constructor"));
}
472 
473 
/*!	Tears the endpoint down: cancels all timers, unbinds from and releases
	the endpoint manager (if Open() had acquired one), destroys the lock,
	waits for all timer hooks to return, and finally puts the route.
*/
TCPEndpoint::~TCPEndpoint()
{
	// the lock is acquired here and destroyed below while still held
	mutex_lock(&fLock);

	T(APICall(this, "destructor"));

	_CancelConnectionTimers();
	gStackModule->cancel_timer(&fTimeWaitTimer);
	T(TimerSet(this, "time-wait", -1));

	if (fManager != NULL) {
		fManager->Unbind(this);
		put_endpoint_manager(fManager);
	}

	mutex_destroy(&fLock);

	// we need to wait for all timers to return
	gStackModule->wait_for_timer(&fRetransmitTimer);
	gStackModule->wait_for_timer(&fPersistTimer);
	gStackModule->wait_for_timer(&fDelayedAcknowledgeTimer);
	gStackModule->wait_for_timer(&fTimeWaitTimer);

	gDatalinkModule->put_route(Domain(), fRoute);
}
499 
500 
/*!	Reports the construction status; the constructor has no failure path, so
	this always returns B_OK.
*/
status_t
TCPEndpoint::InitCheck() const
{
	return B_OK;
}
506 
507 
508 //	#pragma mark - protocol API
509 
510 
511 status_t
512 TCPEndpoint::Open()
513 {
514 	TRACE("Open()");
515 	T(APICall(this, "open"));
516 
517 	status_t status = ProtocolSocket::Open();
518 	if (status < B_OK)
519 		return status;
520 
521 	fManager = get_endpoint_manager(Domain());
522 	if (fManager == NULL)
523 		return EAFNOSUPPORT;
524 
525 	return B_OK;
526 }
527 
528 
529 status_t
530 TCPEndpoint::Close()
531 {
532 	MutexLocker locker(fLock);
533 
534 	TRACE("Close()");
535 	T(APICall(this, "close"));
536 
537 	if (fState == LISTEN)
538 		delete_sem(fAcceptSemaphore);
539 
540 	if (fState == SYNCHRONIZE_SENT || fState == LISTEN) {
541 		// TODO: what about linger in case of SYNCHRONIZE_SENT?
542 		fState = CLOSED;
543 		T(State(this));
544 		return B_OK;
545 	}
546 
547 	status_t status = _Disconnect(true);
548 	if (status != B_OK)
549 		return status;
550 
551 	if (socket->options & SO_LINGER) {
552 		TRACE("Close(): Lingering for %i secs", socket->linger);
553 
554 		bigtime_t maximum = absolute_timeout(socket->linger * 1000000LL);
555 
556 		while (fSendQueue.Used() > 0) {
557 			status = _WaitForCondition(fSendCondition, locker, maximum);
558 			if (status == B_TIMED_OUT || status == B_WOULD_BLOCK)
559 				break;
560 			else if (status < B_OK)
561 				return status;
562 		}
563 
564 		TRACE("Close(): after waiting, the SendQ was left with %" B_PRIuSIZE
565 			" bytes.", fSendQueue.Used());
566 	}
567 	return B_OK;
568 }
569 
570 
571 void
572 TCPEndpoint::Free()
573 {
574 	MutexLocker _(fLock);
575 
576 	TRACE("Free()");
577 	T(APICall(this, "free"));
578 
579 	if (fState <= SYNCHRONIZE_SENT)
580 		return;
581 
582 	// we are only interested in the timer, not in changing state
583 	_EnterTimeWait();
584 
585 	fFlags |= FLAG_CLOSED;
586 	if ((fFlags & FLAG_DELETE_ON_CLOSE) == 0) {
587 		// we'll be freed later when the 2MSL timer expires
588 		gSocketModule->acquire_socket(socket);
589 	}
590 }
591 
592 
593 /*!	Creates and sends a synchronize packet to /a address, and then waits
594 	until the connection has been established or refused.
595 */
596 status_t
597 TCPEndpoint::Connect(const sockaddr* address)
598 {
599 	if (!AddressModule()->is_same_family(address))
600 		return EAFNOSUPPORT;
601 
602 	MutexLocker locker(fLock);
603 
604 	TRACE("Connect() on address %s", PrintAddress(address));
605 	T(APICall(this, "connect"));
606 
607 	if (gStackModule->is_restarted_syscall()) {
608 		bigtime_t timeout = gStackModule->restore_syscall_restart_timeout();
609 		status_t status = _WaitForEstablished(locker, timeout);
610 		TRACE("  Connect(): Connection complete: %s (timeout was %"
611 			B_PRIdBIGTIME ")", strerror(status), timeout);
612 		return posix_error(status);
613 	}
614 
615 	// Can only call connect() from CLOSED or LISTEN states
616 	// otherwise endpoint is considered already connected
617 	if (fState == LISTEN) {
618 		// this socket is about to connect; remove pending connections in the backlog
619 		gSocketModule->set_max_backlog(socket, 0);
620 	} else if (fState == ESTABLISHED) {
621 		return EISCONN;
622 	} else if (fState != CLOSED)
623 		return EALREADY;
624 
625 	// consider destination address INADDR_ANY as INADDR_LOOPBACK
626 	sockaddr_storage _address;
627 	if (AddressModule()->is_empty_address(address, false)) {
628 		AddressModule()->get_loopback_address((sockaddr *)&_address);
629 		// for IPv4 and IPv6 the port is at the same offset
630 		((sockaddr_in &)_address).sin_port = ((sockaddr_in *)address)->sin_port;
631 		address = (sockaddr *)&_address;
632 	}
633 
634 	status_t status = _PrepareSendPath(address);
635 	if (status < B_OK)
636 		return status;
637 
638 	TRACE("  Connect(): starting 3-way handshake...");
639 
640 	fState = SYNCHRONIZE_SENT;
641 	T(State(this));
642 
643 	// send SYN
644 	status = _SendQueued();
645 	if (status != B_OK) {
646 		_Close();
647 		return status;
648 	}
649 
650 	// If we are running over Loopback, after _SendQueued() returns we
651 	// may be in ESTABLISHED already.
652 	if (fState == ESTABLISHED) {
653 		TRACE("  Connect() completed after _SendQueued()");
654 		return B_OK;
655 	}
656 
657 	// wait until 3-way handshake is complete (if needed)
658 	bigtime_t timeout = min_c(socket->send.timeout, TCP_CONNECTION_TIMEOUT);
659 	if (timeout == 0) {
660 		// we're a non-blocking socket
661 		TRACE("  Connect() delayed, return EINPROGRESS");
662 		return EINPROGRESS;
663 	}
664 
665 	bigtime_t absoluteTimeout = absolute_timeout(timeout);
666 	gStackModule->store_syscall_restart_timeout(absoluteTimeout);
667 
668 	status = _WaitForEstablished(locker, absoluteTimeout);
669 	TRACE("  Connect(): Connection complete: %s (timeout was %" B_PRIdBIGTIME
670 		")", strerror(status), timeout);
671 	return posix_error(status);
672 }
673 
674 
675 status_t
676 TCPEndpoint::Accept(struct net_socket** _acceptedSocket)
677 {
678 	MutexLocker locker(fLock);
679 
680 	TRACE("Accept()");
681 	T(APICall(this, "accept"));
682 
683 	status_t status;
684 	bigtime_t timeout = absolute_timeout(socket->receive.timeout);
685 	if (gStackModule->is_restarted_syscall())
686 		timeout = gStackModule->restore_syscall_restart_timeout();
687 	else
688 		gStackModule->store_syscall_restart_timeout(timeout);
689 
690 	do {
691 		locker.Unlock();
692 
693 		status = acquire_sem_etc(fAcceptSemaphore, 1, B_ABSOLUTE_TIMEOUT
694 			| B_CAN_INTERRUPT, timeout);
695 		if (status != B_OK) {
696 			if (status == B_TIMED_OUT && socket->receive.timeout == 0)
697 				return B_WOULD_BLOCK;
698 
699 			return status;
700 		}
701 
702 		locker.Lock();
703 		status = gSocketModule->dequeue_connected(socket, _acceptedSocket);
704 #ifdef TRACE_TCP
705 		if (status == B_OK)
706 			TRACE("  Accept() returning %p", (*_acceptedSocket)->first_protocol);
707 #endif
708 	} while (status != B_OK);
709 
710 	return status;
711 }
712 
713 
714 status_t
715 TCPEndpoint::Bind(const sockaddr *address)
716 {
717 	if (address == NULL)
718 		return B_BAD_VALUE;
719 
720 	MutexLocker lock(fLock);
721 
722 	TRACE("Bind() on address %s", PrintAddress(address));
723 	T(APICall(this, "bind"));
724 
725 	if (fState != CLOSED)
726 		return EISCONN;
727 
728 	return fManager->Bind(this, address);
729 }
730 
731 
732 status_t
733 TCPEndpoint::Unbind(struct sockaddr *address)
734 {
735 	MutexLocker _(fLock);
736 
737 	TRACE("Unbind()");
738 	T(APICall(this, "unbind"));
739 
740 	return fManager->Unbind(this);
741 }
742 
743 
744 status_t
745 TCPEndpoint::Listen(int count)
746 {
747 	MutexLocker _(fLock);
748 
749 	TRACE("Listen()");
750 	T(APICall(this, "listen"));
751 
752 	if (fState != CLOSED && fState != LISTEN)
753 		return B_BAD_VALUE;
754 
755 	if (fState == CLOSED) {
756 		fAcceptSemaphore = create_sem(0, "tcp accept");
757 		if (fAcceptSemaphore < B_OK)
758 			return ENOBUFS;
759 
760 		status_t status = fManager->SetPassive(this);
761 		if (status != B_OK) {
762 			delete_sem(fAcceptSemaphore);
763 			fAcceptSemaphore = -1;
764 			return status;
765 		}
766 	}
767 
768 	gSocketModule->set_max_backlog(socket, count);
769 
770 	fState = LISTEN;
771 	T(State(this));
772 	return B_OK;
773 }
774 
775 
776 status_t
777 TCPEndpoint::Shutdown(int direction)
778 {
779 	MutexLocker lock(fLock);
780 
781 	TRACE("Shutdown(%i)", direction);
782 	T(APICall(this, "shutdown"));
783 
784 	if (direction == SHUT_RD || direction == SHUT_RDWR)
785 		fFlags |= FLAG_NO_RECEIVE;
786 
787 	if (direction == SHUT_WR || direction == SHUT_RDWR) {
788 		// TODO: That's not correct. After read/write shutting down the socket
789 		// one should still be able to read previously arrived data.
790 		_Disconnect(false);
791 	}
792 
793 	return B_OK;
794 }
795 
796 
797 /*!	Puts data contained in \a buffer into send buffer */
798 status_t
799 TCPEndpoint::SendData(net_buffer *buffer)
800 {
801 	MutexLocker lock(fLock);
802 
803 	TRACE("SendData(buffer %p, size %" B_PRIu32 ", flags %#" B_PRIx32
804 		") [total %" B_PRIuSIZE " bytes, has %" B_PRIuSIZE "]", buffer,
805 		buffer->size, buffer->flags, fSendQueue.Size(), fSendQueue.Free());
806 	T(APICall(this, "senddata"));
807 
808 	uint32 flags = buffer->flags;
809 
810 	if (fState == CLOSED)
811 		return ENOTCONN;
812 	if (fState == LISTEN)
813 		return EDESTADDRREQ;
814 	if (!is_writable(fState) && !is_establishing(fState)) {
815 		// we only send signals when called from userland
816 		if (gStackModule->is_syscall() && (flags & MSG_NOSIGNAL) == 0)
817 			send_signal(find_thread(NULL), SIGPIPE);
818 		return EPIPE;
819 	}
820 
821 	size_t left = buffer->size;
822 
823 	bigtime_t timeout = absolute_timeout(socket->send.timeout);
824 	if (gStackModule->is_restarted_syscall())
825 		timeout = gStackModule->restore_syscall_restart_timeout();
826 	else
827 		gStackModule->store_syscall_restart_timeout(timeout);
828 
829 	while (left > 0) {
830 		while (fSendQueue.Free() < socket->send.low_water_mark) {
831 			// wait until enough space is available
832 			status_t status = _WaitForCondition(fSendCondition, lock, timeout);
833 			if (status < B_OK) {
834 				TRACE("  SendData() returning %s (%d)",
835 					strerror(posix_error(status)), (int)posix_error(status));
836 				return posix_error(status);
837 			}
838 
839 			if (!is_writable(fState) && !is_establishing(fState)) {
840 				// we only send signals when called from userland
841 				if (gStackModule->is_syscall())
842 					send_signal(find_thread(NULL), SIGPIPE);
843 				return EPIPE;
844 			}
845 		}
846 
847 		size_t size = fSendQueue.Free();
848 		if (size < left) {
849 			// we need to split the original buffer
850 			net_buffer* clone = gBufferModule->clone(buffer, false);
851 				// TODO: add offset/size parameter to net_buffer::clone() or
852 				// even a move_data() function, as this is a bit inefficient
853 			if (clone == NULL)
854 				return ENOBUFS;
855 
856 			status_t status = gBufferModule->trim(clone, size);
857 			if (status != B_OK) {
858 				gBufferModule->free(clone);
859 				return status;
860 			}
861 
862 			gBufferModule->remove_header(buffer, size);
863 			left -= size;
864 			fSendQueue.Add(clone);
865 		} else {
866 			left -= buffer->size;
867 			fSendQueue.Add(buffer);
868 		}
869 	}
870 
871 	TRACE("  SendData(): %" B_PRIuSIZE " bytes used.", fSendQueue.Used());
872 
873 	bool force = false;
874 	if ((flags & MSG_OOB) != 0) {
875 		fSendUrgentOffset = fSendQueue.LastSequence();
876 			// RFC 961 specifies that the urgent offset points to the last
877 			// byte of urgent data. However, this is commonly implemented as
878 			// here, ie. it points to the first byte after the urgent data.
879 		force = true;
880 	}
881 	if ((flags & MSG_EOF) != 0)
882 		_Disconnect(false);
883 
884 	if (fState == ESTABLISHED || fState == FINISH_RECEIVED)
885 		_SendQueued(force);
886 
887 	return B_OK;
888 }
889 
890 
891 ssize_t
892 TCPEndpoint::SendAvailable()
893 {
894 	MutexLocker locker(fLock);
895 
896 	ssize_t available;
897 
898 	if (is_writable(fState))
899 		available = fSendQueue.Free();
900 	else if (is_establishing(fState))
901 		available = 0;
902 	else
903 		available = EPIPE;
904 
905 	TRACE("SendAvailable(): %" B_PRIdSSIZE, available);
906 	T(APICall(this, "sendavailable"));
907 	return available;
908 }
909 
910 
911 status_t
912 TCPEndpoint::FillStat(net_stat *stat)
913 {
914 	MutexLocker _(fLock);
915 
916 	strlcpy(stat->state, name_for_state(fState), sizeof(stat->state));
917 	stat->receive_queue_size = fReceiveQueue.Available();
918 	stat->send_queue_size = fSendQueue.Used();
919 
920 	return B_OK;
921 }
922 
923 
/*!	Retrieves up to \a numBytes of received data in \a _buffer.

	If the connection is still being established, the function first waits
	for that to complete. It then blocks (up to the socket's receive timeout)
	until the receive queue holds at least the socket's low water mark - or
	\a numBytes if MSG_WAITALL is set - or all available data has been pushed.
	With MSG_PEEK the data is left in the queue.

	\return the value returned by the receive queue's Get() (the number of
		bytes received) on success; B_OK with \a _buffer set to NULL if the
		connection is closing and nothing is left to deliver, or if receiving
		was shut down; ENOTCONN if the endpoint is CLOSED; B_WOULD_BLOCK if
		the call would have to wait but must not; or the error encountered
		while waiting (timeouts are reported as B_WOULD_BLOCK).
*/
status_t
TCPEndpoint::ReadData(size_t numBytes, uint32 flags, net_buffer** _buffer)
{
	MutexLocker locker(fLock);

	TRACE("ReadData(%" B_PRIuSIZE " bytes, flags %#" B_PRIx32 ")", numBytes,
		flags);
	T(APICall(this, "readdata"));

	*_buffer = NULL;

	if (fState == CLOSED)
		return ENOTCONN;

	// a restarted syscall continues with the deadline stored the first time
	bigtime_t timeout = absolute_timeout(socket->receive.timeout);
	if (gStackModule->is_restarted_syscall())
		timeout = gStackModule->restore_syscall_restart_timeout();
	else
		gStackModule->store_syscall_restart_timeout(timeout);

	if (fState == SYNCHRONIZE_SENT || fState == SYNCHRONIZE_RECEIVED) {
		if (flags & MSG_DONTWAIT)
			return B_WOULD_BLOCK;

		status_t status = _WaitForEstablished(locker, timeout);
		if (status < B_OK)
			return posix_error(status);
	}

	size_t dataNeeded = socket->receive.low_water_mark;

	// When MSG_WAITALL is set then the function should block
	// until the full amount of data can be returned.
	if (flags & MSG_WAITALL)
		dataNeeded = numBytes;

	// TODO: add support for urgent data (MSG_OOB)

	while (true) {
		if (fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE
			|| fState == TIME_WAIT) {
			// ``Connection closing''.
			return B_OK;
		}

		if (fReceiveQueue.Available() > 0) {
			// enough data, or everything that is available has been pushed
			if (fReceiveQueue.Available() >= dataNeeded
				|| (fReceiveQueue.PushedData() > 0
					&& fReceiveQueue.PushedData() >= fReceiveQueue.Available()))
				break;
		} else if (fState == FINISH_RECEIVED) {
			// ``If no text is awaiting delivery, the RECEIVE will
			//   get a Connection closing''.
			return B_OK;
		}

		if ((flags & MSG_DONTWAIT) != 0 || socket->receive.timeout == 0)
			return B_WOULD_BLOCK;

		// receiving has been shut down, there is nothing to wait for
		if ((fFlags & FLAG_NO_RECEIVE) != 0)
			return B_OK;

		status_t status = _WaitForCondition(fReceiveCondition, locker, timeout);
		if (status < B_OK) {
			// The Open Group base specification mentions that EINTR should be
			// returned if the recv() is interrupted before _any data_ is
			// available. So we actually check if there is data, and if so,
			// push it to the user.
			if ((status == B_TIMED_OUT || status == B_INTERRUPTED)
				&& fReceiveQueue.Available() > 0)
				break;

			return posix_error(status);
		}
	}

	TRACE("  ReadData(): %" B_PRIuSIZE " are available.",
		fReceiveQueue.Available());

	// there will be data left after this read, wake up other waiters
	if (numBytes < fReceiveQueue.Available())
		fReceiveCondition.NotifyAll();

	// when peeking, the data is cloned and stays in the queue
	bool clone = (flags & MSG_PEEK) != 0;

	ssize_t receivedBytes = fReceiveQueue.Get(numBytes, !clone, _buffer);

	TRACE("  ReadData(): %" B_PRIuSIZE " bytes kept.",
		fReceiveQueue.Available());

	// if we are opening the window, check if we should send an ACK
	if (!clone)
		SendAcknowledge(false);

	return receivedBytes;
}
1019 
1020 
1021 ssize_t
1022 TCPEndpoint::ReadAvailable()
1023 {
1024 	MutexLocker locker(fLock);
1025 
1026 	TRACE("ReadAvailable(): %" B_PRIdSSIZE, _AvailableData());
1027 	T(APICall(this, "readavailable"));
1028 
1029 	return _AvailableData();
1030 }
1031 
1032 
1033 status_t
1034 TCPEndpoint::SetSendBufferSize(size_t length)
1035 {
1036 	MutexLocker _(fLock);
1037 	fSendQueue.SetMaxBytes(length);
1038 	return B_OK;
1039 }
1040 
1041 
1042 status_t
1043 TCPEndpoint::SetReceiveBufferSize(size_t length)
1044 {
1045 	MutexLocker _(fLock);
1046 	fReceiveQueue.SetMaxBytes(length);
1047 	return B_OK;
1048 }
1049 
1050 
1051 status_t
1052 TCPEndpoint::GetOption(int option, void* _value, int* _length)
1053 {
1054 	if (*_length != sizeof(int))
1055 		return B_BAD_VALUE;
1056 
1057 	int* value = (int*)_value;
1058 
1059 	switch (option) {
1060 		case TCP_NODELAY:
1061 			if ((fOptions & TCP_NODELAY) != 0)
1062 				*value = 1;
1063 			else
1064 				*value = 0;
1065 			return B_OK;
1066 
1067 		case TCP_MAXSEG:
1068 			*value = fReceiveMaxSegmentSize;
1069 			return B_OK;
1070 
1071 		default:
1072 			return B_BAD_VALUE;
1073 	}
1074 }
1075 
1076 
1077 status_t
1078 TCPEndpoint::SetOption(int option, const void* _value, int length)
1079 {
1080 	if (option != TCP_NODELAY)
1081 		return B_BAD_VALUE;
1082 
1083 	if (length != sizeof(int))
1084 		return B_BAD_VALUE;
1085 
1086 	const int* value = (const int*)_value;
1087 
1088 	MutexLocker _(fLock);
1089 	if (*value)
1090 		fOptions |= TCP_NODELAY;
1091 	else
1092 		fOptions &= ~TCP_NODELAY;
1093 
1094 	return B_OK;
1095 }
1096 
1097 
1098 //	#pragma mark - misc
1099 
1100 
1101 bool
1102 TCPEndpoint::IsBound() const
1103 {
1104 	return !LocalAddress().IsEmpty(true);
1105 }
1106 
1107 
1108 bool
1109 TCPEndpoint::IsLocal() const
1110 {
1111 	return (fFlags & FLAG_LOCAL) != 0;
1112 }
1113 
1114 
1115 status_t
1116 TCPEndpoint::DelayedAcknowledge()
1117 {
1118 	if (gStackModule->cancel_timer(&fDelayedAcknowledgeTimer)) {
1119 		// timer was active, send an ACK now (with the exception above,
1120 		// we send every other ACK)
1121 		T(TimerSet(this, "delayed ack", -1));
1122 		return SendAcknowledge(true);
1123 	}
1124 
1125 	gStackModule->set_timer(&fDelayedAcknowledgeTimer,
1126 		TCP_DELAYED_ACKNOWLEDGE_TIMEOUT);
1127 	T(TimerSet(this, "delayed ack", TCP_DELAYED_ACKNOWLEDGE_TIMEOUT));
1128 	return B_OK;
1129 }
1130 
1131 
1132 status_t
1133 TCPEndpoint::SendAcknowledge(bool force)
1134 {
1135 	return _SendQueued(force, 0);
1136 }
1137 
1138 
1139 void
1140 TCPEndpoint::_StartPersistTimer()
1141 {
1142 	gStackModule->set_timer(&fPersistTimer, TCP_PERSIST_TIMEOUT);
1143 	T(TimerSet(this, "persist", TCP_PERSIST_TIMEOUT));
1144 }
1145 
1146 
1147 void
1148 TCPEndpoint::_EnterTimeWait()
1149 {
1150 	TRACE("_EnterTimeWait()");
1151 
1152 	if (fState == TIME_WAIT) {
1153 		_CancelConnectionTimers();
1154 
1155 		if (IsLocal()) {
1156 			// we do not use TIME_WAIT state for local connections
1157 			fFlags |= FLAG_DELETE_ON_CLOSE;
1158 			return;
1159 		}
1160 	}
1161 
1162 	_UpdateTimeWait();
1163 }
1164 
1165 
1166 void
1167 TCPEndpoint::_UpdateTimeWait()
1168 {
1169 	gStackModule->set_timer(&fTimeWaitTimer, TCP_MAX_SEGMENT_LIFETIME << 1);
1170 	T(TimerSet(this, "time-wait", TCP_MAX_SEGMENT_LIFETIME << 1));
1171 }
1172 
1173 
/*!	Cancels the retransmit, persist, and delayed acknowledge timers. The
	time-wait timer is not touched. Each cancellation is recorded as a
	"timer set to -1" tracing entry.
*/
void
TCPEndpoint::_CancelConnectionTimers()
{
	gStackModule->cancel_timer(&fRetransmitTimer);
	T(TimerSet(this, "retransmit", -1));
	gStackModule->cancel_timer(&fPersistTimer);
	T(TimerSet(this, "persist", -1));
	gStackModule->cancel_timer(&fDelayedAcknowledgeTimer);
	T(TimerSet(this, "delayed ack", -1));
}
1184 
1185 
1186 /*!	Sends the FIN flag to the peer when the connection is still open.
1187 	Moves the endpoint to the next state depending on where it was.
1188 */
1189 status_t
1190 TCPEndpoint::_Disconnect(bool closing)
1191 {
1192 	tcp_state previousState = fState;
1193 
1194 	if (fState == SYNCHRONIZE_RECEIVED || fState == ESTABLISHED)
1195 		fState = FINISH_SENT;
1196 	else if (fState == FINISH_RECEIVED)
1197 		fState = WAIT_FOR_FINISH_ACKNOWLEDGE;
1198 	else
1199 		return B_OK;
1200 
1201 	T(State(this));
1202 
1203 	status_t status = _SendQueued();
1204 	if (status != B_OK) {
1205 		fState = previousState;
1206 		T(State(this));
1207 		return status;
1208 	}
1209 
1210 	return B_OK;
1211 }
1212 
1213 
1214 void
1215 TCPEndpoint::_MarkEstablished()
1216 {
1217 	fState = ESTABLISHED;
1218 	T(State(this));
1219 
1220 	gSocketModule->set_connected(socket);
1221 	if (gSocketModule->has_parent(socket))
1222 		release_sem_etc(fAcceptSemaphore, 1, B_DO_NOT_RESCHEDULE);
1223 
1224 	fSendCondition.NotifyAll();
1225 	gSocketModule->notify(socket, B_SELECT_WRITE, fSendQueue.Free());
1226 }
1227 
1228 
1229 status_t
1230 TCPEndpoint::_WaitForEstablished(MutexLocker &locker, bigtime_t timeout)
1231 {
1232 	// TODO: Checking for CLOSED seems correct, but breaks several neon tests.
1233 	// When investigating this, also have a look at _Close() and _HandleReset().
1234 	while (fState < ESTABLISHED/* && fState != CLOSED*/) {
1235 		if (socket->error != B_OK)
1236 			return socket->error;
1237 
1238 		status_t status = _WaitForCondition(fSendCondition, locker, timeout);
1239 		if (status < B_OK)
1240 			return status;
1241 	}
1242 
1243 	return B_OK;
1244 }
1245 
1246 
1247 //	#pragma mark - receive
1248 
1249 
1250 void
1251 TCPEndpoint::_Close()
1252 {
1253 	_CancelConnectionTimers();
1254 	fState = CLOSED;
1255 	T(State(this));
1256 
1257 	fFlags |= FLAG_DELETE_ON_CLOSE;
1258 
1259 	fSendCondition.NotifyAll();
1260 	_NotifyReader();
1261 
1262 	if (gSocketModule->has_parent(socket)) {
1263 		// We still have a parent - obviously, we haven't been accepted yet,
1264 		// so no one could ever close us.
1265 		_CancelConnectionTimers();
1266 		gSocketModule->set_aborted(socket);
1267 	}
1268 }
1269 
1270 
1271 void
1272 TCPEndpoint::_HandleReset(status_t error)
1273 {
1274 	socket->error = error;
1275 	_Close();
1276 
1277 	gSocketModule->notify(socket, B_SELECT_WRITE, error);
1278 	gSocketModule->notify(socket, B_SELECT_ERROR, error);
1279 }
1280 
1281 
/*!	Reacts to a duplicate acknowledgement.
	Increments fDuplicateAcknowledgeCount and, depending on the new count,
	sends new data under limited transmit (count < 3), starts a fast
	retransmit (count == 3), or inflates the congestion window and sends
	more new data (count > 3).
	\param segment the header of the duplicate acknowledgement; only its
		\c acknowledge field is read here.
*/
1282 void
1283 TCPEndpoint::_DuplicateAcknowledge(tcp_segment_header &segment)
1284 {
	// On the first duplicate, remember how much data was in flight; it is
	// used for the slow start threshold computation below.
1285 	if (fDuplicateAcknowledgeCount == 0)
1286 		fPreviousFlightSize = (fSendMax - fSendUnacknowledged).Number();
1287 
1288 	if (++fDuplicateAcknowledgeCount < 3) {
		// First and second duplicate: if there is unsent data and the peer's
		// window is open, send from fSendMax with a temporarily enlarged
		// congestion window. The enlargement is undone after the send.
1289 		if (fSendQueue.Available(fSendMax) != 0  && fSendWindow != 0) {
1290 			fSendNext = fSendMax;
1291 			fCongestionWindow += fDuplicateAcknowledgeCount * fSendMaxSegmentSize;
1292 			_SendQueued();
1293 			TRACE("_DuplicateAcknowledge(): packet sent under limited transmit on receipt of dup ack");
1294 			fCongestionWindow -= fDuplicateAcknowledgeCount * fSendMaxSegmentSize;
1295 		}
1296 	}
1297 
1298 	if (fDuplicateAcknowledgeCount == 3) {
		// Third duplicate: enter recovery if the acknowledgement is beyond
		// fRecover, or if the congestion window is larger than one segment
		// and at most four segments were acknowledged since
		// fPreviousHighestAcknowledge.
1299 		if ((segment.acknowledge - 1) > fRecover || (fCongestionWindow > fSendMaxSegmentSize &&
1300 			(fSendUnacknowledged - fPreviousHighestAcknowledge) <= 4 * fSendMaxSegmentSize)) {
1301 			fFlags |= FLAG_RECOVERY;
1302 			fRecover = fSendMax.Number() - 1;
			// threshold: half the previous flight size, but at least two
			// segments; window: threshold plus the three duplicates
1303 			fSlowStartThreshold = max_c(fPreviousFlightSize / 2, 2 * fSendMaxSegmentSize);
1304 			fCongestionWindow = fSlowStartThreshold + 3 * fSendMaxSegmentSize;
			// retransmit starting at the sequence the peer asks for
1305 			fSendNext = segment.acknowledge;
1306 			_SendQueued();
1307 			TRACE("_DuplicateAcknowledge(): packet sent under fast restransmit on the receipt of 3rd dup ack");
1308 		}
1309 	} else if (fDuplicateAcknowledgeCount > 3) {
		// Further duplicates: grow the congestion window by one segment per
		// duplicate as long as the extra duplicates do not account for more
		// than the data in flight, then send new data if there is any.
1310 		uint32 flightSize = (fSendMax - fSendUnacknowledged).Number();
1311 		if ((fDuplicateAcknowledgeCount - 3) * fSendMaxSegmentSize <= flightSize)
1312 			fCongestionWindow += fSendMaxSegmentSize;
1313 		if (fSendQueue.Available(fSendMax) != 0) {
1314 			fSendNext = fSendMax;
1315 			_SendQueued();
1316 		}
1317 	}
1318 }
1319 
1320 
1321 void
1322 TCPEndpoint::_UpdateTimestamps(tcp_segment_header& segment,
1323 	size_t segmentLength)
1324 {
1325 	if (fFlags & FLAG_OPTION_TIMESTAMP) {
1326 		tcp_sequence sequence(segment.sequence);
1327 
1328 		if (fLastAcknowledgeSent >= sequence
1329 			&& fLastAcknowledgeSent < (sequence + segmentLength))
1330 			fReceivedTimestamp = segment.timestamp_value;
1331 	}
1332 }
1333 
1334 
1335 ssize_t
1336 TCPEndpoint::_AvailableData() const
1337 {
1338 	// TODO: Refer to the FLAG_NO_RECEIVE comment above regarding
1339 	//       the application of FLAG_NO_RECEIVE in listen()ing
1340 	//       sockets.
1341 	if (fState == LISTEN)
1342 		return gSocketModule->count_connected(socket);
1343 	if (fState == SYNCHRONIZE_SENT)
1344 		return 0;
1345 
1346 	ssize_t availableData = fReceiveQueue.Available();
1347 
1348 	if (availableData == 0 && !_ShouldReceive())
1349 		return ENOTCONN;
1350 
1351 	return availableData;
1352 }
1353 
1354 
1355 void
1356 TCPEndpoint::_NotifyReader()
1357 {
1358 	fReceiveCondition.NotifyAll();
1359 	gSocketModule->notify(socket, B_SELECT_READ, _AvailableData());
1360 }
1361 
1362 
1363 bool
1364 TCPEndpoint::_AddData(tcp_segment_header& segment, net_buffer* buffer)
1365 {
1366 	if ((segment.flags & TCP_FLAG_FINISH) != 0) {
1367 		// Remember the position of the finish received flag
1368 		fFinishReceived = true;
1369 		fFinishReceivedAt = segment.sequence + buffer->size;
1370 	}
1371 
1372 	fReceiveQueue.Add(buffer, segment.sequence);
1373 	fReceiveNext = fReceiveQueue.NextSequence();
1374 
1375 	if (fFinishReceived) {
1376 		// Set or reset the finish flag on the current segment
1377 		if (fReceiveNext < fFinishReceivedAt)
1378 			segment.flags &= ~TCP_FLAG_FINISH;
1379 		else
1380 			segment.flags |= TCP_FLAG_FINISH;
1381 	}
1382 
1383 	TRACE("  _AddData(): adding data, receive next = %" B_PRIu32 ". Now have %"
1384 		B_PRIuSIZE " bytes.", fReceiveNext.Number(), fReceiveQueue.Available());
1385 
1386 	if ((segment.flags & TCP_FLAG_PUSH) != 0)
1387 		fReceiveQueue.SetPushPointer();
1388 
1389 	return fReceiveQueue.Available() > 0;
1390 }
1391 
1392 
1393 void
1394 TCPEndpoint::_PrepareReceivePath(tcp_segment_header& segment)
1395 {
1396 	fInitialReceiveSequence = segment.sequence;
1397 	fFinishReceived = false;
1398 
1399 	// count the received SYN
1400 	segment.sequence++;
1401 
1402 	fReceiveNext = segment.sequence;
1403 	fReceiveQueue.SetInitialSequence(segment.sequence);
1404 
1405 	if ((fOptions & TCP_NOOPT) == 0) {
1406 		if (segment.max_segment_size > 0)
1407 			fSendMaxSegmentSize = segment.max_segment_size;
1408 
1409 		if (segment.options & TCP_HAS_WINDOW_SCALE) {
1410 			fFlags |= FLAG_OPTION_WINDOW_SCALE;
1411 			fSendWindowShift = segment.window_shift;
1412 		} else {
1413 			fFlags &= ~FLAG_OPTION_WINDOW_SCALE;
1414 			fReceiveWindowShift = 0;
1415 		}
1416 
1417 		if (segment.options & TCP_HAS_TIMESTAMPS) {
1418 			fFlags |= FLAG_OPTION_TIMESTAMP;
1419 			fReceivedTimestamp = segment.timestamp_value;
1420 		} else
1421 			fFlags &= ~FLAG_OPTION_TIMESTAMP;
1422 	}
1423 
1424 	if (fSendMaxSegmentSize > 2190)
1425 		fCongestionWindow = 2 * fSendMaxSegmentSize;
1426 	else if (fSendMaxSegmentSize > 1095)
1427 		fCongestionWindow = 3 * fSendMaxSegmentSize;
1428 	else
1429 		fCongestionWindow = 4 * fSendMaxSegmentSize;
1430 
1431 	fSendMaxSegments = fCongestionWindow / fSendMaxSegmentSize;
1432 	fSlowStartThreshold = (uint32)segment.advertised_window << fSendWindowShift;
1433 }
1434 
1435 
1436 bool
1437 TCPEndpoint::_ShouldReceive() const
1438 {
1439 	if ((fFlags & FLAG_NO_RECEIVE) != 0)
1440 		return false;
1441 
1442 	return fState == ESTABLISHED || fState == FINISH_SENT
1443 		|| fState == FINISH_ACKNOWLEDGED;
1444 }
1445 
1446 
1447 int32
1448 TCPEndpoint::_Spawn(TCPEndpoint* parent, tcp_segment_header& segment,
1449 	net_buffer* buffer)
1450 {
1451 	MutexLocker _(fLock);
1452 
1453 	// TODO error checking
1454 	ProtocolSocket::Open();
1455 
1456 	fState = SYNCHRONIZE_RECEIVED;
1457 	T(Spawn(parent, this));
1458 
1459 	fManager = parent->fManager;
1460 
1461 	LocalAddress().SetTo(buffer->destination);
1462 	PeerAddress().SetTo(buffer->source);
1463 
1464 	TRACE("Spawn()");
1465 
1466 	// TODO: proper error handling!
1467 	if (fManager->BindChild(this) != B_OK) {
1468 		T(Error(this, "binding failed", __LINE__));
1469 		return DROP;
1470 	}
1471 	if (_PrepareSendPath(*PeerAddress()) != B_OK) {
1472 		T(Error(this, "prepare send faild", __LINE__));
1473 		return DROP;
1474 	}
1475 
1476 	fOptions = parent->fOptions;
1477 	fAcceptSemaphore = parent->fAcceptSemaphore;
1478 
1479 	_PrepareReceivePath(segment);
1480 
1481 	// send SYN+ACK
1482 	if (_SendQueued() != B_OK) {
1483 		T(Error(this, "sending failed", __LINE__));
1484 		return DROP;
1485 	}
1486 
1487 	segment.flags &= ~TCP_FLAG_SYNCHRONIZE;
1488 		// we handled this flag now, it must not be set for further processing
1489 
1490 	return _Receive(segment, buffer);
1491 }
1492 
1493 
1494 int32
1495 TCPEndpoint::_ListenReceive(tcp_segment_header& segment, net_buffer* buffer)
1496 {
1497 	TRACE("ListenReceive()");
1498 
1499 	// Essentially, we accept only TCP_FLAG_SYNCHRONIZE in this state,
1500 	// but the error behaviour differs
1501 	if (segment.flags & TCP_FLAG_RESET)
1502 		return DROP;
1503 	if (segment.flags & TCP_FLAG_ACKNOWLEDGE)
1504 		return DROP | RESET;
1505 	if ((segment.flags & TCP_FLAG_SYNCHRONIZE) == 0)
1506 		return DROP;
1507 
1508 	// TODO: drop broadcast/multicast
1509 
1510 	// spawn new endpoint for accept()
1511 	net_socket* newSocket;
1512 	if (gSocketModule->spawn_pending_socket(socket, &newSocket) < B_OK) {
1513 		T(Error(this, "spawning failed", __LINE__));
1514 		return DROP;
1515 	}
1516 
1517 	return ((TCPEndpoint *)newSocket->first_protocol)->_Spawn(this,
1518 		segment, buffer);
1519 }
1520 
1521 
1522 int32
1523 TCPEndpoint::_SynchronizeSentReceive(tcp_segment_header &segment,
1524 	net_buffer *buffer)
1525 {
1526 	TRACE("_SynchronizeSentReceive()");
1527 
1528 	if ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0
1529 		&& (fInitialSendSequence >= segment.acknowledge
1530 			|| fSendMax < segment.acknowledge))
1531 		return DROP | RESET;
1532 
1533 	if (segment.flags & TCP_FLAG_RESET) {
1534 		_HandleReset(ECONNREFUSED);
1535 		return DROP;
1536 	}
1537 
1538 	if ((segment.flags & TCP_FLAG_SYNCHRONIZE) == 0)
1539 		return DROP;
1540 
1541 	fSendUnacknowledged = segment.acknowledge;
1542 	_PrepareReceivePath(segment);
1543 
1544 	if (segment.flags & TCP_FLAG_ACKNOWLEDGE) {
1545 		_MarkEstablished();
1546 	} else {
1547 		// simultaneous open
1548 		fState = SYNCHRONIZE_RECEIVED;
1549 		T(State(this));
1550 	}
1551 
1552 	segment.flags &= ~TCP_FLAG_SYNCHRONIZE;
1553 		// we handled this flag now, it must not be set for further processing
1554 
1555 	return _Receive(segment, buffer) | IMMEDIATE_ACKNOWLEDGE;
1556 }
1557 
1558 
1559 int32
1560 TCPEndpoint::_Receive(tcp_segment_header& segment, net_buffer* buffer)
1561 {
1562 	// PAWS processing takes precedence over regular TCP acceptability check
1563 	if ((fFlags & FLAG_OPTION_TIMESTAMP) != 0 && (segment.flags & TCP_FLAG_RESET) == 0) {
1564 		if ((segment.options & TCP_HAS_TIMESTAMPS) == 0)
1565 			return DROP;
1566 		if ((int32)(fReceivedTimestamp - segment.timestamp_value) > 0
1567 			&& (fReceivedTimestamp - segment.timestamp_value) <= INT32_MAX)
1568 			return DROP | IMMEDIATE_ACKNOWLEDGE;
1569 	}
1570 
1571 	uint32 advertisedWindow = (uint32)segment.advertised_window
1572 		<< fSendWindowShift;
1573 	size_t segmentLength = buffer->size;
1574 
1575 	// First, handle the most common case for uni-directional data transfer
1576 	// (known as header prediction - the segment must not change the window,
1577 	// and must be the expected sequence, and contain no control flags)
1578 
1579 	if (fState == ESTABLISHED
1580 		&& segment.AcknowledgeOnly()
1581 		&& fReceiveNext == segment.sequence
1582 		&& advertisedWindow > 0 && advertisedWindow == fSendWindow
1583 		&& fSendNext == fSendMax) {
1584 		_UpdateTimestamps(segment, segmentLength);
1585 
1586 		if (segmentLength == 0) {
1587 			// this is a pure acknowledge segment - we're on the sending end
1588 			if (fSendUnacknowledged < segment.acknowledge
1589 				&& fSendMax >= segment.acknowledge) {
1590 				_Acknowledged(segment);
1591 				return DROP;
1592 			}
1593 		} else if (segment.acknowledge == fSendUnacknowledged
1594 			&& fReceiveQueue.IsContiguous()
1595 			&& fReceiveQueue.Free() >= segmentLength
1596 			&& (fFlags & FLAG_NO_RECEIVE) == 0) {
1597 			if (_AddData(segment, buffer))
1598 				_NotifyReader();
1599 
1600 			return KEEP | ((segment.flags & TCP_FLAG_PUSH) != 0
1601 				? IMMEDIATE_ACKNOWLEDGE : ACKNOWLEDGE);
1602 		}
1603 	}
1604 
1605 	// The fast path was not applicable, so we continue with the standard
1606 	// processing of the incoming segment
1607 
1608 	ASSERT(fState != SYNCHRONIZE_SENT && fState != LISTEN);
1609 
1610 	if (fState != CLOSED && fState != TIME_WAIT) {
1611 		// Check sequence number
1612 		if (!segment_in_sequence(segment, segmentLength, fReceiveNext,
1613 				fReceiveWindow)) {
1614 			TRACE("  Receive(): segment out of window, next: %" B_PRIu32
1615 				" wnd: %" B_PRIu32, fReceiveNext.Number(), fReceiveWindow);
1616 			if ((segment.flags & TCP_FLAG_RESET) != 0) {
1617 				// TODO: this doesn't look right - review!
1618 				return DROP;
1619 			}
1620 			return DROP | IMMEDIATE_ACKNOWLEDGE;
1621 		}
1622 	}
1623 
1624 	if ((segment.flags & TCP_FLAG_RESET) != 0) {
1625 		// Is this a valid reset?
1626 		// We generally ignore resets in time wait state (see RFC 1337)
1627 		if (fLastAcknowledgeSent <= segment.sequence
1628 			&& tcp_sequence(segment.sequence) < (fLastAcknowledgeSent
1629 				+ fReceiveWindow)
1630 			&& fState != TIME_WAIT) {
1631 			status_t error;
1632 			if (fState == SYNCHRONIZE_RECEIVED)
1633 				error = ECONNREFUSED;
1634 			else if (fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE)
1635 				error = ENOTCONN;
1636 			else
1637 				error = ECONNRESET;
1638 
1639 			_HandleReset(error);
1640 		}
1641 
1642 		return DROP;
1643 	}
1644 
1645 	if ((segment.flags & TCP_FLAG_SYNCHRONIZE) != 0
1646 		|| (fState == SYNCHRONIZE_RECEIVED
1647 			&& (fInitialReceiveSequence > segment.sequence
1648 				|| ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0
1649 					&& (fSendUnacknowledged > segment.acknowledge
1650 						|| fSendMax < segment.acknowledge))))) {
1651 		// reset the connection - either the initial SYN was faulty, or we
1652 		// received a SYN within the data stream
1653 		return DROP | RESET;
1654 	}
1655 
1656 	// TODO: Check this! Why do we advertize a window outside of what we should
1657 	// buffer?
1658 	fReceiveWindow = max_c(fReceiveQueue.Free(), fReceiveWindow);
1659 		// the window must not shrink
1660 
1661 	// trim buffer to be within the receive window
1662 	int32 drop = (int32)(fReceiveNext - segment.sequence).Number();
1663 	if (drop > 0) {
1664 		if ((uint32)drop > buffer->size
1665 			|| ((uint32)drop == buffer->size
1666 				&& (segment.flags & TCP_FLAG_FINISH) == 0)) {
1667 			// don't accidently remove a FIN we shouldn't remove
1668 			segment.flags &= ~TCP_FLAG_FINISH;
1669 			drop = buffer->size;
1670 		}
1671 
1672 		// remove duplicate data at the start
1673 		TRACE("* remove %" B_PRId32 " bytes from the start", drop);
1674 		gBufferModule->remove_header(buffer, drop);
1675 		segment.sequence += drop;
1676 	}
1677 
1678 	int32 action = KEEP;
1679 
1680 	// immediately acknowledge out-of-order segment to trigger fast-retransmit at the sender
1681 	if (drop != 0)
1682 		action |= IMMEDIATE_ACKNOWLEDGE;
1683 
1684 	drop = (int32)(segment.sequence + buffer->size
1685 		- (fReceiveNext + fReceiveWindow)).Number();
1686 	if (drop > 0) {
1687 		// remove data exceeding our window
1688 		if ((uint32)drop >= buffer->size) {
1689 			// if we can accept data, or the segment is not what we'd expect,
1690 			// drop the segment (an immediate acknowledge is always triggered)
1691 			if (fReceiveWindow != 0 || segment.sequence != fReceiveNext)
1692 				return DROP | IMMEDIATE_ACKNOWLEDGE;
1693 
1694 			action |= IMMEDIATE_ACKNOWLEDGE;
1695 		}
1696 
1697 		if ((segment.flags & TCP_FLAG_FINISH) != 0) {
1698 			// we need to remove the finish, too, as part of the data
1699 			drop--;
1700 		}
1701 
1702 		segment.flags &= ~(TCP_FLAG_FINISH | TCP_FLAG_PUSH);
1703 		TRACE("* remove %" B_PRId32 " bytes from the end", drop);
1704 		gBufferModule->remove_trailer(buffer, drop);
1705 	}
1706 
1707 #ifdef TRACE_TCP
1708 	if (advertisedWindow > fSendWindow) {
1709 		TRACE("  Receive(): Window update %" B_PRIu32 " -> %" B_PRIu32,
1710 			fSendWindow, advertisedWindow);
1711 	}
1712 #endif
1713 
1714 	if (advertisedWindow > fSendWindow)
1715 		action |= IMMEDIATE_ACKNOWLEDGE;
1716 
1717 	fSendWindow = advertisedWindow;
1718 	if (advertisedWindow > fSendMaxWindow)
1719 		fSendMaxWindow = advertisedWindow;
1720 
1721 	// Then look at the acknowledgement for any updates
1722 
1723 	if ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0) {
1724 		// process acknowledged data
1725 		if (fState == SYNCHRONIZE_RECEIVED)
1726 			_MarkEstablished();
1727 
1728 		if (fSendMax < segment.acknowledge)
1729 			return DROP | IMMEDIATE_ACKNOWLEDGE;
1730 
1731 		if (segment.acknowledge == fSendUnacknowledged) {
1732 			if (buffer->size == 0 && advertisedWindow == fSendWindow
1733 				&& (segment.flags & TCP_FLAG_FINISH) == 0 && fSendUnacknowledged != fSendMax) {
1734 				TRACE("Receive(): duplicate ack!");
1735 				_DuplicateAcknowledge(segment);
1736 			}
1737 		} else if (segment.acknowledge < fSendUnacknowledged) {
1738 			return DROP;
1739 		} else {
1740 			// this segment acknowledges in flight data
1741 
1742 			if (fDuplicateAcknowledgeCount >= 3) {
1743 				// deflate the window.
1744 				if (segment.acknowledge > fRecover) {
1745 					uint32 flightSize = (fSendMax - fSendUnacknowledged).Number();
1746 					fCongestionWindow = min_c(fSlowStartThreshold,
1747 						max_c(flightSize, fSendMaxSegmentSize) + fSendMaxSegmentSize);
1748 					fFlags &= ~FLAG_RECOVERY;
1749 				}
1750 			}
1751 
1752 			if (fSendMax == segment.acknowledge)
1753 				TRACE("Receive(): all inflight data ack'd!");
1754 
1755 			if (segment.acknowledge > fSendQueue.LastSequence()
1756 					&& fState > ESTABLISHED) {
1757 				TRACE("Receive(): FIN has been acknowledged!");
1758 
1759 				switch (fState) {
1760 					case FINISH_SENT:
1761 						fState = FINISH_ACKNOWLEDGED;
1762 						T(State(this));
1763 						break;
1764 					case CLOSING:
1765 						fState = TIME_WAIT;
1766 						T(State(this));
1767 						_EnterTimeWait();
1768 						return DROP;
1769 					case WAIT_FOR_FINISH_ACKNOWLEDGE:
1770 						_Close();
1771 						break;
1772 
1773 					default:
1774 						break;
1775 				}
1776 			}
1777 
1778 			if (fState != CLOSED)
1779 				_Acknowledged(segment);
1780 		}
1781 	}
1782 
1783 	if (segment.flags & TCP_FLAG_URGENT) {
1784 		if (fState == ESTABLISHED || fState == FINISH_SENT
1785 			|| fState == FINISH_ACKNOWLEDGED) {
1786 			// TODO: Handle urgent data:
1787 			//  - RCV.UP <- max(RCV.UP, SEG.UP)
1788 			//  - signal the user that urgent data is available (SIGURG)
1789 		}
1790 	}
1791 
1792 	bool notify = false;
1793 
1794 	// The buffer may be freed if its data is added to the queue, so cache
1795 	// the size as we still need it later.
1796 	uint32 bufferSize = buffer->size;
1797 
1798 	if ((bufferSize > 0 || (segment.flags & TCP_FLAG_FINISH) != 0)
1799 		&& _ShouldReceive())
1800 		notify = _AddData(segment, buffer);
1801 	else {
1802 		if ((fFlags & FLAG_NO_RECEIVE) != 0)
1803 			fReceiveNext += buffer->size;
1804 
1805 		action = (action & ~KEEP) | DROP;
1806 	}
1807 
1808 	if ((segment.flags & TCP_FLAG_FINISH) != 0) {
1809 		segmentLength++;
1810 		if (fState != CLOSED && fState != LISTEN && fState != SYNCHRONIZE_SENT) {
1811 			TRACE("Receive(): peer is finishing connection!");
1812 			fReceiveNext++;
1813 			notify = true;
1814 
1815 			// FIN implies PUSH
1816 			fReceiveQueue.SetPushPointer();
1817 
1818 			// we'll reply immediately to the FIN if we are not
1819 			// transitioning to TIME WAIT so we immediatly ACK it.
1820 			action |= IMMEDIATE_ACKNOWLEDGE;
1821 
1822 			// other side is closing connection; change states
1823 			switch (fState) {
1824 				case ESTABLISHED:
1825 				case SYNCHRONIZE_RECEIVED:
1826 					fState = FINISH_RECEIVED;
1827 					T(State(this));
1828 					break;
1829 				case FINISH_SENT:
1830 					// simultaneous close
1831 					fState = CLOSING;
1832 					T(State(this));
1833 					break;
1834 				case FINISH_ACKNOWLEDGED:
1835 					fState = TIME_WAIT;
1836 					T(State(this));
1837 					_EnterTimeWait();
1838 					break;
1839 				case TIME_WAIT:
1840 					_UpdateTimeWait();
1841 					break;
1842 
1843 				default:
1844 					break;
1845 			}
1846 		}
1847 	}
1848 
1849 	if (notify)
1850 		_NotifyReader();
1851 
1852 	if (bufferSize > 0 || (segment.flags & TCP_FLAG_SYNCHRONIZE) != 0)
1853 		action |= ACKNOWLEDGE;
1854 
1855 	_UpdateTimestamps(segment, segmentLength);
1856 
1857 	TRACE("Receive() Action %" B_PRId32, action);
1858 
1859 	return action;
1860 }
1861 
1862 
1863 int32
1864 TCPEndpoint::SegmentReceived(tcp_segment_header& segment, net_buffer* buffer)
1865 {
1866 	MutexLocker locker(fLock);
1867 
1868 	TRACE("SegmentReceived(): buffer %p (%" B_PRIu32 " bytes) address %s "
1869 		"to %s flags %#" B_PRIx8 ", seq %" B_PRIu32 ", ack %" B_PRIu32
1870 		", wnd %" B_PRIu32, buffer, buffer->size, PrintAddress(buffer->source),
1871 		PrintAddress(buffer->destination), segment.flags, segment.sequence,
1872 		segment.acknowledge,
1873 		(uint32)segment.advertised_window << fSendWindowShift);
1874 	T(Receive(this, segment,
1875 		(uint32)segment.advertised_window << fSendWindowShift, buffer));
1876 	int32 segmentAction = DROP;
1877 
1878 	switch (fState) {
1879 		case LISTEN:
1880 			segmentAction = _ListenReceive(segment, buffer);
1881 			break;
1882 
1883 		case SYNCHRONIZE_SENT:
1884 			segmentAction = _SynchronizeSentReceive(segment, buffer);
1885 			break;
1886 
1887 		case SYNCHRONIZE_RECEIVED:
1888 		case ESTABLISHED:
1889 		case FINISH_RECEIVED:
1890 		case WAIT_FOR_FINISH_ACKNOWLEDGE:
1891 		case FINISH_SENT:
1892 		case FINISH_ACKNOWLEDGED:
1893 		case CLOSING:
1894 		case TIME_WAIT:
1895 		case CLOSED:
1896 			segmentAction = _Receive(segment, buffer);
1897 			break;
1898 	}
1899 
1900 	// process acknowledge action as asked for by the *Receive() method
1901 	if (segmentAction & IMMEDIATE_ACKNOWLEDGE)
1902 		SendAcknowledge(true);
1903 	else if (segmentAction & ACKNOWLEDGE)
1904 		DelayedAcknowledge();
1905 
1906 	if ((fFlags & (FLAG_CLOSED | FLAG_DELETE_ON_CLOSE))
1907 			== (FLAG_CLOSED | FLAG_DELETE_ON_CLOSE)) {
1908 		locker.Unlock();
1909 		gSocketModule->release_socket(socket);
1910 	}
1911 
1912 	return segmentAction;
1913 }
1914 
1915 
1916 //	#pragma mark - send
1917 
1918 
1919 inline uint8
1920 TCPEndpoint::_CurrentFlags()
1921 {
1922 	// we don't set FLAG_FINISH here, instead we do it
1923 	// conditionally below depending if we are sending
1924 	// the last bytes of the send queue.
1925 
1926 	switch (fState) {
1927 		case CLOSED:
1928 			return TCP_FLAG_RESET | TCP_FLAG_ACKNOWLEDGE;
1929 
1930 		case SYNCHRONIZE_SENT:
1931 			return TCP_FLAG_SYNCHRONIZE;
1932 		case SYNCHRONIZE_RECEIVED:
1933 			return TCP_FLAG_SYNCHRONIZE | TCP_FLAG_ACKNOWLEDGE;
1934 
1935 		case ESTABLISHED:
1936 		case FINISH_RECEIVED:
1937 		case FINISH_ACKNOWLEDGED:
1938 		case TIME_WAIT:
1939 		case WAIT_FOR_FINISH_ACKNOWLEDGE:
1940 		case FINISH_SENT:
1941 		case CLOSING:
1942 			return TCP_FLAG_ACKNOWLEDGE;
1943 
1944 		default:
1945 			return 0;
1946 	}
1947 }
1948 
1949 
1950 inline bool
1951 TCPEndpoint::_ShouldSendSegment(tcp_segment_header& segment, uint32 length,
1952 	uint32 segmentMaxSize, uint32 flightSize)
1953 {
1954 	if (fState == ESTABLISHED && fSendMaxSegments == 0)
1955 		return false;
1956 
1957 	if (length > 0) {
1958 		// Avoid the silly window syndrome - we only send a segment in case:
1959 		// - we have a full segment to send, or
1960 		// - we're at the end of our buffer queue, or
1961 		// - the buffer is at least larger than half of the maximum send window,
1962 		//   or
1963 		// - we're retransmitting data
1964 		if (length == segmentMaxSize
1965 			|| (fOptions & TCP_NODELAY) != 0
1966 			|| tcp_sequence(fSendNext + length) == fSendQueue.LastSequence()
1967 			|| (fSendMaxWindow > 0 && length >= fSendMaxWindow / 2))
1968 			return true;
1969 	}
1970 
1971 	// check if we need to send a window update to the peer
1972 	if (segment.advertised_window > 0) {
1973 		// correct the window to take into account what already has been advertised
1974 		uint32 window = (segment.advertised_window << fReceiveWindowShift)
1975 			- (fReceiveMaxAdvertised - fReceiveNext).Number();
1976 
1977 		// if we can advertise a window larger than twice the maximum segment
1978 		// size, or half the maximum buffer size we send a window update
1979 		if (window >= (fReceiveMaxSegmentSize << 1)
1980 			|| window >= (socket->receive.buffer_size >> 1))
1981 			return true;
1982 	}
1983 
1984 	if ((segment.flags & (TCP_FLAG_SYNCHRONIZE | TCP_FLAG_FINISH
1985 			| TCP_FLAG_RESET)) != 0)
1986 		return true;
1987 
1988 	// We do have urgent data pending
1989 	if (fSendUrgentOffset > fSendNext)
1990 		return true;
1991 
1992 	// there is no reason to send a segment just now
1993 	return false;
1994 }
1995 
1996 
1997 status_t
1998 TCPEndpoint::_SendQueued(bool force)
1999 {
2000 	return _SendQueued(force, fSendWindow);
2001 }
2002 
2003 
2004 /*!	Sends one or more TCP segments with the data waiting in the queue, or some
2005 	specific flags that need to be sent.

	\param force if \c true, a segment is sent without consulting
		_ShouldSendSegment(), and neither FIN nor PUSH is added to it.
	\param sendWindow the send window to use; it is further limited by the
		congestion window and reduced by the part that is already consumed.
	\return \c B_OK on success (including when nothing had to be sent),
		\c B_ERROR if there is no route or the endpoint is in LISTEN state,
		\c B_NO_MEMORY if no buffer could be created, or the error returned
		by the send queue, add_tcp_header(), or the next protocol's
		send_routed_data().
2006 */
2007 status_t
2008 TCPEndpoint::_SendQueued(bool force, uint32 sendWindow)
2009 {
2010 	if (fRoute == NULL)
2011 		return B_ERROR;
2012 
2013 	// in passive state?
2014 	if (fState == LISTEN)
2015 		return B_ERROR;
2016 
	// start with the flags that match the current state
2017 	tcp_segment_header segment(_CurrentFlags());
2018 
2019 	if ((fOptions & TCP_NOOPT) == 0) {
2020 		if ((fFlags & FLAG_OPTION_TIMESTAMP) != 0) {
2021 			segment.options |= TCP_HAS_TIMESTAMPS;
2022 			segment.timestamp_reply = fReceivedTimestamp;
2023 			segment.timestamp_value = tcp_now();
2024 		}
2025 
		// only the very first SYN carries the establishment options
2026 		if ((segment.flags & TCP_FLAG_SYNCHRONIZE) != 0
2027 			&& fSendNext == fInitialSendSequence) {
2028 			// add connection establishment options
2029 			segment.max_segment_size = fReceiveMaxSegmentSize;
2030 			if (fFlags & FLAG_OPTION_WINDOW_SCALE) {
2031 				segment.options |= TCP_HAS_WINDOW_SCALE;
2032 				segment.window_shift = fReceiveWindowShift;
2033 			}
2034 		}
2035 	}
2036 
	// compute the receive window to advertise to the peer
2037 	size_t availableBytes = fReceiveQueue.Free();
2038 	// window size must remain same for duplicate acknowledgements
2039 	if (!fReceiveQueue.IsContiguous())
2040 		availableBytes = (fReceiveMaxAdvertised - fReceiveNext).Number();
2041 
2042 	if (fFlags & FLAG_OPTION_WINDOW_SCALE)
2043 		segment.advertised_window = availableBytes >> fReceiveWindowShift;
2044 	else
2045 		segment.advertised_window = min_c(TCP_MAX_WINDOW, availableBytes);
2046 
2047 	segment.acknowledge = fReceiveNext.Number();
2048 
2049 	// Process urgent data
2050 	if (fSendUrgentOffset > fSendNext) {
2051 		segment.flags |= TCP_FLAG_URGENT;
2052 		segment.urgent_offset = (fSendUrgentOffset - fSendNext).Number();
2053 	} else {
2054 		fSendUrgentOffset = fSendUnacknowledged.Number();
2055 			// Keep urgent offset updated, so that it doesn't reach into our
2056 			// send window on overlap
2057 		segment.urgent_offset = 0;
2058 	}
2059 
	// the congestion window (if set) caps the send window
2060 	if (fCongestionWindow > 0 && fCongestionWindow < sendWindow)
2061 		sendWindow = fCongestionWindow;
2062 
2063 	// fSendUnacknowledged
2064 	//  |    fSendNext      fSendMax
2065 	//  |        |              |
2066 	//  v        v              v
2067 	//  -----------------------------------
2068 	//  | effective window           |
2069 	//  -----------------------------------
2070 
2071 	// Flight size represents the window of data which is currently in the
2072 	// ether. We should never send data such as the flight size becomes larger
2073 	// than the effective window. Note however that the effective window may be
2074 	// reduced (by congestion for instance), so at some point in time flight
2075 	// size may be larger than the currently calculated window.
2076 
2077 	uint32 flightSize = (fSendMax - fSendUnacknowledged).Number();
2078 	uint32 consumedWindow = (fSendNext - fSendUnacknowledged).Number();
2079 
	// subtract the part of the window that is already used up
2080 	if (consumedWindow > sendWindow) {
2081 		sendWindow = 0;
2082 		// TODO: enter persist state? try to get a window update.
2083 	} else
2084 		sendWindow -= consumedWindow;
2085 
	// the amount of data to send in this call, the retransmit timer is only
	// started if nothing was outstanding before, and anything below fSendMax
	// has been sent before
2086 	uint32 length = min_c(fSendQueue.Available(fSendNext), sendWindow);
2087 	bool shouldStartRetransmitTimer = fSendNext == fSendUnacknowledged;
2088 	bool retransmit = fSendNext < fSendMax;
2089 
2090 	if (fDuplicateAcknowledgeCount != 0) {
2091 		// send at most 1 SMSS of data when under limited transmit, fast transmit/recovery
2092 		length = min_c(length, fSendMaxSegmentSize);
2093 	}
2094 
	// Each iteration builds and sends one segment. The loop runs at least
	// once, so that a segment without data can be sent as well.
2095 	do {
2096 		uint32 segmentMaxSize = fSendMaxSegmentSize
2097 			- tcp_options_length(segment);
2098 		uint32 segmentLength = min_c(length, segmentMaxSize);
2099 
		// this segment reaches the end of the send queue
2100 		if (fSendNext + segmentLength == fSendQueue.LastSequence() && !force) {
2101 			if (state_needs_finish(fState))
2102 				segment.flags |= TCP_FLAG_FINISH;
2103 			if (length > 0)
2104 				segment.flags |= TCP_FLAG_PUSH;
2105 		}
2106 
2107 		// Determine if we should really send this segment
2108 		if (!force && !retransmit && !_ShouldSendSegment(segment, segmentLength,
2109 				segmentMaxSize, flightSize)) {
			// Nothing is sent now; if data remains queued and neither the
			// persist nor the retransmit timer is running, start the
			// persist timer.
2110 			if (fSendQueue.Available()
2111 				&& !gStackModule->is_timer_active(&fPersistTimer)
2112 				&& !gStackModule->is_timer_active(&fRetransmitTimer))
2113 				_StartPersistTimer();
2114 			break;
2115 		}
2116 
2117 		net_buffer *buffer = gBufferModule->create(256);
2118 		if (buffer == NULL)
2119 			return B_NO_MEMORY;
2120 
		// get the segment's data from the send queue (if it carries any)
2121 		status_t status = B_OK;
2122 		if (segmentLength > 0)
2123 			status = fSendQueue.Get(buffer, fSendNext, segmentLength);
2124 		if (status < B_OK) {
2125 			gBufferModule->free(buffer);
2126 			return status;
2127 		}
2128 
2129 		LocalAddress().CopyTo(buffer->source);
2130 		PeerAddress().CopyTo(buffer->destination);
2131 
		// remember the payload size before the TCP header is added
2132 		uint32 size = buffer->size;
2133 		segment.sequence = fSendNext.Number();
2134 
2135 		TRACE("SendQueued(): buffer %p (%" B_PRIu32 " bytes) address %s to "
2136 			"%s flags %#" B_PRIx8 ", seq %" B_PRIu32 ", ack %" B_PRIu32
2137 			", rwnd %" B_PRIu16 ", cwnd %" B_PRIu32 ", ssthresh %" B_PRIu32
2138 			", len %" B_PRIu32 ", first %" B_PRIu32 ", last %" B_PRIu32,
2139 			buffer, buffer->size, PrintAddress(buffer->source),
2140 			PrintAddress(buffer->destination), segment.flags, segment.sequence,
2141 			segment.acknowledge, segment.advertised_window,
2142 			fCongestionWindow, fSlowStartThreshold, segmentLength,
2143 			fSendQueue.FirstSequence().Number(),
2144 			fSendQueue.LastSequence().Number());
2145 		T(Send(this, segment, buffer, fSendQueue.FirstSequence(),
2146 			fSendQueue.LastSequence()));
2147 
2148 		PROBE(buffer, sendWindow);
2149 		sendWindow -= buffer->size;
2150 
2151 		status = add_tcp_header(AddressModule(), segment, buffer);
2152 		if (status != B_OK) {
2153 			gBufferModule->free(buffer);
2154 			return status;
2155 		}
2156 
2157 		// Update send status - we need to do this before we send the data
2158 		// for local connections as the answer is directly handled
2159 
		// SYN and FIN each occupy one sequence number; the establishment
		// options are removed from the header again once the SYN has been
		// built
2160 		if (segment.flags & TCP_FLAG_SYNCHRONIZE) {
2161 			segment.options &= ~TCP_HAS_WINDOW_SCALE;
2162 			segment.max_segment_size = 0;
2163 			size++;
2164 		}
2165 
2166 		if (segment.flags & TCP_FLAG_FINISH)
2167 			size++;
2168 
		// keep the old fSendMax around to be able to restore it on failure
2169 		uint32 sendMax = fSendMax.Number();
2170 		fSendNext += size;
2171 		if (fSendMax < fSendNext)
2172 			fSendMax = fSendNext;
2173 
2174 		fReceiveMaxAdvertised = fReceiveNext
2175 			+ ((uint32)segment.advertised_window << fReceiveWindowShift);
2176 
2177 		if (segmentLength != 0 && fState == ESTABLISHED)
2178 			--fSendMaxSegments;
2179 
2180 		status = next->module->send_routed_data(next, fRoute, buffer);
2181 		if (status < B_OK) {
2182 			gBufferModule->free(buffer);
2183 
			// NOTE(review): fReceiveMaxAdvertised and fSendMaxSegments are
			// not restored here.
2184 			fSendNext = segment.sequence;
2185 			fSendMax = sendMax;
2186 				// restore send status
2187 			return status;
2188 		}
2189 
		// start a round trip measurement if none is running, and this is new
		// data or a SYN
2190 		if (fSendTime == 0 && !retransmit
2191 			&& (segmentLength != 0 || (segment.flags & TCP_FLAG_SYNCHRONIZE) !=0)) {
2192 			fSendTime = tcp_now();
2193 			fRoundTripStartSequence = segment.sequence;
2194 		}
2195 
2196 		if (shouldStartRetransmitTimer && size > 0) {
2197 			TRACE("starting initial retransmit timer of: %" B_PRIdBIGTIME,
2198 				fRetransmitTimeout);
2199 			gStackModule->set_timer(&fRetransmitTimer, fRetransmitTimeout);
2200 			T(TimerSet(this, "retransmit", fRetransmitTimeout));
2201 			shouldStartRetransmitTimer = false;
2202 		}
2203 
2204 		if (segment.flags & TCP_FLAG_ACKNOWLEDGE)
2205 			fLastAcknowledgeSent = segment.acknowledge;
2206 
		// SYN, RST, and FIN are sent only once per call
2207 		length -= segmentLength;
2208 		segment.flags &= ~(TCP_FLAG_SYNCHRONIZE | TCP_FLAG_RESET
2209 			| TCP_FLAG_FINISH);
2210 
		// only a single segment is sent when retransmitting
2211 		if (retransmit)
2212 			break;
2213 
2214 	} while (length > 0);
2215 
2216 	return B_OK;
2217 }
2218 
2219 
2220 int
2221 TCPEndpoint::_MaxSegmentSize(const sockaddr* address) const
2222 {
2223 	return next->module->get_mtu(next, address) - sizeof(tcp_header);
2224 }
2225 
2226 
2227 status_t
2228 TCPEndpoint::_PrepareSendPath(const sockaddr* peer)
2229 {
2230 	if (fRoute == NULL) {
2231 		fRoute = gDatalinkModule->get_route(Domain(), peer);
2232 		if (fRoute == NULL)
2233 			return ENETUNREACH;
2234 
2235 		if ((fRoute->flags & RTF_LOCAL) != 0)
2236 			fFlags |= FLAG_LOCAL;
2237 	}
2238 
2239 	// make sure connection does not already exist
2240 	status_t status = fManager->SetConnection(this, *LocalAddress(), peer,
2241 		fRoute->interface_address->local);
2242 	if (status < B_OK)
2243 		return status;
2244 
2245 	fInitialSendSequence = system_time() >> 4;
2246 	fSendNext = fInitialSendSequence;
2247 	fSendUnacknowledged = fInitialSendSequence;
2248 	fSendMax = fInitialSendSequence;
2249 	fSendUrgentOffset = fInitialSendSequence;
2250 	fRecover = fInitialSendSequence.Number();
2251 
2252 	// we are counting the SYN here
2253 	fSendQueue.SetInitialSequence(fSendNext + 1);
2254 
2255 	fReceiveMaxSegmentSize = _MaxSegmentSize(peer);
2256 
2257 	// Compute the window shift we advertise to our peer - if it doesn't support
2258 	// this option, this will be reset to 0 (when its SYN is received)
2259 	fReceiveWindowShift = 0;
2260 	while (fReceiveWindowShift < TCP_MAX_WINDOW_SHIFT
2261 		&& (0xffffUL << fReceiveWindowShift) < socket->receive.buffer_size) {
2262 		fReceiveWindowShift++;
2263 	}
2264 
2265 	return B_OK;
2266 }
2267 
2268 
2269 void
2270 TCPEndpoint::_Acknowledged(tcp_segment_header& segment)
2271 {
2272 	TRACE("_Acknowledged(): ack %" B_PRIu32 "; uack %" B_PRIu32 "; next %"
2273 		B_PRIu32 "; max %" B_PRIu32, segment.acknowledge,
2274 		fSendUnacknowledged.Number(), fSendNext.Number(), fSendMax.Number());
2275 
2276 	ASSERT(fSendUnacknowledged <= segment.acknowledge);
2277 
2278 	if (fSendUnacknowledged < segment.acknowledge) {
2279 		fSendQueue.RemoveUntil(segment.acknowledge);
2280 
2281 		uint32 bytesAcknowledged = segment.acknowledge - fSendUnacknowledged.Number();
2282 		fPreviousHighestAcknowledge = fSendUnacknowledged;
2283 		fSendUnacknowledged = segment.acknowledge;
2284 		uint32 flightSize = (fSendMax - fSendUnacknowledged).Number();
2285 		int32 expectedSamples = flightSize / (fSendMaxSegmentSize << 1);
2286 
2287 		if (fPreviousHighestAcknowledge > fSendUnacknowledged) {
2288 			// need to update the recover variable upon a sequence wraparound
2289 			fRecover = segment.acknowledge - 1;
2290 		}
2291 
2292 		// the acknowledgment of the SYN/ACK MUST NOT increase the size of the congestion window
2293 		if (fSendUnacknowledged != fInitialSendSequence) {
2294 			if (fCongestionWindow < fSlowStartThreshold)
2295 				fCongestionWindow += min_c(bytesAcknowledged, fSendMaxSegmentSize);
2296 			else {
2297 				uint32 increment = fSendMaxSegmentSize * fSendMaxSegmentSize;
2298 
2299 				if (increment < fCongestionWindow)
2300 					increment = 1;
2301 				else
2302 					increment /= fCongestionWindow;
2303 
2304 				fCongestionWindow += increment;
2305 			}
2306 
2307 			fSendMaxSegments = UINT32_MAX;
2308 		}
2309 
2310 		if ((fFlags & FLAG_RECOVERY) != 0) {
2311 			fSendNext = fSendUnacknowledged;
2312 			_SendQueued();
2313 			fCongestionWindow -= bytesAcknowledged;
2314 
2315 			if (bytesAcknowledged > fSendMaxSegmentSize)
2316 				fCongestionWindow += fSendMaxSegmentSize;
2317 
2318 			fSendNext = fSendMax;
2319 		} else
2320 			fDuplicateAcknowledgeCount = 0;
2321 
2322 		if (fSendNext < fSendUnacknowledged)
2323 			fSendNext = fSendUnacknowledged;
2324 
2325 		if (fFlags & FLAG_OPTION_TIMESTAMP) {
2326 			_UpdateRoundTripTime(tcp_diff_timestamp(segment.timestamp_reply),
2327 				expectedSamples > 0 ? expectedSamples : 1);
2328 		} else if (fSendTime != 0 && fRoundTripStartSequence < segment.acknowledge) {
2329 			_UpdateRoundTripTime(tcp_diff_timestamp(fSendTime), 1);
2330 			fSendTime = 0;
2331 		}
2332 
2333 		if (fSendUnacknowledged == fSendMax) {
2334 			TRACE("all acknowledged, cancelling retransmission timer.");
2335 			gStackModule->cancel_timer(&fRetransmitTimer);
2336 			T(TimerSet(this, "retransmit", -1));
2337 		} else {
2338 			TRACE("data acknowledged, resetting retransmission timer to: %"
2339 				B_PRIdBIGTIME, fRetransmitTimeout);
2340 			gStackModule->set_timer(&fRetransmitTimer, fRetransmitTimeout);
2341 			T(TimerSet(this, "retransmit", fRetransmitTimeout));
2342 		}
2343 
2344 		if (is_writable(fState)) {
2345 			// notify threads waiting on the socket to become writable again
2346 			fSendCondition.NotifyAll();
2347 			gSocketModule->notify(socket, B_SELECT_WRITE, fSendQueue.Free());
2348 		}
2349 	}
2350 
2351 	// if there is data left to be sent, send it now
2352 	if (fSendQueue.Used() > 0)
2353 		_SendQueued();
2354 }
2355 
2356 
2357 void
2358 TCPEndpoint::_Retransmit()
2359 {
2360 	TRACE("Retransmit()");
2361 
2362 	if (fState < ESTABLISHED) {
2363 		fRetransmitTimeout = TCP_SYN_RETRANSMIT_TIMEOUT;
2364 		fCongestionWindow = fSendMaxSegmentSize;
2365 	} else {
2366 		_ResetSlowStart();
2367 		fDuplicateAcknowledgeCount = 0;
2368 		// Do exponential back off of the retransmit timeout
2369 		fRetransmitTimeout *= 2;
2370 		if (fRetransmitTimeout > TCP_MAX_RETRANSMIT_TIMEOUT)
2371 			fRetransmitTimeout = TCP_MAX_RETRANSMIT_TIMEOUT;
2372 	}
2373 
2374 	fSendNext = fSendUnacknowledged;
2375 	_SendQueued();
2376 
2377 	fRecover = fSendNext.Number() - 1;
2378 	if ((fFlags & FLAG_RECOVERY) != 0)
2379 		fFlags &= ~FLAG_RECOVERY;
2380 }
2381 
2382 
2383 void
2384 TCPEndpoint::_UpdateRoundTripTime(int32 roundTripTime, int32 expectedSamples)
2385 {
2386 	if (fSmoothedRoundTripTime == 0) {
2387 		fSmoothedRoundTripTime = roundTripTime;
2388 		fRoundTripVariation = roundTripTime / 2;
2389 		fRetransmitTimeout = (fSmoothedRoundTripTime + max_c(100, fRoundTripVariation * 4))
2390 				* kTimestampFactor;
2391 	} else {
2392 		int32 delta = fSmoothedRoundTripTime - roundTripTime;
2393 		if (delta < 0)
2394 			delta = -delta;
2395 		fRoundTripVariation += (delta - fRoundTripVariation) / (expectedSamples * 4);
2396 		fSmoothedRoundTripTime += (roundTripTime - fSmoothedRoundTripTime) / (expectedSamples * 8);
2397 		fRetransmitTimeout = (fSmoothedRoundTripTime + max_c(100, fRoundTripVariation * 4))
2398 			* kTimestampFactor;
2399 	}
2400 
2401 	if (fRetransmitTimeout > TCP_MAX_RETRANSMIT_TIMEOUT)
2402 		fRetransmitTimeout = TCP_MAX_RETRANSMIT_TIMEOUT;
2403 
2404 	if (fRetransmitTimeout < TCP_MIN_RETRANSMIT_TIMEOUT)
2405 		fRetransmitTimeout = TCP_MIN_RETRANSMIT_TIMEOUT;
2406 
2407 	TRACE("  RTO is now %" B_PRIdBIGTIME " (after rtt %" B_PRId32 "ms)",
2408 		fRetransmitTimeout, roundTripTime);
2409 }
2410 
2411 
2412 void
2413 TCPEndpoint::_ResetSlowStart()
2414 {
2415 	fSlowStartThreshold = max_c((fSendMax - fSendUnacknowledged).Number() / 2,
2416 		2 * fSendMaxSegmentSize);
2417 	fCongestionWindow = fSendMaxSegmentSize;
2418 }
2419 
2420 
2421 //	#pragma mark - timer
2422 
2423 
2424 /*static*/ void
2425 TCPEndpoint::_RetransmitTimer(net_timer* timer, void* _endpoint)
2426 {
2427 	TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint;
2428 	T(TimerTriggered(endpoint, "retransmit"));
2429 
2430 	MutexLocker locker(endpoint->fLock);
2431 	if (!locker.IsLocked() || gStackModule->is_timer_active(timer))
2432 		return;
2433 
2434 	endpoint->_Retransmit();
2435 }
2436 
2437 
2438 /*static*/ void
2439 TCPEndpoint::_PersistTimer(net_timer* timer, void* _endpoint)
2440 {
2441 	TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint;
2442 	T(TimerTriggered(endpoint, "persist"));
2443 
2444 	MutexLocker locker(endpoint->fLock);
2445 	if (!locker.IsLocked())
2446 		return;
2447 
2448 	// the timer might not have been canceled early enough
2449 	if (endpoint->State() == CLOSED)
2450 		return;
2451 
2452 	endpoint->_SendQueued(true);
2453 }
2454 
2455 
2456 /*static*/ void
2457 TCPEndpoint::_DelayedAcknowledgeTimer(net_timer* timer, void* _endpoint)
2458 {
2459 	TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint;
2460 	T(TimerTriggered(endpoint, "delayed ack"));
2461 
2462 	MutexLocker locker(endpoint->fLock);
2463 	if (!locker.IsLocked())
2464 		return;
2465 
2466 	// the timer might not have been canceled early enough
2467 	if (endpoint->State() == CLOSED)
2468 		return;
2469 
2470 	endpoint->SendAcknowledge(true);
2471 }
2472 
2473 
2474 /*static*/ void
2475 TCPEndpoint::_TimeWaitTimer(net_timer* timer, void* _endpoint)
2476 {
2477 	TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint;
2478 	T(TimerTriggered(endpoint, "time-wait"));
2479 
2480 	MutexLocker locker(endpoint->fLock);
2481 	if (!locker.IsLocked())
2482 		return;
2483 
2484 	if ((endpoint->fFlags & FLAG_CLOSED) == 0) {
2485 		endpoint->fFlags |= FLAG_DELETE_ON_CLOSE;
2486 		return;
2487 	}
2488 
2489 	locker.Unlock();
2490 
2491 	gSocketModule->release_socket(endpoint->socket);
2492 }
2493 
2494 
2495 /*static*/ status_t
2496 TCPEndpoint::_WaitForCondition(ConditionVariable& condition,
2497 	MutexLocker& locker, bigtime_t timeout)
2498 {
2499 	ConditionVariableEntry entry;
2500 	condition.Add(&entry);
2501 
2502 	locker.Unlock();
2503 	status_t result = entry.Wait(B_ABSOLUTE_TIMEOUT | B_CAN_INTERRUPT, timeout);
2504 	locker.Lock();
2505 
2506 	return result;
2507 }
2508 
2509 
2510 //	#pragma mark -
2511 
2512 
2513 void
2514 TCPEndpoint::Dump() const
2515 {
2516 	kprintf("TCP endpoint %p\n", this);
2517 	kprintf("  state: %s\n", name_for_state(fState));
2518 	kprintf("  flags: 0x%" B_PRIx32 "\n", fFlags);
2519 #if KDEBUG
2520 	kprintf("  lock: { %p, holder: %" B_PRId32 " }\n", &fLock, fLock.holder);
2521 #endif
2522 	kprintf("  accept sem: %" B_PRId32 "\n", fAcceptSemaphore);
2523 	kprintf("  options: 0x%" B_PRIx32 "\n", (uint32)fOptions);
2524 	kprintf("  send\n");
2525 	kprintf("    window shift: %" B_PRIu8 "\n", fSendWindowShift);
2526 	kprintf("    unacknowledged: %" B_PRIu32 "\n",
2527 		fSendUnacknowledged.Number());
2528 	kprintf("    next: %" B_PRIu32 "\n", fSendNext.Number());
2529 	kprintf("    max: %" B_PRIu32 "\n", fSendMax.Number());
2530 	kprintf("    urgent offset: %" B_PRIu32 "\n", fSendUrgentOffset.Number());
2531 	kprintf("    window: %" B_PRIu32 "\n", fSendWindow);
2532 	kprintf("    max window: %" B_PRIu32 "\n", fSendMaxWindow);
2533 	kprintf("    max segment size: %" B_PRIu32 "\n", fSendMaxSegmentSize);
2534 	kprintf("    queue: %" B_PRIuSIZE " / %" B_PRIuSIZE "\n", fSendQueue.Used(),
2535 		fSendQueue.Size());
2536 #if DEBUG_BUFFER_QUEUE
2537 	fSendQueue.Dump();
2538 #endif
2539 	kprintf("    last acknowledge sent: %" B_PRIu32 "\n",
2540 		fLastAcknowledgeSent.Number());
2541 	kprintf("    initial sequence: %" B_PRIu32 "\n",
2542 		fInitialSendSequence.Number());
2543 	kprintf("  receive\n");
2544 	kprintf("    window shift: %" B_PRIu8 "\n", fReceiveWindowShift);
2545 	kprintf("    next: %" B_PRIu32 "\n", fReceiveNext.Number());
2546 	kprintf("    max advertised: %" B_PRIu32 "\n",
2547 		fReceiveMaxAdvertised.Number());
2548 	kprintf("    window: %" B_PRIu32 "\n", fReceiveWindow);
2549 	kprintf("    max segment size: %" B_PRIu32 "\n", fReceiveMaxSegmentSize);
2550 	kprintf("    queue: %" B_PRIuSIZE " / %" B_PRIuSIZE "\n",
2551 		fReceiveQueue.Available(), fReceiveQueue.Size());
2552 #if DEBUG_BUFFER_QUEUE
2553 	fReceiveQueue.Dump();
2554 #endif
2555 	kprintf("    initial sequence: %" B_PRIu32 "\n",
2556 		fInitialReceiveSequence.Number());
2557 	kprintf("    duplicate acknowledge count: %" B_PRIu32 "\n",
2558 		fDuplicateAcknowledgeCount);
2559 	kprintf("  smoothed round trip time: %" B_PRId32 " (deviation %" B_PRId32 ")\n",
2560 		fSmoothedRoundTripTime, fRoundTripVariation);
2561 	kprintf("  retransmit timeout: %" B_PRId64 "\n", fRetransmitTimeout);
2562 	kprintf("  congestion window: %" B_PRIu32 "\n", fCongestionWindow);
2563 	kprintf("  slow start threshold: %" B_PRIu32 "\n", fSlowStartThreshold);
2564 }
2565 
2566