xref: /haiku/src/add-ons/kernel/network/protocols/tcp/TCPEndpoint.cpp (revision 4a32f48e70297d9a634646f01e08c2f451ecd6bd)
1 /*
2  * Copyright 2006-2010, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Andrew Galante, haiku.galante@gmail.com
7  *		Axel Dörfler, axeld@pinc-software.de
8  *		Hugo Santos, hugosantos@gmail.com
9  */
10 
11 
12 #include "TCPEndpoint.h"
13 
14 #include <netinet/in.h>
15 #include <netinet/ip.h>
16 #include <netinet/tcp.h>
17 #include <new>
18 #include <signal.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <stdint.h>
22 
23 #include <KernelExport.h>
24 #include <Select.h>
25 
26 #include <net_buffer.h>
27 #include <net_datalink.h>
28 #include <net_stat.h>
29 #include <NetBufferUtilities.h>
30 #include <NetUtilities.h>
31 
32 #include <lock.h>
33 #include <tracing.h>
34 #include <util/AutoLock.h>
35 #include <util/list.h>
36 
37 #include "EndpointManager.h"
38 
39 
40 // References:
41 //	- RFC 793 - Transmission Control Protocol
42 //	- RFC 813 - Window and Acknowledgement Strategy in TCP
43 //	- RFC 1337 - TIME_WAIT Assassination Hazards in TCP
44 //
45 // Things this implementation currently doesn't implement:
46 //	- TCP Slow Start, Congestion Avoidance, Fast Retransmit, and Fast Recovery,
47 //	  RFC 2001, RFC 2581, RFC 3042
48 //	- NewReno Modification to TCP's Fast Recovery, RFC 2582
49 //	- Explicit Congestion Notification (ECN), RFC 3168
50 //	- SYN-Cache
51 //	- SACK, Selective Acknowledgment - RFC 2018, RFC 2883, RFC 3517
52 //	- Forward RTO-Recovery, RFC 4138
53 //	- Time-Wait hash instead of keeping sockets alive
54 //
55 // Things incomplete in this implementation:
56 //	- TCP Extensions for High Performance, RFC 1323 - RTTM, PAWS
57 
// Formats \a address as a C string for debug output. Expands to a call to
// Domain(), so it can only be used where Domain() is in scope. The string
// belongs to a temporary AddressString and must be consumed within the same
// full expression.
#define PrintAddress(address) \
	AddressString(Domain(), address, true).Data()

// Uncomment to enable the dprintf() based TRACE() and/or PROBE() output.
//#define TRACE_TCP
//#define PROBE_TCP

// TRACE() prints the current thread, the system time, the endpoint pointer
// and the name of its current state in front of the formatted message. It
// references `this` and `fState`, so it is only usable inside TCPEndpoint
// methods. Without TRACE_TCP it expands to an empty statement.
#ifdef TRACE_TCP
// the space before ', ##args' is important in order for this to work with cpp 2.95
#	define TRACE(format, args...)	dprintf("%" B_PRId32 ": TCP [%" \
		B_PRIdBIGTIME "] %p (%12s) " format "\n", find_thread(NULL), \
		system_time(), this, name_for_state(fState) , ##args)
#else
#	define TRACE(args...)			do { } while (0)
#endif

// PROBE() dumps one line of send-side state (sequence numbers, congestion
// window, slow start threshold, windows, queue usage and retransmit timeout)
// for \a buffer. Like TRACE() it reads TCPEndpoint members directly. Without
// PROBE_TCP it expands to an empty statement.
#ifdef PROBE_TCP
#	define PROBE(buffer, window) \
	dprintf("TCP PROBE %" B_PRIdBIGTIME " %s %s %" B_PRIu32 " snxt %" B_PRIu32 \
		" suna %" B_PRIu32 " cw %" B_PRIu32 " sst %" B_PRIu32 " win %" \
		B_PRIu32 " swin %" B_PRIu32 " smax-suna %" B_PRIu32 " savail %" \
		B_PRIuSIZE " sqused %" B_PRIuSIZE " rto %" B_PRIdBIGTIME "\n", \
		system_time(), PrintAddress(buffer->source), \
		PrintAddress(buffer->destination), buffer->size, fSendNext.Number(), \
		fSendUnacknowledged.Number(), fCongestionWindow, fSlowStartThreshold, \
		window, fSendWindow, (fSendMax - fSendUnacknowledged).Number(), \
		fSendQueue.Available(fSendNext), fSendQueue.Used(), fRetransmitTimeout)
#else
#	define PROBE(buffer, window)	do { } while (0)
#endif
87 
88 #if TCP_TRACING
89 namespace TCPTracing {
90 
// Trace entry describing a received segment: it snapshots the buffer pointer
// and size, the segment's sequence/acknowledge numbers and flags, the window
// passed in by the caller, and the endpoint's state at construction time.
class Receive : public AbstractTraceEntry {
public:
	Receive(TCPEndpoint* endpoint, tcp_segment_header& segment, uint32 window,
			net_buffer* buffer)
		:
		fEndpoint(endpoint),
		fBuffer(buffer),
		fBufferSize(buffer->size),
		fSequence(segment.sequence),
		fAcknowledge(segment.acknowledge),
		fWindow(window),
		fState(endpoint->State()),
		fFlags(segment.flags)
	{
		Initialized();
	}

	// Prints the recorded values as a single line.
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("tcp:%p (%12s) receive buffer %p (%" B_PRIu32 " bytes), "
			"flags %#" B_PRIx8 ", seq %" B_PRIu32 ", ack %" B_PRIu32
			", wnd %" B_PRIu32, fEndpoint, name_for_state(fState), fBuffer,
			fBufferSize, fFlags, fSequence, fAcknowledge, fWindow);
	}

protected:
	TCPEndpoint*	fEndpoint;
	net_buffer*		fBuffer;
		// only printed as a pointer value, never dereferenced in AddDump()
	uint32			fBufferSize;
	uint32			fSequence;
	uint32			fAcknowledge;
	uint32			fWindow;
	tcp_state		fState;
	uint8			fFlags;
};
126 
// Trace entry describing a sent segment: it snapshots the buffer pointer and
// size, the segment's sequence/acknowledge numbers and flags, the first and
// last sequence numbers supplied by the caller, and the endpoint's state at
// construction time.
class Send : public AbstractTraceEntry {
public:
	Send(TCPEndpoint* endpoint, tcp_segment_header& segment, net_buffer* buffer,
			tcp_sequence firstSequence, tcp_sequence lastSequence)
		:
		fEndpoint(endpoint),
		fBuffer(buffer),
		fBufferSize(buffer->size),
		fSequence(segment.sequence),
		fAcknowledge(segment.acknowledge),
		fFirstSequence(firstSequence.Number()),
		fLastSequence(lastSequence.Number()),
		fState(endpoint->State()),
		fFlags(segment.flags)
	{
		Initialized();
	}

	// Prints the recorded values as a single line.
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("tcp:%p (%12s) send buffer %p (%" B_PRIu32 " bytes), "
			"flags %#" B_PRIx8 ", seq %" B_PRIu32 ", ack %" B_PRIu32
			", first %" B_PRIu32 ", last %" B_PRIu32, fEndpoint,
			name_for_state(fState), fBuffer, fBufferSize, fFlags, fSequence,
			fAcknowledge, fFirstSequence, fLastSequence);
	}

protected:
	TCPEndpoint*	fEndpoint;
	net_buffer*		fBuffer;
		// only printed as a pointer value, never dereferenced in AddDump()
	uint32			fBufferSize;
	uint32			fSequence;
	uint32			fAcknowledge;
	uint32			fFirstSequence;
	uint32			fLastSequence;
	tcp_state		fState;
	uint8			fFlags;
};
165 
// Trace entry marking a state change; it records the endpoint and the state
// the endpoint reports at construction time, so callers create it right
// after assigning the new state.
class State : public AbstractTraceEntry {
public:
	State(TCPEndpoint* endpoint)
		:
		fEndpoint(endpoint),
		fState(endpoint->State())
	{
		Initialized();
	}

	// Prints the endpoint and the name of the recorded state.
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("tcp:%p (%12s) state change", fEndpoint,
			name_for_state(fState));
	}

protected:
	TCPEndpoint*	fEndpoint;
	tcp_state		fState;
};
186 
// Trace entry recording that a listening endpoint spawned a new endpoint.
// Only the two pointers are stored.
class Spawn : public AbstractTraceEntry {
public:
	Spawn(TCPEndpoint* listeningEndpoint, TCPEndpoint* spawnedEndpoint)
		:
		fListeningEndpoint(listeningEndpoint),
		fSpawnedEndpoint(spawnedEndpoint)
	{
		Initialized();
	}

	// Prints both endpoint pointers.
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("tcp:%p spawns %p", fListeningEndpoint, fSpawnedEndpoint);
	}

protected:
	TCPEndpoint*	fListeningEndpoint;
	TCPEndpoint*	fSpawnedEndpoint;
};
206 
// Trace entry recording an error message together with the source line it
// was raised at and the endpoint's state at construction time.
class Error : public AbstractTraceEntry {
public:
	Error(TCPEndpoint* endpoint, const char* error, int32 line)
		:
		fEndpoint(endpoint),
		fLine(line),
		fError(error),
		fState(endpoint->State())
	{
		Initialized();
	}

	// Prints the endpoint, its recorded state, the line and the message.
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("tcp:%p (%12s) error at line %" B_PRId32 ": %s", fEndpoint,
			name_for_state(fState), fLine, fError);
	}

protected:
	TCPEndpoint*	fEndpoint;
	int32			fLine;
	const char*		fError;
		// the pointer is stored, not a copy; the string must still be valid
		// when AddDump() runs
	tcp_state		fState;
};
231 
// Trace entry recording that the timer named \a which was set to \a timeout.
// Callers in this file pass -1 as timeout when a timer is cancelled.
class TimerSet : public AbstractTraceEntry {
public:
	TimerSet(TCPEndpoint* endpoint, const char* which, bigtime_t timeout)
		:
		fEndpoint(endpoint),
		fWhich(which),
		fTimeout(timeout),
		fState(endpoint->State())
	{
		Initialized();
	}

	// Prints the endpoint, its recorded state, the timer name and timeout.
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("tcp:%p (%12s) %s timer set to %" B_PRIdBIGTIME, fEndpoint,
			name_for_state(fState), fWhich, fTimeout);
	}

protected:
	TCPEndpoint*	fEndpoint;
	const char*		fWhich;
		// the pointer is stored, not a copy; callers pass string literals
	bigtime_t		fTimeout;
	tcp_state		fState;
};
256 
// Trace entry recording that the timer named \a which fired, together with
// the endpoint's state at construction time.
class TimerTriggered : public AbstractTraceEntry {
public:
	TimerTriggered(TCPEndpoint* endpoint, const char* which)
		:
		fEndpoint(endpoint),
		fWhich(which),
		fState(endpoint->State())
	{
		Initialized();
	}

	// Prints the endpoint, its recorded state and the timer name.
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("tcp:%p (%12s) %s timer triggered", fEndpoint,
			name_for_state(fState), fWhich);
	}

protected:
	TCPEndpoint*	fEndpoint;
	const char*		fWhich;
		// the pointer is stored, not a copy
	tcp_state		fState;
};
279 
// Trace entry recording that the protocol API function named \a which was
// entered, together with the endpoint's state at construction time.
class APICall : public AbstractTraceEntry {
public:
	APICall(TCPEndpoint* endpoint, const char* which)
		:
		fEndpoint(endpoint),
		fWhich(which),
		fState(endpoint->State())
	{
		Initialized();
	}

	// Prints the endpoint, its recorded state and the API function name.
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("tcp:%p (%12s) api call: %s", fEndpoint,
			name_for_state(fState), fWhich);
	}

protected:
	TCPEndpoint*	fEndpoint;
	const char*		fWhich;
		// the pointer is stored, not a copy; callers pass string literals
	tcp_state		fState;
};
302 
303 }	// namespace TCPTracing
304 
// T(x) allocates a TCPTracing::x entry with a non-throwing new; the returned
// pointer is not kept by the caller. With tracing disabled T(x) expands to
// nothing, so `T(...);` leaves just an empty statement.
#	define T(x)	new(std::nothrow) TCPTracing::x
#else
#	define T(x)
308 #endif	// TCP_TRACING
309 
310 
// constants for the fFlags field (bit mask, one bit per flag)
enum {
	// The three FLAG_OPTION_* bits are all set by the constructor.
	FLAG_OPTION_WINDOW_SCALE	= 0x01,
	FLAG_OPTION_TIMESTAMP		= 0x02,
	// Set by Shutdown() for SHUT_RD/SHUT_RDWR; ReadData() returns instead of
	// blocking for more data once it is set.
	// TODO: Should FLAG_NO_RECEIVE apply as well to received connections?
	//       That is, what is expected from accept() after a shutdown()
	//       is performed on a listen()ing socket.
	FLAG_NO_RECEIVE				= 0x04,
	// Set by Free().
	FLAG_CLOSED					= 0x08,
	// Set by _Close(); when it is already set, Free() does not acquire an
	// extra socket reference.
	FLAG_DELETE_ON_CLOSE		= 0x10,
	// Reported by IsLocal().
	FLAG_LOCAL					= 0x20,
	// NOTE(review): presumably marks an ongoing loss recovery phase; it is
	// not used in this part of the file - confirm against the ACK handling.
	FLAG_RECOVERY				= 0x40,
	FLAG_OPTION_SACK_PERMITTED	= 0x80,
};
325 
326 
// Divisor used by tcp_now() to turn system_time() into TCP timestamp ticks.
static const int kTimestampFactor = 1000;
	// conversion factor between usec system time and msec tcp time
329 
330 
331 static inline bigtime_t
332 absolute_timeout(bigtime_t timeout)
333 {
334 	if (timeout == 0 || timeout == B_INFINITE_TIMEOUT)
335 		return timeout;
336 
337 	return timeout + system_time();
338 }
339 
340 
341 static inline status_t
342 posix_error(status_t error)
343 {
344 	if (error == B_TIMED_OUT)
345 		return B_WOULD_BLOCK;
346 
347 	return error;
348 }
349 
350 
351 static inline bool
352 in_window(const tcp_sequence& sequence, const tcp_sequence& receiveNext,
353 	uint32 receiveWindow)
354 {
355 	return sequence >= receiveNext && sequence < (receiveNext + receiveWindow);
356 }
357 
358 
359 static inline bool
360 segment_in_sequence(const tcp_segment_header& segment, int size,
361 	const tcp_sequence& receiveNext, uint32 receiveWindow)
362 {
363 	tcp_sequence sequence(segment.sequence);
364 	if (size == 0) {
365 		if (receiveWindow == 0)
366 			return sequence == receiveNext;
367 		return in_window(sequence, receiveNext, receiveWindow);
368 	} else {
369 		if (receiveWindow == 0)
370 			return false;
371 		return in_window(sequence, receiveNext, receiveWindow)
372 			|| in_window(sequence + size - 1, receiveNext, receiveWindow);
373 	}
374 }
375 
376 
377 static inline bool
378 is_writable(tcp_state state)
379 {
380 	return state == ESTABLISHED || state == FINISH_RECEIVED;
381 }
382 
383 
384 static inline bool
385 is_establishing(tcp_state state)
386 {
387 	return state == SYNCHRONIZE_SENT || state == SYNCHRONIZE_RECEIVED;
388 }
389 
390 
391 static inline uint32 tcp_now()
392 {
393 	return system_time() / kTimestampFactor;
394 }
395 
396 
397 static inline uint32 tcp_diff_timestamp(uint32 base)
398 {
399 	uint32 now = tcp_now();
400 
401 	if (now > base)
402 		return now - base;
403 
404 	return now + UINT_MAX - base;
405 }
406 
407 
408 static inline bool
409 state_needs_finish(int32 state)
410 {
411 	return state == WAIT_FOR_FINISH_ACKNOWLEDGE
412 		|| state == FINISH_SENT || state == CLOSING;
413 }
414 
415 
416 //	#pragma mark -
417 
418 
/*!	Creates an endpoint for \a socket in the CLOSED state.

	The send and receive queues as well as the initial receive window are
	sized from the socket's buffer sizes, both maximum segment sizes start
	out as TCP_DEFAULT_MAX_SEGMENT_SIZE, and the retransmit timeout starts
	at TCP_INITIAL_RTT. The window scale, timestamp, and SACK-permitted
	option flags are enabled by default.
	The endpoint manager (fManager) is not set here, but in Open().
*/
TCPEndpoint::TCPEndpoint(net_socket* socket)
	:
	ProtocolSocket(socket),
	fManager(NULL),
	fOptions(0),
	fSendWindowShift(0),
	fReceiveWindowShift(0),
	fSendUnacknowledged(0),
	fSendNext(0),
	fSendMax(0),
	fSendUrgentOffset(0),
	fSendWindow(0),
	fSendMaxWindow(0),
	fSendMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE),
	fSendMaxSegments(0),
	fSendQueue(socket->send.buffer_size),
	fInitialSendSequence(0),
	fPreviousHighestAcknowledge(0),
	fDuplicateAcknowledgeCount(0),
	fPreviousFlightSize(0),
	fRecover(0),
	fRoute(NULL),
	fReceiveNext(0),
	fReceiveMaxAdvertised(0),
	fReceiveWindow(socket->receive.buffer_size),
	fReceiveMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE),
	fReceiveQueue(socket->receive.buffer_size),
	fSmoothedRoundTripTime(0),
	fRoundTripVariation(0),
	fSendTime(0),
	fRoundTripStartSequence(0),
	fRetransmitTimeout(TCP_INITIAL_RTT),
	fReceivedTimestamp(0),
	fCongestionWindow(0),
	fSlowStartThreshold(0),
	fState(CLOSED),
	fFlags(FLAG_OPTION_WINDOW_SCALE | FLAG_OPTION_TIMESTAMP | FLAG_OPTION_SACK_PERMITTED)
{
	// TODO: to be replaced with a real read/write locking strategy!
	mutex_init(&fLock, "tcp lock");

	fReceiveCondition.Init(this, "tcp receive");
	fSendCondition.Init(this, "tcp send");

	// all four timers get this endpoint as their callback argument
	gStackModule->init_timer(&fPersistTimer, TCPEndpoint::_PersistTimer, this);
	gStackModule->init_timer(&fRetransmitTimer, TCPEndpoint::_RetransmitTimer,
		this);
	gStackModule->init_timer(&fDelayedAcknowledgeTimer,
		TCPEndpoint::_DelayedAcknowledgeTimer, this);
	gStackModule->init_timer(&fTimeWaitTimer, TCPEndpoint::_TimeWaitTimer,
		this);

	T(APICall(this, "constructor"));
}
473 
474 
/*!	Tears the endpoint down: cancels all timers, unbinds it from its endpoint
	manager (if Open() obtained one), destroys the lock, waits for timer
	hooks that may still be running, and finally releases the route.
*/
TCPEndpoint::~TCPEndpoint()
{
	// The lock is acquired here and never unlocked; it is destroyed below.
	mutex_lock(&fLock);

	T(APICall(this, "destructor"));

	_CancelConnectionTimers();
	gStackModule->cancel_timer(&fTimeWaitTimer);
	T(TimerSet(this, "time-wait", -1));

	if (fManager != NULL) {
		fManager->Unbind(this);
		put_endpoint_manager(fManager);
	}

	mutex_destroy(&fLock);

	// we need to wait for all timers to return
	gStackModule->wait_for_timer(&fRetransmitTimer);
	gStackModule->wait_for_timer(&fPersistTimer);
	gStackModule->wait_for_timer(&fDelayedAcknowledgeTimer);
	gStackModule->wait_for_timer(&fTimeWaitTimer);

	// NOTE(review): fRoute is still NULL if no send path was ever prepared;
	// this relies on put_route() accepting that - confirm.
	gDatalinkModule->put_route(Domain(), fRoute);
}
500 
501 
/*!	Reports whether construction succeeded. The constructor has no failure
	path that is recorded, so this always returns B_OK.
*/
status_t
TCPEndpoint::InitCheck() const
{
	return B_OK;
}
507 
508 
509 //	#pragma mark - protocol API
510 
511 
512 status_t
513 TCPEndpoint::Open()
514 {
515 	TRACE("Open()");
516 	T(APICall(this, "open"));
517 
518 	status_t status = ProtocolSocket::Open();
519 	if (status < B_OK)
520 		return status;
521 
522 	fManager = get_endpoint_manager(Domain());
523 	if (fManager == NULL)
524 		return EAFNOSUPPORT;
525 
526 	return B_OK;
527 }
528 
529 
/*!	Closes the endpoint.

	A listening endpoint, or one that has only sent its SYN so far, goes
	straight to CLOSED. Otherwise the connection is shut down via
	_Disconnect(), and, if SO_LINGER is set on the socket, the call waits up
	to the linger time for the send queue to drain.

	\return B_OK on success (also when the linger time ran out), otherwise
		the error of _Disconnect() or of the interrupted wait.
*/
status_t
TCPEndpoint::Close()
{
	MutexLocker locker(fLock);

	TRACE("Close()");
	T(APICall(this, "close"));

	// the accept semaphore is created in Listen(), so it only exists for
	// listening endpoints
	if (fState == LISTEN)
		delete_sem(fAcceptSemaphore);

	if (fState == SYNCHRONIZE_SENT || fState == LISTEN) {
		// TODO: what about linger in case of SYNCHRONIZE_SENT?
		fState = CLOSED;
		T(State(this));
		return B_OK;
	}

	status_t status = _Disconnect(true);
	if (status != B_OK)
		return status;

	if (socket->options & SO_LINGER) {
		TRACE("Close(): Lingering for %i secs", socket->linger);

		// socket->linger is in seconds, the timeout in microseconds
		bigtime_t maximum = absolute_timeout(socket->linger * 1000000LL);

		while (fSendQueue.Used() > 0) {
			status = _WaitForCondition(fSendCondition, locker, maximum);
			// running out of linger time is not an error; stop waiting and
			// report success
			if (status == B_TIMED_OUT || status == B_WOULD_BLOCK)
				break;
			else if (status < B_OK)
				return status;
		}

		TRACE("Close(): after waiting, the SendQ was left with %" B_PRIuSIZE
			" bytes.", fSendQueue.Used());
	}
	return B_OK;
}
570 
571 
572 void
573 TCPEndpoint::Free()
574 {
575 	MutexLocker _(fLock);
576 
577 	TRACE("Free()");
578 	T(APICall(this, "free"));
579 
580 	if (fState <= SYNCHRONIZE_SENT)
581 		return;
582 
583 	// we are only interested in the timer, not in changing state
584 	_EnterTimeWait();
585 
586 	fFlags |= FLAG_CLOSED;
587 	if ((fFlags & FLAG_DELETE_ON_CLOSE) == 0) {
588 		// we'll be freed later when the 2MSL timer expires
589 		gSocketModule->acquire_socket(socket);
590 	}
591 }
592 
593 
/*!	Creates and sends a synchronize packet to \a address, and then waits
	until the connection has been established or refused.

	\param address The peer address; it must belong to this endpoint's
		address family. An empty (wildcard) address is replaced by the
		loopback address, keeping the port.
	\return B_OK once the connection is established, EINPROGRESS for a
		non-blocking socket whose handshake is still running, EAFNOSUPPORT,
		EISCONN, or EALREADY for invalid calls, or the (posix_error()
		mapped) error of the wait.
*/
status_t
TCPEndpoint::Connect(const sockaddr* address)
{
	if (!AddressModule()->is_same_family(address))
		return EAFNOSUPPORT;

	MutexLocker locker(fLock);

	TRACE("Connect() on address %s", PrintAddress(address));
	T(APICall(this, "connect"));

	// A restarted syscall has already sent the SYN; just continue waiting
	// with the absolute timeout that was stored by the first invocation.
	if (gStackModule->is_restarted_syscall()) {
		bigtime_t timeout = gStackModule->restore_syscall_restart_timeout();
		status_t status = _WaitForEstablished(locker, timeout);
		TRACE("  Connect(): Connection complete: %s (timeout was %"
			B_PRIdBIGTIME ")", strerror(status), timeout);
		return posix_error(status);
	}

	// Can only call connect() from CLOSED or LISTEN states
	// otherwise endpoint is considered already connected
	if (fState == LISTEN) {
		// this socket is about to connect; remove pending connections in the backlog
		gSocketModule->set_max_backlog(socket, 0);
	} else if (fState == ESTABLISHED) {
		return EISCONN;
	} else if (fState != CLOSED)
		return EALREADY;

	// consider destination address INADDR_ANY as INADDR_LOOPBACK
	sockaddr_storage _address;
	if (AddressModule()->is_empty_address(address, false)) {
		AddressModule()->get_loopback_address((sockaddr *)&_address);
		// for IPv4 and IPv6 the port is at the same offset
		((sockaddr_in &)_address).sin_port = ((sockaddr_in *)address)->sin_port;
		address = (sockaddr *)&_address;
	}

	status_t status = _PrepareSendPath(address);
	if (status < B_OK)
		return status;

	TRACE("  Connect(): starting 3-way handshake...");

	fState = SYNCHRONIZE_SENT;
	T(State(this));

	// send SYN
	status = _SendQueued();
	if (status != B_OK) {
		_Close();
		return status;
	}

	// If we are running over Loopback, after _SendQueued() returns we
	// may be in ESTABLISHED already.
	if (fState == ESTABLISHED) {
		TRACE("  Connect() completed after _SendQueued()");
		return B_OK;
	}

	// wait until 3-way handshake is complete (if needed)
	bigtime_t timeout = min_c(socket->send.timeout, TCP_CONNECTION_TIMEOUT);
	if (timeout == 0) {
		// we're a non-blocking socket
		TRACE("  Connect() delayed, return EINPROGRESS");
		return EINPROGRESS;
	}

	// remember the absolute deadline, so that a restarted syscall does not
	// start the timeout over again
	bigtime_t absoluteTimeout = absolute_timeout(timeout);
	gStackModule->store_syscall_restart_timeout(absoluteTimeout);

	status = _WaitForEstablished(locker, absoluteTimeout);
	TRACE("  Connect(): Connection complete: %s (timeout was %" B_PRIdBIGTIME
		")", strerror(status), timeout);
	return posix_error(status);
}
674 
675 
/*!	Waits for an incoming connection and returns it in \a _acceptedSocket.

	The call blocks on the accept semaphore, honoring the socket's receive
	timeout, and then dequeues a connected socket. It retries if there was
	nothing to dequeue after the semaphore had been acquired.

	\return B_OK with \a _acceptedSocket set, B_WOULD_BLOCK for a
		non-blocking socket without a pending connection, or the error of
		acquire_sem_etc() (eg. when interrupted or timed out).
*/
status_t
TCPEndpoint::Accept(struct net_socket** _acceptedSocket)
{
	MutexLocker locker(fLock);

	TRACE("Accept()");
	T(APICall(this, "accept"));

	status_t status;
	// a restarted syscall continues with the deadline of the first attempt
	bigtime_t timeout = absolute_timeout(socket->receive.timeout);
	if (gStackModule->is_restarted_syscall())
		timeout = gStackModule->restore_syscall_restart_timeout();
	else
		gStackModule->store_syscall_restart_timeout(timeout);

	do {
		// don't hold the endpoint lock while blocking on the semaphore
		locker.Unlock();

		status = acquire_sem_etc(fAcceptSemaphore, 1, B_ABSOLUTE_TIMEOUT
			| B_CAN_INTERRUPT, timeout);
		if (status != B_OK) {
			// the lock is not held at this point, and the locker will not
			// unlock it again on return
			if (status == B_TIMED_OUT && socket->receive.timeout == 0)
				return B_WOULD_BLOCK;

			return status;
		}

		locker.Lock();
		status = gSocketModule->dequeue_connected(socket, _acceptedSocket);
#ifdef TRACE_TCP
		if (status == B_OK)
			TRACE("  Accept() returning %p", (*_acceptedSocket)->first_protocol);
#endif
	} while (status != B_OK);

	return status;
}
713 
714 
715 status_t
716 TCPEndpoint::Bind(const sockaddr *address)
717 {
718 	if (address == NULL)
719 		return B_BAD_VALUE;
720 
721 	MutexLocker lock(fLock);
722 
723 	TRACE("Bind() on address %s", PrintAddress(address));
724 	T(APICall(this, "bind"));
725 
726 	if (fState != CLOSED)
727 		return EISCONN;
728 
729 	return fManager->Bind(this, address);
730 }
731 
732 
733 status_t
734 TCPEndpoint::Unbind(struct sockaddr *address)
735 {
736 	MutexLocker _(fLock);
737 
738 	TRACE("Unbind()");
739 	T(APICall(this, "unbind"));
740 
741 	return fManager->Unbind(this);
742 }
743 
744 
745 status_t
746 TCPEndpoint::Listen(int count)
747 {
748 	MutexLocker _(fLock);
749 
750 	TRACE("Listen()");
751 	T(APICall(this, "listen"));
752 
753 	if (fState != CLOSED && fState != LISTEN)
754 		return B_BAD_VALUE;
755 
756 	if (fState == CLOSED) {
757 		fAcceptSemaphore = create_sem(0, "tcp accept");
758 		if (fAcceptSemaphore < B_OK)
759 			return ENOBUFS;
760 
761 		status_t status = fManager->SetPassive(this);
762 		if (status != B_OK) {
763 			delete_sem(fAcceptSemaphore);
764 			fAcceptSemaphore = -1;
765 			return status;
766 		}
767 	}
768 
769 	gSocketModule->set_max_backlog(socket, count);
770 
771 	fState = LISTEN;
772 	T(State(this));
773 	return B_OK;
774 }
775 
776 
777 status_t
778 TCPEndpoint::Shutdown(int direction)
779 {
780 	MutexLocker lock(fLock);
781 
782 	TRACE("Shutdown(%i)", direction);
783 	T(APICall(this, "shutdown"));
784 
785 	if (direction == SHUT_RD || direction == SHUT_RDWR)
786 		fFlags |= FLAG_NO_RECEIVE;
787 
788 	if (direction == SHUT_WR || direction == SHUT_RDWR) {
789 		// TODO: That's not correct. After read/write shutting down the socket
790 		// one should still be able to read previously arrived data.
791 		_Disconnect(false);
792 	}
793 
794 	return B_OK;
795 }
796 
797 
798 /*!	Puts data contained in \a buffer into send buffer */
799 status_t
800 TCPEndpoint::SendData(net_buffer *buffer)
801 {
802 	MutexLocker lock(fLock);
803 
804 	TRACE("SendData(buffer %p, size %" B_PRIu32 ", flags %#" B_PRIx32
805 		") [total %" B_PRIuSIZE " bytes, has %" B_PRIuSIZE "]", buffer,
806 		buffer->size, buffer->flags, fSendQueue.Size(), fSendQueue.Free());
807 	T(APICall(this, "senddata"));
808 
809 	uint32 flags = buffer->flags;
810 
811 	if (fState == CLOSED)
812 		return ENOTCONN;
813 	if (fState == LISTEN)
814 		return EDESTADDRREQ;
815 	if (!is_writable(fState) && !is_establishing(fState)) {
816 		// we only send signals when called from userland
817 		if (gStackModule->is_syscall() && (flags & MSG_NOSIGNAL) == 0)
818 			send_signal(find_thread(NULL), SIGPIPE);
819 		return EPIPE;
820 	}
821 
822 	size_t left = buffer->size;
823 
824 	bigtime_t timeout = absolute_timeout(socket->send.timeout);
825 	if (gStackModule->is_restarted_syscall())
826 		timeout = gStackModule->restore_syscall_restart_timeout();
827 	else
828 		gStackModule->store_syscall_restart_timeout(timeout);
829 
830 	while (left > 0) {
831 		while (fSendQueue.Free() < socket->send.low_water_mark) {
832 			// wait until enough space is available
833 			status_t status = _WaitForCondition(fSendCondition, lock, timeout);
834 			if (status < B_OK) {
835 				TRACE("  SendData() returning %s (%d)",
836 					strerror(posix_error(status)), (int)posix_error(status));
837 				return posix_error(status);
838 			}
839 
840 			if (!is_writable(fState) && !is_establishing(fState)) {
841 				// we only send signals when called from userland
842 				if (gStackModule->is_syscall())
843 					send_signal(find_thread(NULL), SIGPIPE);
844 				return EPIPE;
845 			}
846 		}
847 
848 		size_t size = fSendQueue.Free();
849 		if (size < left) {
850 			// we need to split the original buffer
851 			net_buffer* clone = gBufferModule->clone(buffer, false);
852 				// TODO: add offset/size parameter to net_buffer::clone() or
853 				// even a move_data() function, as this is a bit inefficient
854 			if (clone == NULL)
855 				return ENOBUFS;
856 
857 			status_t status = gBufferModule->trim(clone, size);
858 			if (status != B_OK) {
859 				gBufferModule->free(clone);
860 				return status;
861 			}
862 
863 			gBufferModule->remove_header(buffer, size);
864 			left -= size;
865 			fSendQueue.Add(clone);
866 		} else {
867 			left -= buffer->size;
868 			fSendQueue.Add(buffer);
869 		}
870 	}
871 
872 	TRACE("  SendData(): %" B_PRIuSIZE " bytes used.", fSendQueue.Used());
873 
874 	bool force = false;
875 	if ((flags & MSG_OOB) != 0) {
876 		fSendUrgentOffset = fSendQueue.LastSequence();
877 			// RFC 961 specifies that the urgent offset points to the last
878 			// byte of urgent data. However, this is commonly implemented as
879 			// here, ie. it points to the first byte after the urgent data.
880 		force = true;
881 	}
882 	if ((flags & MSG_EOF) != 0)
883 		_Disconnect(false);
884 
885 	if (fState == ESTABLISHED || fState == FINISH_RECEIVED)
886 		_SendQueued(force);
887 
888 	return B_OK;
889 }
890 
891 
892 ssize_t
893 TCPEndpoint::SendAvailable()
894 {
895 	MutexLocker locker(fLock);
896 
897 	ssize_t available;
898 
899 	if (is_writable(fState))
900 		available = fSendQueue.Free();
901 	else if (is_establishing(fState))
902 		available = 0;
903 	else
904 		available = EPIPE;
905 
906 	TRACE("SendAvailable(): %" B_PRIdSSIZE, available);
907 	T(APICall(this, "sendavailable"));
908 	return available;
909 }
910 
911 
912 status_t
913 TCPEndpoint::FillStat(net_stat *stat)
914 {
915 	MutexLocker _(fLock);
916 
917 	strlcpy(stat->state, name_for_state(fState), sizeof(stat->state));
918 	stat->receive_queue_size = fReceiveQueue.Available();
919 	stat->send_queue_size = fSendQueue.Used();
920 
921 	return B_OK;
922 }
923 
924 
/*!	Retrieves up to \a numBytes of received data in \a _buffer.

	The call blocks (honoring the socket's receive timeout) until the receive
	low water mark is available, or all of \a numBytes if MSG_WAITALL is
	given, or until all available data has been pushed by the peer.
	MSG_DONTWAIT makes the call non-blocking, MSG_PEEK leaves the data in
	the receive queue.

	\return The result of the receive queue's Get() when data was retrieved;
		B_OK with \a _buffer set to \c NULL when the connection is closing
		or receiving has been shut down; ENOTCONN for a closed endpoint;
		B_WOULD_BLOCK if nothing is available on a non-blocking call; or the
		(posix_error() mapped) error of the wait.
*/
status_t
TCPEndpoint::ReadData(size_t numBytes, uint32 flags, net_buffer** _buffer)
{
	MutexLocker locker(fLock);

	TRACE("ReadData(%" B_PRIuSIZE " bytes, flags %#" B_PRIx32 ")", numBytes,
		flags);
	T(APICall(this, "readdata"));

	*_buffer = NULL;

	if (fState == CLOSED)
		return ENOTCONN;

	// a restarted syscall continues with the deadline of the first attempt
	bigtime_t timeout = absolute_timeout(socket->receive.timeout);
	if (gStackModule->is_restarted_syscall())
		timeout = gStackModule->restore_syscall_restart_timeout();
	else
		gStackModule->store_syscall_restart_timeout(timeout);

	// there cannot be any data before the handshake has completed
	if (fState == SYNCHRONIZE_SENT || fState == SYNCHRONIZE_RECEIVED) {
		if (flags & MSG_DONTWAIT)
			return B_WOULD_BLOCK;

		status_t status = _WaitForEstablished(locker, timeout);
		if (status < B_OK)
			return posix_error(status);
	}

	size_t dataNeeded = socket->receive.low_water_mark;

	// When MSG_WAITALL is set then the function should block
	// until the full amount of data can be returned.
	if (flags & MSG_WAITALL)
		dataNeeded = numBytes;

	// TODO: add support for urgent data (MSG_OOB)

	while (true) {
		if (fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE
			|| fState == TIME_WAIT) {
			// ``Connection closing''.
			return B_OK;
		}

		if (fReceiveQueue.Available() > 0) {
			// enough data, or everything that is available has been pushed
			if (fReceiveQueue.Available() >= dataNeeded
				|| (fReceiveQueue.PushedData() > 0
					&& fReceiveQueue.PushedData() >= fReceiveQueue.Available()))
				break;
		} else if (fState == FINISH_RECEIVED) {
			// ``If no text is awaiting delivery, the RECEIVE will
			//   get a Connection closing''.
			return B_OK;
		}

		if ((flags & MSG_DONTWAIT) != 0 || socket->receive.timeout == 0)
			return B_WOULD_BLOCK;

		// receiving has been shut down, don't wait for more data
		if ((fFlags & FLAG_NO_RECEIVE) != 0)
			return B_OK;

		status_t status = _WaitForCondition(fReceiveCondition, locker, timeout);
		if (status < B_OK) {
			// The Open Group base specification mentions that EINTR should be
			// returned if the recv() is interrupted before _any data_ is
			// available. So we actually check if there is data, and if so,
			// push it to the user.
			if ((status == B_TIMED_OUT || status == B_INTERRUPTED)
				&& fReceiveQueue.Available() > 0)
				break;

			return posix_error(status);
		}
	}

	TRACE("  ReadData(): %" B_PRIuSIZE " are available.",
		fReceiveQueue.Available());

	// more data is queued than this call asks for; wake up the other
	// waiters on the receive condition
	if (numBytes < fReceiveQueue.Available())
		fReceiveCondition.NotifyAll();

	bool clone = (flags & MSG_PEEK) != 0;

	// when peeking, the data is not removed from the queue
	ssize_t receivedBytes = fReceiveQueue.Get(numBytes, !clone, _buffer);

	TRACE("  ReadData(): %" B_PRIuSIZE " bytes kept.",
		fReceiveQueue.Available());

	// if we are opening the window, check if we should send an ACK
	if (!clone)
		SendAcknowledge(false);

	return receivedBytes;
}
1020 
1021 
1022 ssize_t
1023 TCPEndpoint::ReadAvailable()
1024 {
1025 	MutexLocker locker(fLock);
1026 
1027 	TRACE("ReadAvailable(): %" B_PRIdSSIZE, _AvailableData());
1028 	T(APICall(this, "readavailable"));
1029 
1030 	return _AvailableData();
1031 }
1032 
1033 
1034 status_t
1035 TCPEndpoint::SetSendBufferSize(size_t length)
1036 {
1037 	MutexLocker _(fLock);
1038 	fSendQueue.SetMaxBytes(length);
1039 	return B_OK;
1040 }
1041 
1042 
1043 status_t
1044 TCPEndpoint::SetReceiveBufferSize(size_t length)
1045 {
1046 	MutexLocker _(fLock);
1047 	fReceiveQueue.SetMaxBytes(length);
1048 	return B_OK;
1049 }
1050 
1051 
1052 status_t
1053 TCPEndpoint::GetOption(int option, void* _value, int* _length)
1054 {
1055 	if (*_length != sizeof(int))
1056 		return B_BAD_VALUE;
1057 
1058 	int* value = (int*)_value;
1059 
1060 	switch (option) {
1061 		case TCP_NODELAY:
1062 			if ((fOptions & TCP_NODELAY) != 0)
1063 				*value = 1;
1064 			else
1065 				*value = 0;
1066 			return B_OK;
1067 
1068 		case TCP_MAXSEG:
1069 			*value = fReceiveMaxSegmentSize;
1070 			return B_OK;
1071 
1072 		default:
1073 			return B_BAD_VALUE;
1074 	}
1075 }
1076 
1077 
1078 status_t
1079 TCPEndpoint::SetOption(int option, const void* _value, int length)
1080 {
1081 	if (option != TCP_NODELAY)
1082 		return B_BAD_VALUE;
1083 
1084 	if (length != sizeof(int))
1085 		return B_BAD_VALUE;
1086 
1087 	const int* value = (const int*)_value;
1088 
1089 	MutexLocker _(fLock);
1090 	if (*value)
1091 		fOptions |= TCP_NODELAY;
1092 	else
1093 		fOptions &= ~TCP_NODELAY;
1094 
1095 	return B_OK;
1096 }
1097 
1098 
1099 //	#pragma mark - misc
1100 
1101 
1102 bool
1103 TCPEndpoint::IsBound() const
1104 {
1105 	return !LocalAddress().IsEmpty(true);
1106 }
1107 
1108 
1109 bool
1110 TCPEndpoint::IsLocal() const
1111 {
1112 	return (fFlags & FLAG_LOCAL) != 0;
1113 }
1114 
1115 
1116 status_t
1117 TCPEndpoint::DelayedAcknowledge()
1118 {
1119 	if (gStackModule->cancel_timer(&fDelayedAcknowledgeTimer)) {
1120 		// timer was active, send an ACK now (with the exception above,
1121 		// we send every other ACK)
1122 		T(TimerSet(this, "delayed ack", -1));
1123 		return SendAcknowledge(true);
1124 	}
1125 
1126 	gStackModule->set_timer(&fDelayedAcknowledgeTimer,
1127 		TCP_DELAYED_ACKNOWLEDGE_TIMEOUT);
1128 	T(TimerSet(this, "delayed ack", TCP_DELAYED_ACKNOWLEDGE_TIMEOUT));
1129 	return B_OK;
1130 }
1131 
1132 
/*!	Sends an acknowledgement by calling _SendQueued() with \a force and 0 as
	its second argument.
	NOTE(review): the second argument is presumably the send window, so that
	no queued data goes out with the ACK - confirm against _SendQueued().

	\return The result of _SendQueued().
*/
status_t
TCPEndpoint::SendAcknowledge(bool force)
{
	return _SendQueued(force, 0);
}
1138 
1139 
/*!	Arms the persist timer with TCP_PERSIST_TIMEOUT. */
void
TCPEndpoint::_StartPersistTimer()
{
	gStackModule->set_timer(&fPersistTimer, TCP_PERSIST_TIMEOUT);
	T(TimerSet(this, "persist", TCP_PERSIST_TIMEOUT));
}
1146 
1147 
1148 void
1149 TCPEndpoint::_EnterTimeWait()
1150 {
1151 	TRACE("_EnterTimeWait()");
1152 
1153 	if (fState == TIME_WAIT) {
1154 		_CancelConnectionTimers();
1155 	}
1156 
1157 	_UpdateTimeWait();
1158 }
1159 
1160 
/*!	(Re)arms the time-wait timer with twice the maximum segment lifetime
	(2MSL).
*/
void
TCPEndpoint::_UpdateTimeWait()
{
	gStackModule->set_timer(&fTimeWaitTimer, TCP_MAX_SEGMENT_LIFETIME << 1);
	T(TimerSet(this, "time-wait", TCP_MAX_SEGMENT_LIFETIME << 1));
}
1167 
1168 
/*!	Cancels the retransmit, persist, and delayed acknowledge timers. The
	time-wait timer is left untouched.
*/
void
TCPEndpoint::_CancelConnectionTimers()
{
	gStackModule->cancel_timer(&fRetransmitTimer);
	T(TimerSet(this, "retransmit", -1));
	gStackModule->cancel_timer(&fPersistTimer);
	T(TimerSet(this, "persist", -1));
	gStackModule->cancel_timer(&fDelayedAcknowledgeTimer);
	T(TimerSet(this, "delayed ack", -1));
}
1179 
1180 
1181 /*!	Sends the FIN flag to the peer when the connection is still open.
1182 	Moves the endpoint to the next state depending on where it was.
1183 */
1184 status_t
1185 TCPEndpoint::_Disconnect(bool closing)
1186 {
1187 	tcp_state previousState = fState;
1188 
1189 	if (fState == SYNCHRONIZE_RECEIVED || fState == ESTABLISHED)
1190 		fState = FINISH_SENT;
1191 	else if (fState == FINISH_RECEIVED)
1192 		fState = WAIT_FOR_FINISH_ACKNOWLEDGE;
1193 	else
1194 		return B_OK;
1195 
1196 	T(State(this));
1197 
1198 	status_t status = _SendQueued();
1199 	if (status != B_OK) {
1200 		fState = previousState;
1201 		T(State(this));
1202 		return status;
1203 	}
1204 
1205 	return B_OK;
1206 }
1207 
1208 
1209 void
1210 TCPEndpoint::_MarkEstablished()
1211 {
1212 	fState = ESTABLISHED;
1213 	T(State(this));
1214 
1215 	gSocketModule->set_connected(socket);
1216 	if (gSocketModule->has_parent(socket))
1217 		release_sem_etc(fAcceptSemaphore, 1, B_DO_NOT_RESCHEDULE);
1218 
1219 	fSendCondition.NotifyAll();
1220 	gSocketModule->notify(socket, B_SELECT_WRITE, fSendQueue.Free());
1221 }
1222 
1223 
1224 status_t
1225 TCPEndpoint::_WaitForEstablished(MutexLocker &locker, bigtime_t timeout)
1226 {
1227 	// TODO: Checking for CLOSED seems correct, but breaks several neon tests.
1228 	// When investigating this, also have a look at _Close() and _HandleReset().
1229 	while (fState < ESTABLISHED/* && fState != CLOSED*/) {
1230 		if (socket->error != B_OK)
1231 			return socket->error;
1232 
1233 		status_t status = _WaitForCondition(fSendCondition, locker, timeout);
1234 		if (status < B_OK)
1235 			return status;
1236 	}
1237 
1238 	return B_OK;
1239 }
1240 
1241 
1242 //	#pragma mark - receive
1243 
1244 
1245 void
1246 TCPEndpoint::_Close()
1247 {
1248 	_CancelConnectionTimers();
1249 	fState = CLOSED;
1250 	T(State(this));
1251 
1252 	fFlags |= FLAG_DELETE_ON_CLOSE;
1253 
1254 	fSendCondition.NotifyAll();
1255 	_NotifyReader();
1256 
1257 	if (gSocketModule->has_parent(socket)) {
1258 		// We still have a parent - obviously, we haven't been accepted yet,
1259 		// so no one could ever close us.
1260 		_CancelConnectionTimers();
1261 		gSocketModule->set_aborted(socket);
1262 	}
1263 }
1264 
1265 
1266 void
1267 TCPEndpoint::_HandleReset(status_t error)
1268 {
1269 	socket->error = error;
1270 	_Close();
1271 
1272 	gSocketModule->notify(socket, B_SELECT_WRITE, error);
1273 	gSocketModule->notify(socket, B_SELECT_ERROR, error);
1274 }
1275 
1276 
1277 void
1278 TCPEndpoint::_DuplicateAcknowledge(tcp_segment_header &segment)
1279 {
1280 	if (fDuplicateAcknowledgeCount == 0)
1281 		fPreviousFlightSize = (fSendMax - fSendUnacknowledged).Number();
1282 
1283 	if (++fDuplicateAcknowledgeCount < 3) {
1284 		if (fSendQueue.Available(fSendMax) != 0  && fSendWindow != 0) {
1285 			fSendNext = fSendMax;
1286 			fCongestionWindow += fDuplicateAcknowledgeCount * fSendMaxSegmentSize;
1287 			_SendQueued();
1288 			TRACE("_DuplicateAcknowledge(): packet sent under limited transmit on receipt of dup ack");
1289 			fCongestionWindow -= fDuplicateAcknowledgeCount * fSendMaxSegmentSize;
1290 		}
1291 	}
1292 
1293 	if (fDuplicateAcknowledgeCount == 3) {
1294 		if ((segment.acknowledge - 1) > fRecover || (fCongestionWindow > fSendMaxSegmentSize &&
1295 			(fSendUnacknowledged - fPreviousHighestAcknowledge) <= 4 * fSendMaxSegmentSize)) {
1296 			fFlags |= FLAG_RECOVERY;
1297 			fRecover = fSendMax.Number() - 1;
1298 			fSlowStartThreshold = max_c(fPreviousFlightSize / 2, 2 * fSendMaxSegmentSize);
1299 			fCongestionWindow = fSlowStartThreshold + 3 * fSendMaxSegmentSize;
1300 			fSendNext = segment.acknowledge;
1301 			_SendQueued();
1302 			TRACE("_DuplicateAcknowledge(): packet sent under fast restransmit on the receipt of 3rd dup ack");
1303 		}
1304 	} else if (fDuplicateAcknowledgeCount > 3) {
1305 		uint32 flightSize = (fSendMax - fSendUnacknowledged).Number();
1306 		if ((fDuplicateAcknowledgeCount - 3) * fSendMaxSegmentSize <= flightSize)
1307 			fCongestionWindow += fSendMaxSegmentSize;
1308 		if (fSendQueue.Available(fSendMax) != 0) {
1309 			fSendNext = fSendMax;
1310 			_SendQueued();
1311 		}
1312 	}
1313 }
1314 
1315 
1316 void
1317 TCPEndpoint::_UpdateTimestamps(tcp_segment_header& segment,
1318 	size_t segmentLength)
1319 {
1320 	if (fFlags & FLAG_OPTION_TIMESTAMP) {
1321 		tcp_sequence sequence(segment.sequence);
1322 
1323 		if (fLastAcknowledgeSent >= sequence
1324 			&& fLastAcknowledgeSent < (sequence + segmentLength))
1325 			fReceivedTimestamp = segment.timestamp_value;
1326 	}
1327 }
1328 
1329 
1330 ssize_t
1331 TCPEndpoint::_AvailableData() const
1332 {
1333 	// TODO: Refer to the FLAG_NO_RECEIVE comment above regarding
1334 	//       the application of FLAG_NO_RECEIVE in listen()ing
1335 	//       sockets.
1336 	if (fState == LISTEN)
1337 		return gSocketModule->count_connected(socket);
1338 	if (fState == SYNCHRONIZE_SENT)
1339 		return 0;
1340 
1341 	ssize_t availableData = fReceiveQueue.Available();
1342 
1343 	if (availableData == 0 && !_ShouldReceive())
1344 		return ENOTCONN;
1345 
1346 	return availableData;
1347 }
1348 
1349 
1350 void
1351 TCPEndpoint::_NotifyReader()
1352 {
1353 	fReceiveCondition.NotifyAll();
1354 	gSocketModule->notify(socket, B_SELECT_READ, _AvailableData());
1355 }
1356 
1357 
1358 bool
1359 TCPEndpoint::_AddData(tcp_segment_header& segment, net_buffer* buffer)
1360 {
1361 	if ((segment.flags & TCP_FLAG_FINISH) != 0) {
1362 		// Remember the position of the finish received flag
1363 		fFinishReceived = true;
1364 		fFinishReceivedAt = segment.sequence + buffer->size;
1365 	}
1366 
1367 	fReceiveQueue.Add(buffer, segment.sequence);
1368 	fReceiveNext = fReceiveQueue.NextSequence();
1369 
1370 	if (fFinishReceived) {
1371 		// Set or reset the finish flag on the current segment
1372 		if (fReceiveNext < fFinishReceivedAt)
1373 			segment.flags &= ~TCP_FLAG_FINISH;
1374 		else
1375 			segment.flags |= TCP_FLAG_FINISH;
1376 	}
1377 
1378 	TRACE("  _AddData(): adding data, receive next = %" B_PRIu32 ". Now have %"
1379 		B_PRIuSIZE " bytes.", fReceiveNext.Number(), fReceiveQueue.Available());
1380 
1381 	if ((segment.flags & TCP_FLAG_PUSH) != 0)
1382 		fReceiveQueue.SetPushPointer();
1383 
1384 	return fReceiveQueue.Available() > 0;
1385 }
1386 
1387 
1388 void
1389 TCPEndpoint::_PrepareReceivePath(tcp_segment_header& segment)
1390 {
1391 	fInitialReceiveSequence = segment.sequence;
1392 	fFinishReceived = false;
1393 
1394 	// count the received SYN
1395 	segment.sequence++;
1396 
1397 	fReceiveNext = segment.sequence;
1398 	fReceiveQueue.SetInitialSequence(segment.sequence);
1399 
1400 	if ((fOptions & TCP_NOOPT) == 0) {
1401 		if (segment.max_segment_size > 0)
1402 			fSendMaxSegmentSize = segment.max_segment_size;
1403 
1404 		if (segment.options & TCP_HAS_WINDOW_SCALE) {
1405 			fFlags |= FLAG_OPTION_WINDOW_SCALE;
1406 			fSendWindowShift = segment.window_shift;
1407 		} else {
1408 			fFlags &= ~FLAG_OPTION_WINDOW_SCALE;
1409 			fReceiveWindowShift = 0;
1410 		}
1411 
1412 		if (segment.options & TCP_HAS_TIMESTAMPS) {
1413 			fFlags |= FLAG_OPTION_TIMESTAMP;
1414 			fReceivedTimestamp = segment.timestamp_value;
1415 		} else
1416 			fFlags &= ~FLAG_OPTION_TIMESTAMP;
1417 
1418 		if ((segment.options & TCP_SACK_PERMITTED) == 0)
1419 			fFlags &= ~FLAG_OPTION_SACK_PERMITTED;
1420 	}
1421 
1422 	if (fSendMaxSegmentSize > 2190)
1423 		fCongestionWindow = 2 * fSendMaxSegmentSize;
1424 	else if (fSendMaxSegmentSize > 1095)
1425 		fCongestionWindow = 3 * fSendMaxSegmentSize;
1426 	else
1427 		fCongestionWindow = 4 * fSendMaxSegmentSize;
1428 
1429 	fSendMaxSegments = fCongestionWindow / fSendMaxSegmentSize;
1430 	fSlowStartThreshold = (uint32)segment.advertised_window << fSendWindowShift;
1431 }
1432 
1433 
1434 bool
1435 TCPEndpoint::_ShouldReceive() const
1436 {
1437 	if ((fFlags & FLAG_NO_RECEIVE) != 0)
1438 		return false;
1439 
1440 	return fState == ESTABLISHED || fState == FINISH_SENT
1441 		|| fState == FINISH_ACKNOWLEDGED;
1442 }
1443 
1444 
1445 int32
1446 TCPEndpoint::_Spawn(TCPEndpoint* parent, tcp_segment_header& segment,
1447 	net_buffer* buffer)
1448 {
1449 	MutexLocker _(fLock);
1450 
1451 	// TODO error checking
1452 	ProtocolSocket::Open();
1453 
1454 	fState = SYNCHRONIZE_RECEIVED;
1455 	T(Spawn(parent, this));
1456 
1457 	fManager = parent->fManager;
1458 
1459 	LocalAddress().SetTo(buffer->destination);
1460 	PeerAddress().SetTo(buffer->source);
1461 
1462 	TRACE("Spawn()");
1463 
1464 	// TODO: proper error handling!
1465 	if (fManager->BindChild(this) != B_OK) {
1466 		T(Error(this, "binding failed", __LINE__));
1467 		return DROP;
1468 	}
1469 	if (_PrepareSendPath(*PeerAddress()) != B_OK) {
1470 		T(Error(this, "prepare send faild", __LINE__));
1471 		return DROP;
1472 	}
1473 
1474 	fOptions = parent->fOptions;
1475 	fAcceptSemaphore = parent->fAcceptSemaphore;
1476 
1477 	_PrepareReceivePath(segment);
1478 
1479 	// send SYN+ACK
1480 	if (_SendQueued() != B_OK) {
1481 		T(Error(this, "sending failed", __LINE__));
1482 		return DROP;
1483 	}
1484 
1485 	segment.flags &= ~TCP_FLAG_SYNCHRONIZE;
1486 		// we handled this flag now, it must not be set for further processing
1487 
1488 	return _Receive(segment, buffer);
1489 }
1490 
1491 
1492 int32
1493 TCPEndpoint::_ListenReceive(tcp_segment_header& segment, net_buffer* buffer)
1494 {
1495 	TRACE("ListenReceive()");
1496 
1497 	// Essentially, we accept only TCP_FLAG_SYNCHRONIZE in this state,
1498 	// but the error behaviour differs
1499 	if (segment.flags & TCP_FLAG_RESET)
1500 		return DROP;
1501 	if (segment.flags & TCP_FLAG_ACKNOWLEDGE)
1502 		return DROP | RESET;
1503 	if ((segment.flags & TCP_FLAG_SYNCHRONIZE) == 0)
1504 		return DROP;
1505 
1506 	// TODO: drop broadcast/multicast
1507 
1508 	// spawn new endpoint for accept()
1509 	net_socket* newSocket;
1510 	if (gSocketModule->spawn_pending_socket(socket, &newSocket) < B_OK) {
1511 		T(Error(this, "spawning failed", __LINE__));
1512 		return DROP;
1513 	}
1514 
1515 	return ((TCPEndpoint *)newSocket->first_protocol)->_Spawn(this,
1516 		segment, buffer);
1517 }
1518 
1519 
1520 int32
1521 TCPEndpoint::_SynchronizeSentReceive(tcp_segment_header &segment,
1522 	net_buffer *buffer)
1523 {
1524 	TRACE("_SynchronizeSentReceive()");
1525 
1526 	if ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0
1527 		&& (fInitialSendSequence >= segment.acknowledge
1528 			|| fSendMax < segment.acknowledge))
1529 		return DROP | RESET;
1530 
1531 	if (segment.flags & TCP_FLAG_RESET) {
1532 		_HandleReset(ECONNREFUSED);
1533 		return DROP;
1534 	}
1535 
1536 	if ((segment.flags & TCP_FLAG_SYNCHRONIZE) == 0)
1537 		return DROP;
1538 
1539 	fSendUnacknowledged = segment.acknowledge;
1540 	_PrepareReceivePath(segment);
1541 
1542 	if (segment.flags & TCP_FLAG_ACKNOWLEDGE) {
1543 		_MarkEstablished();
1544 	} else {
1545 		// simultaneous open
1546 		fState = SYNCHRONIZE_RECEIVED;
1547 		T(State(this));
1548 	}
1549 
1550 	segment.flags &= ~TCP_FLAG_SYNCHRONIZE;
1551 		// we handled this flag now, it must not be set for further processing
1552 
1553 	return _Receive(segment, buffer) | IMMEDIATE_ACKNOWLEDGE;
1554 }
1555 
1556 
1557 int32
1558 TCPEndpoint::_Receive(tcp_segment_header& segment, net_buffer* buffer)
1559 {
1560 	// PAWS processing takes precedence over regular TCP acceptability check
1561 	if ((fFlags & FLAG_OPTION_TIMESTAMP) != 0 && (segment.flags & TCP_FLAG_RESET) == 0) {
1562 		if ((segment.options & TCP_HAS_TIMESTAMPS) == 0)
1563 			return DROP;
1564 		if ((int32)(fReceivedTimestamp - segment.timestamp_value) > 0
1565 			&& (fReceivedTimestamp - segment.timestamp_value) <= INT32_MAX)
1566 			return DROP | IMMEDIATE_ACKNOWLEDGE;
1567 	}
1568 
1569 	uint32 advertisedWindow = (uint32)segment.advertised_window
1570 		<< fSendWindowShift;
1571 	size_t segmentLength = buffer->size;
1572 
1573 	// First, handle the most common case for uni-directional data transfer
1574 	// (known as header prediction - the segment must not change the window,
1575 	// and must be the expected sequence, and contain no control flags)
1576 
1577 	if (fState == ESTABLISHED
1578 		&& segment.AcknowledgeOnly()
1579 		&& fReceiveNext == segment.sequence
1580 		&& advertisedWindow > 0 && advertisedWindow == fSendWindow
1581 		&& fSendNext == fSendMax) {
1582 		_UpdateTimestamps(segment, segmentLength);
1583 
1584 		if (segmentLength == 0) {
1585 			// this is a pure acknowledge segment - we're on the sending end
1586 			if (fSendUnacknowledged < segment.acknowledge
1587 				&& fSendMax >= segment.acknowledge) {
1588 				_Acknowledged(segment);
1589 				return DROP;
1590 			}
1591 		} else if (segment.acknowledge == fSendUnacknowledged
1592 			&& fReceiveQueue.IsContiguous()
1593 			&& fReceiveQueue.Free() >= segmentLength
1594 			&& (fFlags & FLAG_NO_RECEIVE) == 0) {
1595 			if (_AddData(segment, buffer))
1596 				_NotifyReader();
1597 
1598 			return KEEP | ((segment.flags & TCP_FLAG_PUSH) != 0
1599 				? IMMEDIATE_ACKNOWLEDGE : ACKNOWLEDGE);
1600 		}
1601 	}
1602 
1603 	// The fast path was not applicable, so we continue with the standard
1604 	// processing of the incoming segment
1605 
1606 	ASSERT(fState != SYNCHRONIZE_SENT && fState != LISTEN);
1607 
1608 	if (fState != CLOSED && fState != TIME_WAIT) {
1609 		// Check sequence number
1610 		if (!segment_in_sequence(segment, segmentLength, fReceiveNext,
1611 				fReceiveWindow)) {
1612 			TRACE("  Receive(): segment out of window, next: %" B_PRIu32
1613 				" wnd: %" B_PRIu32, fReceiveNext.Number(), fReceiveWindow);
1614 			if ((segment.flags & TCP_FLAG_RESET) != 0) {
1615 				// TODO: this doesn't look right - review!
1616 				return DROP;
1617 			}
1618 			return DROP | IMMEDIATE_ACKNOWLEDGE;
1619 		}
1620 	}
1621 
1622 	if ((segment.flags & TCP_FLAG_RESET) != 0) {
1623 		// Is this a valid reset?
1624 		// We generally ignore resets in time wait state (see RFC 1337)
1625 		if (fLastAcknowledgeSent <= segment.sequence
1626 			&& tcp_sequence(segment.sequence) < (fLastAcknowledgeSent
1627 				+ fReceiveWindow)
1628 			&& fState != TIME_WAIT) {
1629 			status_t error;
1630 			if (fState == SYNCHRONIZE_RECEIVED)
1631 				error = ECONNREFUSED;
1632 			else if (fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE)
1633 				error = ENOTCONN;
1634 			else
1635 				error = ECONNRESET;
1636 
1637 			_HandleReset(error);
1638 		}
1639 
1640 		return DROP;
1641 	}
1642 
1643 	if ((segment.flags & TCP_FLAG_SYNCHRONIZE) != 0
1644 		|| (fState == SYNCHRONIZE_RECEIVED
1645 			&& (fInitialReceiveSequence > segment.sequence
1646 				|| ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0
1647 					&& (fSendUnacknowledged > segment.acknowledge
1648 						|| fSendMax < segment.acknowledge))))) {
1649 		// reset the connection - either the initial SYN was faulty, or we
1650 		// received a SYN within the data stream
1651 		return DROP | RESET;
1652 	}
1653 
1654 	// TODO: Check this! Why do we advertize a window outside of what we should
1655 	// buffer?
1656 	fReceiveWindow = max_c(fReceiveQueue.Free(), fReceiveWindow);
1657 		// the window must not shrink
1658 
1659 	// trim buffer to be within the receive window
1660 	int32 drop = (int32)(fReceiveNext - segment.sequence).Number();
1661 	if (drop > 0) {
1662 		if ((uint32)drop > buffer->size
1663 			|| ((uint32)drop == buffer->size
1664 				&& (segment.flags & TCP_FLAG_FINISH) == 0)) {
1665 			// don't accidently remove a FIN we shouldn't remove
1666 			segment.flags &= ~TCP_FLAG_FINISH;
1667 			drop = buffer->size;
1668 		}
1669 
1670 		// remove duplicate data at the start
1671 		TRACE("* remove %" B_PRId32 " bytes from the start", drop);
1672 		gBufferModule->remove_header(buffer, drop);
1673 		segment.sequence += drop;
1674 	}
1675 
1676 	int32 action = KEEP;
1677 
1678 	// immediately acknowledge out-of-order segment to trigger fast-retransmit at the sender
1679 	if (drop != 0)
1680 		action |= IMMEDIATE_ACKNOWLEDGE;
1681 
1682 	drop = (int32)(segment.sequence + buffer->size
1683 		- (fReceiveNext + fReceiveWindow)).Number();
1684 	if (drop > 0) {
1685 		// remove data exceeding our window
1686 		if ((uint32)drop >= buffer->size) {
1687 			// if we can accept data, or the segment is not what we'd expect,
1688 			// drop the segment (an immediate acknowledge is always triggered)
1689 			if (fReceiveWindow != 0 || segment.sequence != fReceiveNext)
1690 				return DROP | IMMEDIATE_ACKNOWLEDGE;
1691 
1692 			action |= IMMEDIATE_ACKNOWLEDGE;
1693 		}
1694 
1695 		if ((segment.flags & TCP_FLAG_FINISH) != 0) {
1696 			// we need to remove the finish, too, as part of the data
1697 			drop--;
1698 		}
1699 
1700 		segment.flags &= ~(TCP_FLAG_FINISH | TCP_FLAG_PUSH);
1701 		TRACE("* remove %" B_PRId32 " bytes from the end", drop);
1702 		gBufferModule->remove_trailer(buffer, drop);
1703 	}
1704 
1705 #ifdef TRACE_TCP
1706 	if (advertisedWindow > fSendWindow) {
1707 		TRACE("  Receive(): Window update %" B_PRIu32 " -> %" B_PRIu32,
1708 			fSendWindow, advertisedWindow);
1709 	}
1710 #endif
1711 
1712 	if (advertisedWindow > fSendWindow)
1713 		action |= IMMEDIATE_ACKNOWLEDGE;
1714 
1715 	fSendWindow = advertisedWindow;
1716 	if (advertisedWindow > fSendMaxWindow)
1717 		fSendMaxWindow = advertisedWindow;
1718 
1719 	// Then look at the acknowledgement for any updates
1720 
1721 	if ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0) {
1722 		// process acknowledged data
1723 		if (fState == SYNCHRONIZE_RECEIVED)
1724 			_MarkEstablished();
1725 
1726 		if (fSendMax < segment.acknowledge)
1727 			return DROP | IMMEDIATE_ACKNOWLEDGE;
1728 
1729 		if (segment.acknowledge == fSendUnacknowledged) {
1730 			if (buffer->size == 0 && advertisedWindow == fSendWindow
1731 				&& (segment.flags & TCP_FLAG_FINISH) == 0 && fSendUnacknowledged != fSendMax) {
1732 				TRACE("Receive(): duplicate ack!");
1733 				_DuplicateAcknowledge(segment);
1734 			}
1735 		} else if (segment.acknowledge < fSendUnacknowledged) {
1736 			return DROP;
1737 		} else {
1738 			// this segment acknowledges in flight data
1739 
1740 			if (fDuplicateAcknowledgeCount >= 3) {
1741 				// deflate the window.
1742 				if (segment.acknowledge > fRecover) {
1743 					uint32 flightSize = (fSendMax - fSendUnacknowledged).Number();
1744 					fCongestionWindow = min_c(fSlowStartThreshold,
1745 						max_c(flightSize, fSendMaxSegmentSize) + fSendMaxSegmentSize);
1746 					fFlags &= ~FLAG_RECOVERY;
1747 				}
1748 			}
1749 
1750 			if (fSendMax == segment.acknowledge)
1751 				TRACE("Receive(): all inflight data ack'd!");
1752 
1753 			if (segment.acknowledge > fSendQueue.LastSequence()
1754 					&& fState > ESTABLISHED) {
1755 				TRACE("Receive(): FIN has been acknowledged!");
1756 
1757 				switch (fState) {
1758 					case FINISH_SENT:
1759 						fState = FINISH_ACKNOWLEDGED;
1760 						T(State(this));
1761 						break;
1762 					case CLOSING:
1763 						fState = TIME_WAIT;
1764 						T(State(this));
1765 						_EnterTimeWait();
1766 						return DROP;
1767 					case WAIT_FOR_FINISH_ACKNOWLEDGE:
1768 						_Close();
1769 						break;
1770 
1771 					default:
1772 						break;
1773 				}
1774 			}
1775 
1776 			if (fState != CLOSED) {
1777 				tcp_sequence last = fLastAcknowledgeSent;
1778 				_Acknowledged(segment);
1779 				// we just sent an acknowledge, remove from action
1780 				if (last < fLastAcknowledgeSent)
1781 					action &= ~IMMEDIATE_ACKNOWLEDGE;
1782 			}
1783 		}
1784 	}
1785 
1786 	if (segment.flags & TCP_FLAG_URGENT) {
1787 		if (fState == ESTABLISHED || fState == FINISH_SENT
1788 			|| fState == FINISH_ACKNOWLEDGED) {
1789 			// TODO: Handle urgent data:
1790 			//  - RCV.UP <- max(RCV.UP, SEG.UP)
1791 			//  - signal the user that urgent data is available (SIGURG)
1792 		}
1793 	}
1794 
1795 	bool notify = false;
1796 
1797 	// The buffer may be freed if its data is added to the queue, so cache
1798 	// the size as we still need it later.
1799 	uint32 bufferSize = buffer->size;
1800 
1801 	if ((bufferSize > 0 || (segment.flags & TCP_FLAG_FINISH) != 0)
1802 		&& _ShouldReceive())
1803 		notify = _AddData(segment, buffer);
1804 	else {
1805 		if ((fFlags & FLAG_NO_RECEIVE) != 0)
1806 			fReceiveNext += buffer->size;
1807 
1808 		action = (action & ~KEEP) | DROP;
1809 	}
1810 
1811 	if ((segment.flags & TCP_FLAG_FINISH) != 0) {
1812 		segmentLength++;
1813 		if (fState != CLOSED && fState != LISTEN && fState != SYNCHRONIZE_SENT) {
1814 			TRACE("Receive(): peer is finishing connection!");
1815 			fReceiveNext++;
1816 			notify = true;
1817 
1818 			// FIN implies PUSH
1819 			fReceiveQueue.SetPushPointer();
1820 
1821 			// we'll reply immediately to the FIN if we are not
1822 			// transitioning to TIME WAIT so we immediatly ACK it.
1823 			action |= IMMEDIATE_ACKNOWLEDGE;
1824 
1825 			// other side is closing connection; change states
1826 			switch (fState) {
1827 				case ESTABLISHED:
1828 				case SYNCHRONIZE_RECEIVED:
1829 					fState = FINISH_RECEIVED;
1830 					T(State(this));
1831 					break;
1832 				case FINISH_SENT:
1833 					// simultaneous close
1834 					fState = CLOSING;
1835 					T(State(this));
1836 					break;
1837 				case FINISH_ACKNOWLEDGED:
1838 					fState = TIME_WAIT;
1839 					T(State(this));
1840 					_EnterTimeWait();
1841 					break;
1842 				case TIME_WAIT:
1843 					_UpdateTimeWait();
1844 					break;
1845 
1846 				default:
1847 					break;
1848 			}
1849 		}
1850 	}
1851 
1852 	if (notify)
1853 		_NotifyReader();
1854 
1855 	if (bufferSize > 0 || (segment.flags & TCP_FLAG_SYNCHRONIZE) != 0)
1856 		action |= ACKNOWLEDGE;
1857 
1858 	_UpdateTimestamps(segment, segmentLength);
1859 
1860 	TRACE("Receive() Action %" B_PRId32, action);
1861 
1862 	return action;
1863 }
1864 
1865 
1866 int32
1867 TCPEndpoint::SegmentReceived(tcp_segment_header& segment, net_buffer* buffer)
1868 {
1869 	MutexLocker locker(fLock);
1870 
1871 	TRACE("SegmentReceived(): buffer %p (%" B_PRIu32 " bytes) address %s "
1872 		"to %s flags %#" B_PRIx8 ", seq %" B_PRIu32 ", ack %" B_PRIu32
1873 		", wnd %" B_PRIu32, buffer, buffer->size, PrintAddress(buffer->source),
1874 		PrintAddress(buffer->destination), segment.flags, segment.sequence,
1875 		segment.acknowledge,
1876 		(uint32)segment.advertised_window << fSendWindowShift);
1877 	T(Receive(this, segment,
1878 		(uint32)segment.advertised_window << fSendWindowShift, buffer));
1879 	int32 segmentAction = DROP;
1880 
1881 	switch (fState) {
1882 		case LISTEN:
1883 			segmentAction = _ListenReceive(segment, buffer);
1884 			break;
1885 
1886 		case SYNCHRONIZE_SENT:
1887 			segmentAction = _SynchronizeSentReceive(segment, buffer);
1888 			break;
1889 
1890 		case SYNCHRONIZE_RECEIVED:
1891 		case ESTABLISHED:
1892 		case FINISH_RECEIVED:
1893 		case WAIT_FOR_FINISH_ACKNOWLEDGE:
1894 		case FINISH_SENT:
1895 		case FINISH_ACKNOWLEDGED:
1896 		case CLOSING:
1897 		case TIME_WAIT:
1898 		case CLOSED:
1899 			segmentAction = _Receive(segment, buffer);
1900 			break;
1901 	}
1902 
1903 	// process acknowledge action as asked for by the *Receive() method
1904 	if (segmentAction & IMMEDIATE_ACKNOWLEDGE)
1905 		SendAcknowledge(true);
1906 	else if (segmentAction & ACKNOWLEDGE)
1907 		DelayedAcknowledge();
1908 
1909 	if ((fFlags & (FLAG_CLOSED | FLAG_DELETE_ON_CLOSE))
1910 			== (FLAG_CLOSED | FLAG_DELETE_ON_CLOSE)) {
1911 
1912 		locker.Unlock();
1913 		if (gSocketModule->release_socket(socket))
1914 			segmentAction |= DELETED_ENDPOINT;
1915 	}
1916 
1917 	return segmentAction;
1918 }
1919 
1920 
1921 //	#pragma mark - send
1922 
1923 
1924 inline uint8
1925 TCPEndpoint::_CurrentFlags()
1926 {
1927 	// we don't set FLAG_FINISH here, instead we do it
1928 	// conditionally below depending if we are sending
1929 	// the last bytes of the send queue.
1930 
1931 	switch (fState) {
1932 		case CLOSED:
1933 			return TCP_FLAG_RESET | TCP_FLAG_ACKNOWLEDGE;
1934 
1935 		case SYNCHRONIZE_SENT:
1936 			return TCP_FLAG_SYNCHRONIZE;
1937 		case SYNCHRONIZE_RECEIVED:
1938 			return TCP_FLAG_SYNCHRONIZE | TCP_FLAG_ACKNOWLEDGE;
1939 
1940 		case ESTABLISHED:
1941 		case FINISH_RECEIVED:
1942 		case FINISH_ACKNOWLEDGED:
1943 		case TIME_WAIT:
1944 		case WAIT_FOR_FINISH_ACKNOWLEDGE:
1945 		case FINISH_SENT:
1946 		case CLOSING:
1947 			return TCP_FLAG_ACKNOWLEDGE;
1948 
1949 		default:
1950 			return 0;
1951 	}
1952 }
1953 
1954 
1955 inline bool
1956 TCPEndpoint::_ShouldSendSegment(tcp_segment_header& segment, uint32 length,
1957 	uint32 segmentMaxSize, uint32 flightSize)
1958 {
1959 	if (fState == ESTABLISHED && fSendMaxSegments == 0)
1960 		return false;
1961 
1962 	if (length > 0) {
1963 		// Avoid the silly window syndrome - we only send a segment in case:
1964 		// - we have a full segment to send, or
1965 		// - we're at the end of our buffer queue, or
1966 		// - the buffer is at least larger than half of the maximum send window,
1967 		//   or
1968 		// - we're retransmitting data
1969 		if (length == segmentMaxSize
1970 			|| (fOptions & TCP_NODELAY) != 0
1971 			|| tcp_sequence(fSendNext + length) == fSendQueue.LastSequence()
1972 			|| (fSendMaxWindow > 0 && length >= fSendMaxWindow / 2))
1973 			return true;
1974 	}
1975 
1976 	// check if we need to send a window update to the peer
1977 	if (segment.advertised_window > 0) {
1978 		// correct the window to take into account what already has been advertised
1979 		uint32 window = (segment.advertised_window << fReceiveWindowShift)
1980 			- (fReceiveMaxAdvertised - fReceiveNext).Number();
1981 
1982 		// if we can advertise a window larger than twice the maximum segment
1983 		// size, or half the maximum buffer size we send a window update
1984 		if (window >= (fReceiveMaxSegmentSize << 1)
1985 			|| window >= (socket->receive.buffer_size >> 1))
1986 			return true;
1987 	}
1988 
1989 	if ((segment.flags & (TCP_FLAG_SYNCHRONIZE | TCP_FLAG_FINISH
1990 			| TCP_FLAG_RESET)) != 0)
1991 		return true;
1992 
1993 	// We do have urgent data pending
1994 	if (fSendUrgentOffset > fSendNext)
1995 		return true;
1996 
1997 	// there is no reason to send a segment just now
1998 	return false;
1999 }
2000 
2001 
2002 status_t
2003 TCPEndpoint::_SendQueued(bool force)
2004 {
2005 	return _SendQueued(force, fSendWindow);
2006 }
2007 
2008 
2009 /*!	Sends one or more TCP segments with the data waiting in the queue, or some
2010 	specific flags that need to be sent.
2011 */
2012 status_t
2013 TCPEndpoint::_SendQueued(bool force, uint32 sendWindow)
2014 {
2015 	if (fRoute == NULL)
2016 		return B_ERROR;
2017 
2018 	// in passive state?
2019 	if (fState == LISTEN)
2020 		return B_ERROR;
2021 
2022 	tcp_segment_header segment(_CurrentFlags());
2023 
2024 	if ((fOptions & TCP_NOOPT) == 0) {
2025 		if ((fFlags & FLAG_OPTION_TIMESTAMP) != 0) {
2026 			segment.options |= TCP_HAS_TIMESTAMPS;
2027 			segment.timestamp_reply = fReceivedTimestamp;
2028 			segment.timestamp_value = tcp_now();
2029 		}
2030 
2031 		// SACK information is embedded with duplicate acknowledgements
2032 		if (!fReceiveQueue.IsContiguous()
2033 			&& fLastAcknowledgeSent <= fReceiveNext
2034 			&& (fFlags & FLAG_OPTION_SACK_PERMITTED) != 0) {
2035 			segment.options |= TCP_HAS_SACK;
2036 			int maxSackCount = MAX_SACK_BLKS
2037 				- ((fFlags & FLAG_OPTION_TIMESTAMP) != 0);
2038 			memset(segment.sacks, 0, sizeof(segment.sacks));
2039 			segment.sackCount = fReceiveQueue.PopulateSackInfo(fReceiveNext,
2040 				maxSackCount, segment.sacks);
2041 		}
2042 
2043 		if ((segment.flags & TCP_FLAG_SYNCHRONIZE) != 0
2044 			&& fSendNext == fInitialSendSequence) {
2045 			// add connection establishment options
2046 			segment.max_segment_size = fReceiveMaxSegmentSize;
2047 			if (fFlags & FLAG_OPTION_WINDOW_SCALE) {
2048 				segment.options |= TCP_HAS_WINDOW_SCALE;
2049 				segment.window_shift = fReceiveWindowShift;
2050 			}
2051 			if ((fFlags & FLAG_OPTION_SACK_PERMITTED) != 0)
2052 				segment.options |= TCP_SACK_PERMITTED;
2053 		}
2054 	}
2055 
2056 	size_t availableBytes = fReceiveQueue.Free();
2057 	// window size must remain same for duplicate acknowledgements
2058 	if (!fReceiveQueue.IsContiguous())
2059 		availableBytes = (fReceiveMaxAdvertised - fReceiveNext).Number();
2060 
2061 	if (fFlags & FLAG_OPTION_WINDOW_SCALE)
2062 		segment.advertised_window = availableBytes >> fReceiveWindowShift;
2063 	else
2064 		segment.advertised_window = min_c(TCP_MAX_WINDOW, availableBytes);
2065 
2066 	segment.acknowledge = fReceiveNext.Number();
2067 
2068 	// Process urgent data
2069 	if (fSendUrgentOffset > fSendNext) {
2070 		segment.flags |= TCP_FLAG_URGENT;
2071 		segment.urgent_offset = (fSendUrgentOffset - fSendNext).Number();
2072 	} else {
2073 		fSendUrgentOffset = fSendUnacknowledged.Number();
2074 			// Keep urgent offset updated, so that it doesn't reach into our
2075 			// send window on overlap
2076 		segment.urgent_offset = 0;
2077 	}
2078 
2079 	if (fCongestionWindow > 0 && fCongestionWindow < sendWindow)
2080 		sendWindow = fCongestionWindow;
2081 
2082 	// fSendUnacknowledged
2083 	//  |    fSendNext      fSendMax
2084 	//  |        |              |
2085 	//  v        v              v
2086 	//  -----------------------------------
2087 	//  | effective window           |
2088 	//  -----------------------------------
2089 
2090 	// Flight size represents the window of data which is currently in the
2091 	// ether. We should never send data such as the flight size becomes larger
2092 	// than the effective window. Note however that the effective window may be
2093 	// reduced (by congestion for instance), so at some point in time flight
2094 	// size may be larger than the currently calculated window.
2095 
2096 	uint32 flightSize = (fSendMax - fSendUnacknowledged).Number();
2097 	uint32 consumedWindow = (fSendNext - fSendUnacknowledged).Number();
2098 
2099 	if (consumedWindow > sendWindow) {
2100 		sendWindow = 0;
2101 		// TODO: enter persist state? try to get a window update.
2102 	} else
2103 		sendWindow -= consumedWindow;
2104 
2105 	uint32 length = min_c(fSendQueue.Available(fSendNext), sendWindow);
2106 	bool shouldStartRetransmitTimer = fSendNext == fSendUnacknowledged;
2107 	bool retransmit = fSendNext < fSendMax;
2108 
2109 	if (fDuplicateAcknowledgeCount != 0) {
2110 		// send at most 1 SMSS of data when under limited transmit, fast transmit/recovery
2111 		length = min_c(length, fSendMaxSegmentSize);
2112 	}
2113 
2114 	do {
2115 		uint32 segmentMaxSize = fSendMaxSegmentSize
2116 			- tcp_options_length(segment);
2117 		uint32 segmentLength = min_c(length, segmentMaxSize);
2118 
2119 		if (fSendNext + segmentLength == fSendQueue.LastSequence() && !force) {
2120 			if (state_needs_finish(fState))
2121 				segment.flags |= TCP_FLAG_FINISH;
2122 			if (length > 0)
2123 				segment.flags |= TCP_FLAG_PUSH;
2124 		}
2125 
2126 		// Determine if we should really send this segment
2127 		if (!force && !retransmit && !_ShouldSendSegment(segment, segmentLength,
2128 				segmentMaxSize, flightSize)) {
2129 			if (fSendQueue.Available()
2130 				&& !gStackModule->is_timer_active(&fPersistTimer)
2131 				&& !gStackModule->is_timer_active(&fRetransmitTimer))
2132 				_StartPersistTimer();
2133 			break;
2134 		}
2135 
2136 		net_buffer *buffer = gBufferModule->create(256);
2137 		if (buffer == NULL)
2138 			return B_NO_MEMORY;
2139 
2140 		status_t status = B_OK;
2141 		if (segmentLength > 0)
2142 			status = fSendQueue.Get(buffer, fSendNext, segmentLength);
2143 		if (status < B_OK) {
2144 			gBufferModule->free(buffer);
2145 			return status;
2146 		}
2147 
2148 		LocalAddress().CopyTo(buffer->source);
2149 		PeerAddress().CopyTo(buffer->destination);
2150 
2151 		uint32 size = buffer->size;
2152 		segment.sequence = fSendNext.Number();
2153 
2154 		TRACE("SendQueued(): buffer %p (%" B_PRIu32 " bytes) address %s to "
2155 			"%s flags %#" B_PRIx8 ", seq %" B_PRIu32 ", ack %" B_PRIu32
2156 			", rwnd %" B_PRIu16 ", cwnd %" B_PRIu32 ", ssthresh %" B_PRIu32
2157 			", len %" B_PRIu32 ", first %" B_PRIu32 ", last %" B_PRIu32,
2158 			buffer, buffer->size, PrintAddress(buffer->source),
2159 			PrintAddress(buffer->destination), segment.flags, segment.sequence,
2160 			segment.acknowledge, segment.advertised_window,
2161 			fCongestionWindow, fSlowStartThreshold, segmentLength,
2162 			fSendQueue.FirstSequence().Number(),
2163 			fSendQueue.LastSequence().Number());
2164 		T(Send(this, segment, buffer, fSendQueue.FirstSequence(),
2165 			fSendQueue.LastSequence()));
2166 
2167 		PROBE(buffer, sendWindow);
2168 		sendWindow -= buffer->size;
2169 
2170 		status = add_tcp_header(AddressModule(), segment, buffer);
2171 		if (status != B_OK) {
2172 			gBufferModule->free(buffer);
2173 			return status;
2174 		}
2175 
2176 		// Update send status - we need to do this before we send the data
2177 		// for local connections as the answer is directly handled
2178 
2179 		if (segment.flags & TCP_FLAG_SYNCHRONIZE) {
2180 			segment.options &= ~TCP_HAS_WINDOW_SCALE;
2181 			segment.max_segment_size = 0;
2182 			size++;
2183 		}
2184 
2185 		if (segment.flags & TCP_FLAG_FINISH)
2186 			size++;
2187 
2188 		uint32 sendMax = fSendMax.Number();
2189 		fSendNext += size;
2190 		if (fSendMax < fSendNext)
2191 			fSendMax = fSendNext;
2192 
2193 		fReceiveMaxAdvertised = fReceiveNext
2194 			+ ((uint32)segment.advertised_window << fReceiveWindowShift);
2195 
2196 		if (segmentLength != 0 && fState == ESTABLISHED)
2197 			--fSendMaxSegments;
2198 
2199 		status = next->module->send_routed_data(next, fRoute, buffer);
2200 		if (status < B_OK) {
2201 			gBufferModule->free(buffer);
2202 
2203 			fSendNext = segment.sequence;
2204 			fSendMax = sendMax;
2205 				// restore send status
2206 			return status;
2207 		}
2208 
2209 		if (fSendTime == 0 && !retransmit
2210 			&& (segmentLength != 0 || (segment.flags & TCP_FLAG_SYNCHRONIZE) !=0)) {
2211 			fSendTime = tcp_now();
2212 			fRoundTripStartSequence = segment.sequence;
2213 		}
2214 
2215 		if (shouldStartRetransmitTimer && size > 0) {
2216 			TRACE("starting initial retransmit timer of: %" B_PRIdBIGTIME,
2217 				fRetransmitTimeout);
2218 			gStackModule->set_timer(&fRetransmitTimer, fRetransmitTimeout);
2219 			T(TimerSet(this, "retransmit", fRetransmitTimeout));
2220 			shouldStartRetransmitTimer = false;
2221 		}
2222 
2223 		if (segment.flags & TCP_FLAG_ACKNOWLEDGE) {
2224 			fLastAcknowledgeSent = segment.acknowledge;
2225 			gStackModule->cancel_timer(&fDelayedAcknowledgeTimer);
2226 		}
2227 
2228 		length -= segmentLength;
2229 		segment.flags &= ~(TCP_FLAG_SYNCHRONIZE | TCP_FLAG_RESET
2230 			| TCP_FLAG_FINISH);
2231 
2232 		if (retransmit)
2233 			break;
2234 
2235 	} while (length > 0);
2236 
2237 	return B_OK;
2238 }
2239 
2240 
2241 int
2242 TCPEndpoint::_MaxSegmentSize(const sockaddr* address) const
2243 {
2244 	return next->module->get_mtu(next, address) - sizeof(tcp_header);
2245 }
2246 
2247 
2248 status_t
2249 TCPEndpoint::_PrepareSendPath(const sockaddr* peer)
2250 {
2251 	if (fRoute == NULL) {
2252 		fRoute = gDatalinkModule->get_route(Domain(), peer);
2253 		if (fRoute == NULL)
2254 			return ENETUNREACH;
2255 
2256 		if ((fRoute->flags & RTF_LOCAL) != 0)
2257 			fFlags |= FLAG_LOCAL;
2258 	}
2259 
2260 	// make sure connection does not already exist
2261 	status_t status = fManager->SetConnection(this, *LocalAddress(), peer,
2262 		fRoute->interface_address->local);
2263 	if (status < B_OK)
2264 		return status;
2265 
2266 	fInitialSendSequence = system_time() >> 4;
2267 	fSendNext = fInitialSendSequence;
2268 	fSendUnacknowledged = fInitialSendSequence;
2269 	fSendMax = fInitialSendSequence;
2270 	fSendUrgentOffset = fInitialSendSequence;
2271 	fRecover = fInitialSendSequence.Number();
2272 
2273 	// we are counting the SYN here
2274 	fSendQueue.SetInitialSequence(fSendNext + 1);
2275 
2276 	fReceiveMaxSegmentSize = _MaxSegmentSize(peer);
2277 
2278 	// Compute the window shift we advertise to our peer - if it doesn't support
2279 	// this option, this will be reset to 0 (when its SYN is received)
2280 	fReceiveWindowShift = 0;
2281 	while (fReceiveWindowShift < TCP_MAX_WINDOW_SHIFT
2282 		&& (0xffffUL << fReceiveWindowShift) < socket->receive.buffer_size) {
2283 		fReceiveWindowShift++;
2284 	}
2285 
2286 	return B_OK;
2287 }
2288 
2289 
2290 void
2291 TCPEndpoint::_Acknowledged(tcp_segment_header& segment)
2292 {
2293 	TRACE("_Acknowledged(): ack %" B_PRIu32 "; uack %" B_PRIu32 "; next %"
2294 		B_PRIu32 "; max %" B_PRIu32, segment.acknowledge,
2295 		fSendUnacknowledged.Number(), fSendNext.Number(), fSendMax.Number());
2296 
2297 	ASSERT(fSendUnacknowledged <= segment.acknowledge);
2298 
2299 	if (fSendUnacknowledged < segment.acknowledge) {
2300 		fSendQueue.RemoveUntil(segment.acknowledge);
2301 
2302 		uint32 bytesAcknowledged = segment.acknowledge - fSendUnacknowledged.Number();
2303 		fPreviousHighestAcknowledge = fSendUnacknowledged;
2304 		fSendUnacknowledged = segment.acknowledge;
2305 		uint32 flightSize = (fSendMax - fSendUnacknowledged).Number();
2306 		int32 expectedSamples = flightSize / (fSendMaxSegmentSize << 1);
2307 
2308 		if (fPreviousHighestAcknowledge > fSendUnacknowledged) {
2309 			// need to update the recover variable upon a sequence wraparound
2310 			fRecover = segment.acknowledge - 1;
2311 		}
2312 
2313 		// the acknowledgment of the SYN/ACK MUST NOT increase the size of the congestion window
2314 		if (fSendUnacknowledged != fInitialSendSequence) {
2315 			if (fCongestionWindow < fSlowStartThreshold)
2316 				fCongestionWindow += min_c(bytesAcknowledged, fSendMaxSegmentSize);
2317 			else {
2318 				uint32 increment = fSendMaxSegmentSize * fSendMaxSegmentSize;
2319 
2320 				if (increment < fCongestionWindow)
2321 					increment = 1;
2322 				else
2323 					increment /= fCongestionWindow;
2324 
2325 				fCongestionWindow += increment;
2326 			}
2327 
2328 			fSendMaxSegments = UINT32_MAX;
2329 		}
2330 
2331 		if ((fFlags & FLAG_RECOVERY) != 0) {
2332 			fSendNext = fSendUnacknowledged;
2333 			_SendQueued();
2334 			fCongestionWindow -= bytesAcknowledged;
2335 
2336 			if (bytesAcknowledged > fSendMaxSegmentSize)
2337 				fCongestionWindow += fSendMaxSegmentSize;
2338 
2339 			fSendNext = fSendMax;
2340 		} else
2341 			fDuplicateAcknowledgeCount = 0;
2342 
2343 		if (fSendNext < fSendUnacknowledged)
2344 			fSendNext = fSendUnacknowledged;
2345 
2346 		if (fFlags & FLAG_OPTION_TIMESTAMP) {
2347 			_UpdateRoundTripTime(tcp_diff_timestamp(segment.timestamp_reply),
2348 				expectedSamples > 0 ? expectedSamples : 1);
2349 		} else if (fSendTime != 0 && fRoundTripStartSequence < segment.acknowledge) {
2350 			_UpdateRoundTripTime(tcp_diff_timestamp(fSendTime), 1);
2351 			fSendTime = 0;
2352 		}
2353 
2354 		if (fSendUnacknowledged == fSendMax) {
2355 			TRACE("all acknowledged, cancelling retransmission timer.");
2356 			gStackModule->cancel_timer(&fRetransmitTimer);
2357 			T(TimerSet(this, "retransmit", -1));
2358 		} else {
2359 			TRACE("data acknowledged, resetting retransmission timer to: %"
2360 				B_PRIdBIGTIME, fRetransmitTimeout);
2361 			gStackModule->set_timer(&fRetransmitTimer, fRetransmitTimeout);
2362 			T(TimerSet(this, "retransmit", fRetransmitTimeout));
2363 		}
2364 
2365 		if (is_writable(fState)) {
2366 			// notify threads waiting on the socket to become writable again
2367 			fSendCondition.NotifyAll();
2368 			gSocketModule->notify(socket, B_SELECT_WRITE, fSendQueue.Free());
2369 		}
2370 	}
2371 
2372 	// if there is data left to be sent, send it now
2373 	if (fSendQueue.Used() > 0)
2374 		_SendQueued();
2375 }
2376 
2377 
2378 void
2379 TCPEndpoint::_Retransmit()
2380 {
2381 	TRACE("Retransmit()");
2382 
2383 	if (fState < ESTABLISHED) {
2384 		fRetransmitTimeout = TCP_SYN_RETRANSMIT_TIMEOUT;
2385 		fCongestionWindow = fSendMaxSegmentSize;
2386 	} else {
2387 		_ResetSlowStart();
2388 		fDuplicateAcknowledgeCount = 0;
2389 		// Do exponential back off of the retransmit timeout
2390 		fRetransmitTimeout *= 2;
2391 		if (fRetransmitTimeout > TCP_MAX_RETRANSMIT_TIMEOUT)
2392 			fRetransmitTimeout = TCP_MAX_RETRANSMIT_TIMEOUT;
2393 	}
2394 
2395 	fSendNext = fSendUnacknowledged;
2396 	_SendQueued();
2397 
2398 	fRecover = fSendNext.Number() - 1;
2399 	if ((fFlags & FLAG_RECOVERY) != 0)
2400 		fFlags &= ~FLAG_RECOVERY;
2401 }
2402 
2403 
2404 void
2405 TCPEndpoint::_UpdateRoundTripTime(int32 roundTripTime, int32 expectedSamples)
2406 {
2407 	if (fSmoothedRoundTripTime == 0) {
2408 		fSmoothedRoundTripTime = roundTripTime;
2409 		fRoundTripVariation = roundTripTime / 2;
2410 		fRetransmitTimeout = (fSmoothedRoundTripTime + max_c(100, fRoundTripVariation * 4))
2411 				* kTimestampFactor;
2412 	} else {
2413 		int32 delta = fSmoothedRoundTripTime - roundTripTime;
2414 		if (delta < 0)
2415 			delta = -delta;
2416 		fRoundTripVariation += (delta - fRoundTripVariation) / (expectedSamples * 4);
2417 		fSmoothedRoundTripTime += (roundTripTime - fSmoothedRoundTripTime) / (expectedSamples * 8);
2418 		fRetransmitTimeout = (fSmoothedRoundTripTime + max_c(100, fRoundTripVariation * 4))
2419 			* kTimestampFactor;
2420 	}
2421 
2422 	if (fRetransmitTimeout > TCP_MAX_RETRANSMIT_TIMEOUT)
2423 		fRetransmitTimeout = TCP_MAX_RETRANSMIT_TIMEOUT;
2424 
2425 	if (fRetransmitTimeout < TCP_MIN_RETRANSMIT_TIMEOUT)
2426 		fRetransmitTimeout = TCP_MIN_RETRANSMIT_TIMEOUT;
2427 
2428 	TRACE("  RTO is now %" B_PRIdBIGTIME " (after rtt %" B_PRId32 "ms)",
2429 		fRetransmitTimeout, roundTripTime);
2430 }
2431 
2432 
2433 void
2434 TCPEndpoint::_ResetSlowStart()
2435 {
2436 	fSlowStartThreshold = max_c((fSendMax - fSendUnacknowledged).Number() / 2,
2437 		2 * fSendMaxSegmentSize);
2438 	fCongestionWindow = fSendMaxSegmentSize;
2439 }
2440 
2441 
2442 //	#pragma mark - timer
2443 
2444 
2445 /*static*/ void
2446 TCPEndpoint::_RetransmitTimer(net_timer* timer, void* _endpoint)
2447 {
2448 	TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint;
2449 	T(TimerTriggered(endpoint, "retransmit"));
2450 
2451 	MutexLocker locker(endpoint->fLock);
2452 	if (!locker.IsLocked() || gStackModule->is_timer_active(timer))
2453 		return;
2454 
2455 	endpoint->_Retransmit();
2456 }
2457 
2458 
2459 /*static*/ void
2460 TCPEndpoint::_PersistTimer(net_timer* timer, void* _endpoint)
2461 {
2462 	TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint;
2463 	T(TimerTriggered(endpoint, "persist"));
2464 
2465 	MutexLocker locker(endpoint->fLock);
2466 	if (!locker.IsLocked())
2467 		return;
2468 
2469 	// the timer might not have been canceled early enough
2470 	if (endpoint->State() == CLOSED)
2471 		return;
2472 
2473 	endpoint->_SendQueued(true);
2474 }
2475 
2476 
2477 /*static*/ void
2478 TCPEndpoint::_DelayedAcknowledgeTimer(net_timer* timer, void* _endpoint)
2479 {
2480 	TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint;
2481 	T(TimerTriggered(endpoint, "delayed ack"));
2482 
2483 	MutexLocker locker(endpoint->fLock);
2484 	if (!locker.IsLocked())
2485 		return;
2486 
2487 	// the timer might not have been canceled early enough
2488 	if (endpoint->State() == CLOSED)
2489 		return;
2490 
2491 	endpoint->SendAcknowledge(true);
2492 }
2493 
2494 
2495 /*static*/ void
2496 TCPEndpoint::_TimeWaitTimer(net_timer* timer, void* _endpoint)
2497 {
2498 	TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint;
2499 	T(TimerTriggered(endpoint, "time-wait"));
2500 
2501 	MutexLocker locker(endpoint->fLock);
2502 	if (!locker.IsLocked())
2503 		return;
2504 
2505 	if ((endpoint->fFlags & FLAG_CLOSED) == 0) {
2506 		endpoint->fFlags |= FLAG_DELETE_ON_CLOSE;
2507 		return;
2508 	}
2509 
2510 	locker.Unlock();
2511 
2512 	gSocketModule->release_socket(endpoint->socket);
2513 }
2514 
2515 
2516 /*static*/ status_t
2517 TCPEndpoint::_WaitForCondition(ConditionVariable& condition,
2518 	MutexLocker& locker, bigtime_t timeout)
2519 {
2520 	ConditionVariableEntry entry;
2521 	condition.Add(&entry);
2522 
2523 	locker.Unlock();
2524 	status_t result = entry.Wait(B_ABSOLUTE_TIMEOUT | B_CAN_INTERRUPT, timeout);
2525 	locker.Lock();
2526 
2527 	return result;
2528 }
2529 
2530 
2531 //	#pragma mark -
2532 
2533 
2534 void
2535 TCPEndpoint::Dump() const
2536 {
2537 	kprintf("TCP endpoint %p\n", this);
2538 	kprintf("  state: %s\n", name_for_state(fState));
2539 	kprintf("  flags: 0x%" B_PRIx32 "\n", fFlags);
2540 #if KDEBUG
2541 	kprintf("  lock: { %p, holder: %" B_PRId32 " }\n", &fLock, fLock.holder);
2542 #endif
2543 	kprintf("  accept sem: %" B_PRId32 "\n", fAcceptSemaphore);
2544 	kprintf("  options: 0x%" B_PRIx32 "\n", (uint32)fOptions);
2545 	kprintf("  send\n");
2546 	kprintf("    window shift: %" B_PRIu8 "\n", fSendWindowShift);
2547 	kprintf("    unacknowledged: %" B_PRIu32 "\n",
2548 		fSendUnacknowledged.Number());
2549 	kprintf("    next: %" B_PRIu32 "\n", fSendNext.Number());
2550 	kprintf("    max: %" B_PRIu32 "\n", fSendMax.Number());
2551 	kprintf("    urgent offset: %" B_PRIu32 "\n", fSendUrgentOffset.Number());
2552 	kprintf("    window: %" B_PRIu32 "\n", fSendWindow);
2553 	kprintf("    max window: %" B_PRIu32 "\n", fSendMaxWindow);
2554 	kprintf("    max segment size: %" B_PRIu32 "\n", fSendMaxSegmentSize);
2555 	kprintf("    queue: %" B_PRIuSIZE " / %" B_PRIuSIZE "\n", fSendQueue.Used(),
2556 		fSendQueue.Size());
2557 #if DEBUG_TCP_BUFFER_QUEUE
2558 	fSendQueue.Dump();
2559 #endif
2560 	kprintf("    last acknowledge sent: %" B_PRIu32 "\n",
2561 		fLastAcknowledgeSent.Number());
2562 	kprintf("    initial sequence: %" B_PRIu32 "\n",
2563 		fInitialSendSequence.Number());
2564 	kprintf("  receive\n");
2565 	kprintf("    window shift: %" B_PRIu8 "\n", fReceiveWindowShift);
2566 	kprintf("    next: %" B_PRIu32 "\n", fReceiveNext.Number());
2567 	kprintf("    max advertised: %" B_PRIu32 "\n",
2568 		fReceiveMaxAdvertised.Number());
2569 	kprintf("    window: %" B_PRIu32 "\n", fReceiveWindow);
2570 	kprintf("    max segment size: %" B_PRIu32 "\n", fReceiveMaxSegmentSize);
2571 	kprintf("    queue: %" B_PRIuSIZE " / %" B_PRIuSIZE "\n",
2572 		fReceiveQueue.Available(), fReceiveQueue.Size());
2573 #if DEBUG_TCP_BUFFER_QUEUE
2574 	fReceiveQueue.Dump();
2575 #endif
2576 	kprintf("    initial sequence: %" B_PRIu32 "\n",
2577 		fInitialReceiveSequence.Number());
2578 	kprintf("    duplicate acknowledge count: %" B_PRIu32 "\n",
2579 		fDuplicateAcknowledgeCount);
2580 	kprintf("  smoothed round trip time: %" B_PRId32 " (deviation %" B_PRId32 ")\n",
2581 		fSmoothedRoundTripTime, fRoundTripVariation);
2582 	kprintf("  retransmit timeout: %" B_PRId64 "\n", fRetransmitTimeout);
2583 	kprintf("  congestion window: %" B_PRIu32 "\n", fCongestionWindow);
2584 	kprintf("  slow start threshold: %" B_PRIu32 "\n", fSlowStartThreshold);
2585 }
2586 
2587