1 /* 2 * Copyright 2006-2010, Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Andrew Galante, haiku.galante@gmail.com 7 * Axel Dörfler, axeld@pinc-software.de 8 * Hugo Santos, hugosantos@gmail.com 9 */ 10 11 12 #include "TCPEndpoint.h" 13 14 #include <netinet/in.h> 15 #include <netinet/ip.h> 16 #include <netinet/tcp.h> 17 #include <new> 18 #include <signal.h> 19 #include <stdlib.h> 20 #include <string.h> 21 #include <stdint.h> 22 23 #include <KernelExport.h> 24 #include <Select.h> 25 26 #include <net_buffer.h> 27 #include <net_datalink.h> 28 #include <net_stat.h> 29 #include <NetBufferUtilities.h> 30 #include <NetUtilities.h> 31 32 #include <lock.h> 33 #include <low_resource_manager.h> 34 #include <tracing.h> 35 #include <util/AutoLock.h> 36 #include <util/list.h> 37 38 #include "EndpointManager.h" 39 40 41 // References: 42 // - RFC 793 - Transmission Control Protocol 43 // - RFC 813 - Window and Acknowledgement Strategy in TCP 44 // - RFC 1337 - TIME_WAIT Assassination Hazards in TCP 45 // 46 // Things incomplete in this implementation: 47 // - TCP Extensions for High Performance, RFC 1323 - RTTM, PAWS 48 // - Congestion Control, RFC 5681 49 // - Limited Transit, RFC 3042 50 // - SACK, Selective Acknowledgment; RFC 2018, RFC 2883, RFC 6675 51 // - NewReno Modification to TCP's Fast Recovery, RFC 2582 52 // 53 // Things this implementation currently doesn't implement: 54 // - Explicit Congestion Notification (ECN), RFC 3168 55 // - SYN-Cache 56 // - Forward RTO-Recovery, RFC 4138 57 // - Time-Wait hash instead of keeping sockets alive 58 59 #define PrintAddress(address) \ 60 AddressString(Domain(), address, true).Data() 61 62 //#define TRACE_TCP 63 //#define PROBE_TCP 64 65 #ifdef TRACE_TCP 66 // the space before ', ##args' is important in order for this to work with cpp 2.95 67 # define TRACE(format, args...) 
dprintf("%" B_PRId32 ": TCP [%" \ 68 B_PRIdBIGTIME "] %p (%12s) " format "\n", find_thread(NULL), \ 69 system_time(), this, name_for_state(fState) , ##args) 70 #else 71 # define TRACE(args...) do { } while (0) 72 #endif 73 74 #ifdef PROBE_TCP 75 # define PROBE(buffer, window) \ 76 dprintf("TCP PROBE %" B_PRIdBIGTIME " %s %s %" B_PRIu32 " snxt %" B_PRIu32 \ 77 " suna %" B_PRIu32 " cw %" B_PRIu32 " sst %" B_PRIu32 " win %" \ 78 B_PRIu32 " swin %" B_PRIu32 " smax-suna %" B_PRIu32 " savail %" \ 79 B_PRIuSIZE " sqused %" B_PRIuSIZE " rto %" B_PRIdBIGTIME "\n", \ 80 system_time(), PrintAddress(buffer->source), \ 81 PrintAddress(buffer->destination), buffer->size, fSendNext.Number(), \ 82 fSendUnacknowledged.Number(), fCongestionWindow, fSlowStartThreshold, \ 83 window, fSendWindow, (fSendMax - fSendUnacknowledged).Number(), \ 84 fSendQueue.Available(fSendNext), fSendQueue.Used(), fRetransmitTimeout) 85 #else 86 # define PROBE(buffer, window) do { } while (0) 87 #endif 88 89 #if TCP_TRACING 90 namespace TCPTracing { 91 92 class Receive : public AbstractTraceEntry { 93 public: 94 Receive(TCPEndpoint* endpoint, tcp_segment_header& segment, uint32 window, 95 net_buffer* buffer) 96 : 97 fEndpoint(endpoint), 98 fBuffer(buffer), 99 fBufferSize(buffer->size), 100 fSequence(segment.sequence), 101 fAcknowledge(segment.acknowledge), 102 fWindow(window), 103 fState(endpoint->State()), 104 fFlags(segment.flags) 105 { 106 Initialized(); 107 } 108 109 virtual void AddDump(TraceOutput& out) 110 { 111 out.Print("tcp:%p (%12s) receive buffer %p (%" B_PRIu32 " bytes), " 112 "flags %#" B_PRIx8 ", seq %" B_PRIu32 ", ack %" B_PRIu32 113 ", wnd %" B_PRIu32, fEndpoint, name_for_state(fState), fBuffer, 114 fBufferSize, fFlags, fSequence, fAcknowledge, fWindow); 115 } 116 117 protected: 118 TCPEndpoint* fEndpoint; 119 net_buffer* fBuffer; 120 uint32 fBufferSize; 121 uint32 fSequence; 122 uint32 fAcknowledge; 123 uint32 fWindow; 124 tcp_state fState; 125 uint8 fFlags; 126 }; 127 128 class Send : 
public AbstractTraceEntry { 129 public: 130 Send(TCPEndpoint* endpoint, tcp_segment_header& segment, net_buffer* buffer, 131 tcp_sequence firstSequence, tcp_sequence lastSequence) 132 : 133 fEndpoint(endpoint), 134 fBuffer(buffer), 135 fBufferSize(buffer->size), 136 fSequence(segment.sequence), 137 fAcknowledge(segment.acknowledge), 138 fFirstSequence(firstSequence.Number()), 139 fLastSequence(lastSequence.Number()), 140 fState(endpoint->State()), 141 fFlags(segment.flags) 142 { 143 Initialized(); 144 } 145 146 virtual void AddDump(TraceOutput& out) 147 { 148 out.Print("tcp:%p (%12s) send buffer %p (%" B_PRIu32 " bytes), " 149 "flags %#" B_PRIx8 ", seq %" B_PRIu32 ", ack %" B_PRIu32 150 ", first %" B_PRIu32 ", last %" B_PRIu32, fEndpoint, 151 name_for_state(fState), fBuffer, fBufferSize, fFlags, fSequence, 152 fAcknowledge, fFirstSequence, fLastSequence); 153 } 154 155 protected: 156 TCPEndpoint* fEndpoint; 157 net_buffer* fBuffer; 158 uint32 fBufferSize; 159 uint32 fSequence; 160 uint32 fAcknowledge; 161 uint32 fFirstSequence; 162 uint32 fLastSequence; 163 tcp_state fState; 164 uint8 fFlags; 165 }; 166 167 class State : public AbstractTraceEntry { 168 public: 169 State(TCPEndpoint* endpoint) 170 : 171 fEndpoint(endpoint), 172 fState(endpoint->State()) 173 { 174 Initialized(); 175 } 176 177 virtual void AddDump(TraceOutput& out) 178 { 179 out.Print("tcp:%p (%12s) state change", fEndpoint, 180 name_for_state(fState)); 181 } 182 183 protected: 184 TCPEndpoint* fEndpoint; 185 tcp_state fState; 186 }; 187 188 class Spawn : public AbstractTraceEntry { 189 public: 190 Spawn(TCPEndpoint* listeningEndpoint, TCPEndpoint* spawnedEndpoint) 191 : 192 fListeningEndpoint(listeningEndpoint), 193 fSpawnedEndpoint(spawnedEndpoint) 194 { 195 Initialized(); 196 } 197 198 virtual void AddDump(TraceOutput& out) 199 { 200 out.Print("tcp:%p spawns %p", fListeningEndpoint, fSpawnedEndpoint); 201 } 202 203 protected: 204 TCPEndpoint* fListeningEndpoint; 205 TCPEndpoint* fSpawnedEndpoint; 
206 }; 207 208 class Error : public AbstractTraceEntry { 209 public: 210 Error(TCPEndpoint* endpoint, const char* error, int32 line) 211 : 212 fEndpoint(endpoint), 213 fLine(line), 214 fError(error), 215 fState(endpoint->State()) 216 { 217 Initialized(); 218 } 219 220 virtual void AddDump(TraceOutput& out) 221 { 222 out.Print("tcp:%p (%12s) error at line %" B_PRId32 ": %s", fEndpoint, 223 name_for_state(fState), fLine, fError); 224 } 225 226 protected: 227 TCPEndpoint* fEndpoint; 228 int32 fLine; 229 const char* fError; 230 tcp_state fState; 231 }; 232 233 class TimerSet : public AbstractTraceEntry { 234 public: 235 TimerSet(TCPEndpoint* endpoint, const char* which, bigtime_t timeout) 236 : 237 fEndpoint(endpoint), 238 fWhich(which), 239 fTimeout(timeout), 240 fState(endpoint->State()) 241 { 242 Initialized(); 243 } 244 245 virtual void AddDump(TraceOutput& out) 246 { 247 out.Print("tcp:%p (%12s) %s timer set to %" B_PRIdBIGTIME, fEndpoint, 248 name_for_state(fState), fWhich, fTimeout); 249 } 250 251 protected: 252 TCPEndpoint* fEndpoint; 253 const char* fWhich; 254 bigtime_t fTimeout; 255 tcp_state fState; 256 }; 257 258 class TimerTriggered : public AbstractTraceEntry { 259 public: 260 TimerTriggered(TCPEndpoint* endpoint, const char* which) 261 : 262 fEndpoint(endpoint), 263 fWhich(which), 264 fState(endpoint->State()) 265 { 266 Initialized(); 267 } 268 269 virtual void AddDump(TraceOutput& out) 270 { 271 out.Print("tcp:%p (%12s) %s timer triggered", fEndpoint, 272 name_for_state(fState), fWhich); 273 } 274 275 protected: 276 TCPEndpoint* fEndpoint; 277 const char* fWhich; 278 tcp_state fState; 279 }; 280 281 class APICall : public AbstractTraceEntry { 282 public: 283 APICall(TCPEndpoint* endpoint, const char* which) 284 : 285 fEndpoint(endpoint), 286 fWhich(which), 287 fState(endpoint->State()) 288 { 289 Initialized(); 290 } 291 292 virtual void AddDump(TraceOutput& out) 293 { 294 out.Print("tcp:%p (%12s) api call: %s", fEndpoint, 295 name_for_state(fState), 
fWhich); 296 } 297 298 protected: 299 TCPEndpoint* fEndpoint; 300 const char* fWhich; 301 tcp_state fState; 302 }; 303 304 } // namespace TCPTracing 305 306 # define T(x) new(std::nothrow) TCPTracing::x 307 #else 308 # define T(x) 309 #endif // TCP_TRACING 310 311 312 // constants for the fFlags field 313 enum { 314 FLAG_OPTION_WINDOW_SCALE = 0x01, 315 FLAG_OPTION_TIMESTAMP = 0x02, 316 // TODO: Should FLAG_NO_RECEIVE apply as well to received connections? 317 // That is, what is expected from accept() after a shutdown() 318 // is performed on a listen()ing socket. 319 FLAG_NO_RECEIVE = 0x04, 320 FLAG_CLOSED = 0x08, 321 FLAG_DELETE_ON_CLOSE = 0x10, 322 FLAG_LOCAL = 0x20, 323 FLAG_RECOVERY = 0x40, 324 FLAG_OPTION_SACK_PERMITTED = 0x80, 325 FLAG_AUTO_RECEIVE_BUFFER_SIZE = 0x100, 326 }; 327 328 329 static const int kTimestampFactor = 1000; 330 // conversion factor between usec system time and msec tcp time 331 332 333 static inline bigtime_t 334 absolute_timeout(bigtime_t timeout) 335 { 336 if (timeout == 0 || timeout == B_INFINITE_TIMEOUT) 337 return timeout; 338 339 return timeout + system_time(); 340 } 341 342 343 static inline status_t 344 posix_error(status_t error) 345 { 346 if (error == B_TIMED_OUT) 347 return B_WOULD_BLOCK; 348 349 return error; 350 } 351 352 353 static inline bool 354 in_window(const tcp_sequence& sequence, const tcp_sequence& receiveNext, 355 uint32 receiveWindow) 356 { 357 return sequence >= receiveNext && sequence < (receiveNext + receiveWindow); 358 } 359 360 361 static inline bool 362 segment_in_sequence(const tcp_segment_header& segment, int size, 363 const tcp_sequence& receiveNext, uint32 receiveWindow) 364 { 365 tcp_sequence sequence(segment.sequence); 366 if (size == 0) { 367 if (receiveWindow == 0) 368 return sequence == receiveNext; 369 return in_window(sequence, receiveNext, receiveWindow); 370 } else { 371 if (receiveWindow == 0) 372 return false; 373 return in_window(sequence, receiveNext, receiveWindow) 374 || 
in_window(sequence + size - 1, receiveNext, receiveWindow); 375 } 376 } 377 378 379 static inline bool 380 is_writable(tcp_state state) 381 { 382 return state == ESTABLISHED || state == FINISH_RECEIVED; 383 } 384 385 386 static inline bool 387 is_establishing(tcp_state state) 388 { 389 return state == SYNCHRONIZE_SENT || state == SYNCHRONIZE_RECEIVED; 390 } 391 392 393 static inline uint32 394 tcp_now() 395 { 396 return system_time() / kTimestampFactor; 397 } 398 399 400 static inline uint32 401 tcp_diff_timestamp(uint32 base) 402 { 403 const uint32 now = tcp_now(); 404 if (now >= base) 405 return now - base; 406 407 return now + UINT_MAX - base; 408 } 409 410 411 static inline bool 412 state_needs_finish(int32 state) 413 { 414 return state == WAIT_FOR_FINISH_ACKNOWLEDGE 415 || state == FINISH_SENT || state == CLOSING; 416 } 417 418 419 // #pragma mark - 420 421 422 TCPEndpoint::TCPEndpoint(net_socket* socket) 423 : 424 ProtocolSocket(socket), 425 fManager(NULL), 426 fOptions(0), 427 fSendWindowShift(0), 428 fReceiveWindowShift(0), 429 fSendUnacknowledged(0), 430 fSendNext(0), 431 fSendMax(0), 432 fSendUrgentOffset(0), 433 fSendWindow(0), 434 fSendMaxWindow(0), 435 fSendMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE), 436 fSendMaxSegments(0), 437 fSendQueue(socket->send.buffer_size), 438 fInitialSendSequence(0), 439 fPreviousHighestAcknowledge(0), 440 fDuplicateAcknowledgeCount(0), 441 fPreviousFlightSize(0), 442 fRecover(0), 443 fRoute(NULL), 444 fReceiveNext(0), 445 fReceiveMaxAdvertised(0), 446 fReceiveWindow(socket->receive.buffer_size), 447 fReceiveMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE), 448 fReceiveQueue(socket->receive.buffer_size), 449 fSmoothedRoundTripTime(-1), 450 fRoundTripVariation(0), 451 fSendTime(0), 452 fRoundTripStartSequence(0), 453 fRetransmitTimeout(TCP_INITIAL_RTT), 454 fReceivedTimestamp(0), 455 fCongestionWindow(0), 456 fSlowStartThreshold(0), 457 fState(CLOSED), 458 fFlags(FLAG_OPTION_WINDOW_SCALE | FLAG_OPTION_TIMESTAMP 459 | 
FLAG_OPTION_SACK_PERMITTED | FLAG_AUTO_RECEIVE_BUFFER_SIZE) 460 { 461 // TODO: to be replaced with a real read/write locking strategy! 462 mutex_init(&fLock, "tcp lock"); 463 464 fReceiveCondition.Init(this, "tcp receive"); 465 fSendCondition.Init(this, "tcp send"); 466 467 gStackModule->init_timer(&fPersistTimer, TCPEndpoint::_PersistTimer, this); 468 gStackModule->init_timer(&fRetransmitTimer, TCPEndpoint::_RetransmitTimer, 469 this); 470 gStackModule->init_timer(&fDelayedAcknowledgeTimer, 471 TCPEndpoint::_DelayedAcknowledgeTimer, this); 472 gStackModule->init_timer(&fTimeWaitTimer, TCPEndpoint::_TimeWaitTimer, 473 this); 474 475 T(APICall(this, "constructor")); 476 } 477 478 479 TCPEndpoint::~TCPEndpoint() 480 { 481 mutex_lock(&fLock); 482 483 T(APICall(this, "destructor")); 484 485 _CancelConnectionTimers(); 486 gStackModule->cancel_timer(&fTimeWaitTimer); 487 T(TimerSet(this, "time-wait", -1)); 488 489 if (fManager != NULL) { 490 fManager->Unbind(this); 491 put_endpoint_manager(fManager); 492 } 493 494 mutex_destroy(&fLock); 495 496 // we need to wait for all timers to return 497 gStackModule->wait_for_timer(&fRetransmitTimer); 498 gStackModule->wait_for_timer(&fPersistTimer); 499 gStackModule->wait_for_timer(&fDelayedAcknowledgeTimer); 500 gStackModule->wait_for_timer(&fTimeWaitTimer); 501 502 gDatalinkModule->put_route(Domain(), fRoute); 503 } 504 505 506 status_t 507 TCPEndpoint::InitCheck() const 508 { 509 return B_OK; 510 } 511 512 513 // #pragma mark - protocol API 514 515 516 status_t 517 TCPEndpoint::Open() 518 { 519 TRACE("Open()"); 520 T(APICall(this, "open")); 521 522 status_t status = ProtocolSocket::Open(); 523 if (status < B_OK) 524 return status; 525 526 fManager = get_endpoint_manager(Domain()); 527 if (fManager == NULL) 528 return EAFNOSUPPORT; 529 530 return B_OK; 531 } 532 533 534 status_t 535 TCPEndpoint::Close() 536 { 537 MutexLocker locker(fLock); 538 539 TRACE("Close()"); 540 T(APICall(this, "close")); 541 542 if (fState == LISTEN) 543 
delete_sem(fAcceptSemaphore); 544 545 if (fState == SYNCHRONIZE_SENT || fState == LISTEN) { 546 // TODO: what about linger in case of SYNCHRONIZE_SENT? 547 fState = CLOSED; 548 T(State(this)); 549 return B_OK; 550 } 551 552 // handle linger with zero timeout 553 if ((socket->options & SO_LINGER) != 0 && socket->linger == 0) { 554 fState = CLOSED; 555 T(State(this)); 556 return _SendQueued(true); 557 } 558 559 status_t status = _Disconnect(true); 560 if (status != B_OK) 561 return status; 562 563 if ((socket->options & SO_LINGER) != 0) { 564 TRACE("Close(): Lingering for %i secs", socket->linger); 565 566 bigtime_t maximum = absolute_timeout(socket->linger * 1000000LL); 567 568 while (fSendQueue.Used() > 0) { 569 status = _WaitForCondition(fSendCondition, locker, maximum); 570 if (status == B_TIMED_OUT || status == B_WOULD_BLOCK) 571 break; 572 else if (status < B_OK) 573 return status; 574 } 575 576 TRACE("Close(): after waiting, the SendQ was left with %" B_PRIuSIZE 577 " bytes.", fSendQueue.Used()); 578 } 579 return B_OK; 580 } 581 582 583 void 584 TCPEndpoint::Free() 585 { 586 MutexLocker _(fLock); 587 588 TRACE("Free()"); 589 T(APICall(this, "free")); 590 591 if (fState <= SYNCHRONIZE_SENT) 592 return; 593 594 fFlags |= FLAG_CLOSED; 595 if ((fFlags & FLAG_DELETE_ON_CLOSE) == 0) { 596 // we'll be freed later when the 2MSL timer expires 597 gSocketModule->acquire_socket(socket); 598 599 // we are only interested in the timer, not in changing state 600 _EnterTimeWait(); 601 } 602 } 603 604 605 /*! Creates and sends a synchronize packet to /a address, and then waits 606 until the connection has been established or refused. 
607 */ 608 status_t 609 TCPEndpoint::Connect(const sockaddr* address) 610 { 611 if (!AddressModule()->is_same_family(address)) 612 return EAFNOSUPPORT; 613 614 MutexLocker locker(fLock); 615 616 TRACE("Connect() on address %s", PrintAddress(address)); 617 T(APICall(this, "connect")); 618 619 if (gStackModule->is_restarted_syscall()) { 620 bigtime_t timeout = gStackModule->restore_syscall_restart_timeout(); 621 status_t status = _WaitForEstablished(locker, timeout); 622 TRACE(" Connect(): Connection complete: %s (timeout was %" 623 B_PRIdBIGTIME ")", strerror(status), timeout); 624 return posix_error(status); 625 } 626 627 // Can only call connect() from CLOSED or LISTEN states 628 // otherwise endpoint is considered already connected 629 if (fState == LISTEN) { 630 // this socket is about to connect; remove pending connections in the backlog 631 gSocketModule->set_max_backlog(socket, 0); 632 } else if (fState == ESTABLISHED) { 633 return EISCONN; 634 } else if (fState != CLOSED) 635 return EALREADY; 636 637 // consider destination address INADDR_ANY as INADDR_LOOPBACK 638 sockaddr_storage _address; 639 if (AddressModule()->is_empty_address(address, false)) { 640 AddressModule()->get_loopback_address((sockaddr *)&_address); 641 // for IPv4 and IPv6 the port is at the same offset 642 ((sockaddr_in &)_address).sin_port = ((sockaddr_in *)address)->sin_port; 643 address = (sockaddr *)&_address; 644 } 645 646 status_t status = _PrepareSendPath(address); 647 if (status < B_OK) 648 return status; 649 650 TRACE(" Connect(): starting 3-way handshake..."); 651 652 fState = SYNCHRONIZE_SENT; 653 T(State(this)); 654 655 // send SYN 656 status = _SendAcknowledge(); 657 if (status != B_OK) { 658 _Close(); 659 return status; 660 } 661 662 // If we are running over Loopback, after _SendQueued() returns we 663 // may be in ESTABLISHED already. 
664 if (fState == ESTABLISHED) { 665 TRACE(" Connect() completed after _SendQueued()"); 666 return B_OK; 667 } 668 669 // wait until 3-way handshake is complete (if needed) 670 bigtime_t timeout = min_c(socket->send.timeout, TCP_CONNECTION_TIMEOUT); 671 if (timeout == 0) { 672 // we're a non-blocking socket 673 TRACE(" Connect() delayed, return EINPROGRESS"); 674 return EINPROGRESS; 675 } 676 677 bigtime_t absoluteTimeout = absolute_timeout(timeout); 678 gStackModule->store_syscall_restart_timeout(absoluteTimeout); 679 680 status = _WaitForEstablished(locker, absoluteTimeout); 681 TRACE(" Connect(): Connection complete: %s (timeout was %" B_PRIdBIGTIME 682 ")", strerror(status), timeout); 683 return posix_error(status); 684 } 685 686 687 status_t 688 TCPEndpoint::Accept(struct net_socket** _acceptedSocket) 689 { 690 MutexLocker locker(fLock); 691 692 TRACE("Accept()"); 693 T(APICall(this, "accept")); 694 695 status_t status; 696 bigtime_t timeout = absolute_timeout(socket->receive.timeout); 697 if (gStackModule->is_restarted_syscall()) 698 timeout = gStackModule->restore_syscall_restart_timeout(); 699 else 700 gStackModule->store_syscall_restart_timeout(timeout); 701 702 do { 703 locker.Unlock(); 704 705 status = acquire_sem_etc(fAcceptSemaphore, 1, B_ABSOLUTE_TIMEOUT 706 | B_CAN_INTERRUPT, timeout); 707 if (status != B_OK) { 708 if (status == B_TIMED_OUT && socket->receive.timeout == 0) 709 return B_WOULD_BLOCK; 710 711 return status; 712 } 713 714 locker.Lock(); 715 status = gSocketModule->dequeue_connected(socket, _acceptedSocket); 716 #ifdef TRACE_TCP 717 if (status == B_OK) 718 TRACE(" Accept() returning %p", (*_acceptedSocket)->first_protocol); 719 #endif 720 } while (status != B_OK); 721 722 return status; 723 } 724 725 726 status_t 727 TCPEndpoint::Bind(const sockaddr *address) 728 { 729 if (address == NULL) 730 return B_BAD_VALUE; 731 732 MutexLocker lock(fLock); 733 734 TRACE("Bind() on address %s", PrintAddress(address)); 735 T(APICall(this, "bind")); 
736 737 if (fState != CLOSED) 738 return EISCONN; 739 740 return fManager->Bind(this, address); 741 } 742 743 744 status_t 745 TCPEndpoint::Unbind(struct sockaddr *address) 746 { 747 MutexLocker _(fLock); 748 749 TRACE("Unbind()"); 750 T(APICall(this, "unbind")); 751 752 return fManager->Unbind(this); 753 } 754 755 756 status_t 757 TCPEndpoint::Listen(int count) 758 { 759 MutexLocker _(fLock); 760 761 TRACE("Listen()"); 762 T(APICall(this, "listen")); 763 764 if (fState != CLOSED && fState != LISTEN) 765 return B_BAD_VALUE; 766 767 if (fState == CLOSED) { 768 fAcceptSemaphore = create_sem(0, "tcp accept"); 769 if (fAcceptSemaphore < B_OK) 770 return ENOBUFS; 771 772 status_t status = fManager->SetPassive(this); 773 if (status != B_OK) { 774 delete_sem(fAcceptSemaphore); 775 fAcceptSemaphore = -1; 776 return status; 777 } 778 } 779 780 gSocketModule->set_max_backlog(socket, count); 781 782 fState = LISTEN; 783 T(State(this)); 784 return B_OK; 785 } 786 787 788 status_t 789 TCPEndpoint::Shutdown(int direction) 790 { 791 MutexLocker lock(fLock); 792 793 TRACE("Shutdown(%i)", direction); 794 T(APICall(this, "shutdown")); 795 796 if (direction == SHUT_RD || direction == SHUT_RDWR) 797 fFlags |= FLAG_NO_RECEIVE; 798 799 if (direction == SHUT_WR || direction == SHUT_RDWR) { 800 // TODO: That's not correct. After read/write shutting down the socket 801 // one should still be able to read previously arrived data. 802 _Disconnect(false); 803 } 804 805 return B_OK; 806 } 807 808 809 /*! 
Puts data contained in \a buffer into send buffer */ 810 status_t 811 TCPEndpoint::SendData(net_buffer *buffer) 812 { 813 MutexLocker lock(fLock); 814 815 TRACE("SendData(buffer %p, size %" B_PRIu32 ", flags %#" B_PRIx32 816 ") [total %" B_PRIuSIZE " bytes, has %" B_PRIuSIZE "]", buffer, 817 buffer->size, buffer->flags, fSendQueue.Size(), fSendQueue.Free()); 818 T(APICall(this, "senddata")); 819 820 const uint32 flags = buffer->msg_flags; 821 if ((flags & ~(MSG_DONTWAIT | MSG_OOB | MSG_EOF)) != 0) 822 return EOPNOTSUPP; 823 824 if (fState == CLOSED) 825 return ENOTCONN; 826 if (fState == LISTEN) 827 return EDESTADDRREQ; 828 if (!is_writable(fState) && !is_establishing(fState)) 829 return EPIPE; 830 831 size_t left = buffer->size; 832 833 bigtime_t timeout = 0; 834 if ((flags & MSG_DONTWAIT) == 0) { 835 timeout = absolute_timeout(socket->send.timeout); 836 if (gStackModule->is_restarted_syscall()) 837 timeout = gStackModule->restore_syscall_restart_timeout(); 838 else 839 gStackModule->store_syscall_restart_timeout(timeout); 840 } 841 842 while (left > 0) { 843 while (fSendQueue.Free() < socket->send.low_water_mark) { 844 // initiate a send before waiting 845 _SendQueued(); 846 847 // wait until enough space is available 848 status_t status = _WaitForCondition(fSendCondition, lock, timeout); 849 if (status < B_OK) { 850 TRACE(" SendData() returning %s (%d)", 851 strerror(posix_error(status)), (int)posix_error(status)); 852 return posix_error(status); 853 } 854 855 if (!is_writable(fState) && !is_establishing(fState)) 856 return EPIPE; 857 } 858 859 size_t size = fSendQueue.Free(); 860 if (size < left) { 861 // we need to split the original buffer 862 net_buffer* clone = gBufferModule->clone(buffer, false); 863 // TODO: add offset/size parameter to net_buffer::clone() or 864 // even a move_data() function, as this is a bit inefficient 865 if (clone == NULL) 866 return ENOBUFS; 867 868 status_t status = gBufferModule->trim(clone, size); 869 if (status != B_OK) { 870 
gBufferModule->free(clone); 871 return status; 872 } 873 874 gBufferModule->remove_header(buffer, size); 875 left -= size; 876 fSendQueue.Add(clone); 877 } else { 878 left -= buffer->size; 879 fSendQueue.Add(buffer); 880 } 881 } 882 883 TRACE(" SendData(): %" B_PRIuSIZE " bytes used.", fSendQueue.Used()); 884 885 bool force = false; 886 if ((flags & MSG_OOB) != 0) { 887 fSendUrgentOffset = fSendQueue.LastSequence(); 888 // RFC 961 specifies that the urgent offset points to the last 889 // byte of urgent data. However, this is commonly implemented as 890 // here, ie. it points to the first byte after the urgent data. 891 force = true; 892 } 893 if ((flags & MSG_EOF) != 0) 894 _Disconnect(false); 895 896 _SendQueued(force); 897 898 return B_OK; 899 } 900 901 902 ssize_t 903 TCPEndpoint::SendAvailable() 904 { 905 MutexLocker locker(fLock); 906 907 ssize_t available; 908 909 if (is_writable(fState)) 910 available = fSendQueue.Free(); 911 else if (is_establishing(fState)) 912 available = 0; 913 else 914 available = EPIPE; 915 916 TRACE("SendAvailable(): %" B_PRIdSSIZE, available); 917 T(APICall(this, "sendavailable")); 918 return available; 919 } 920 921 922 status_t 923 TCPEndpoint::FillStat(net_stat *stat) 924 { 925 MutexLocker _(fLock); 926 927 strlcpy(stat->state, name_for_state(fState), sizeof(stat->state)); 928 stat->receive_queue_size = fReceiveQueue.Available(); 929 stat->send_queue_size = fSendQueue.Used(); 930 931 return B_OK; 932 } 933 934 935 status_t 936 TCPEndpoint::ReadData(size_t numBytes, uint32 flags, net_buffer** _buffer) 937 { 938 if ((flags & ~(MSG_DONTWAIT | MSG_WAITALL | MSG_PEEK)) != 0) 939 return EOPNOTSUPP; 940 941 MutexLocker locker(fLock); 942 943 TRACE("ReadData(%" B_PRIuSIZE " bytes, flags %#" B_PRIx32 ")", numBytes, 944 flags); 945 T(APICall(this, "readdata")); 946 947 *_buffer = NULL; 948 949 if (fState == CLOSED || fState == LISTEN) { 950 if (socket->error != B_OK) 951 return socket->error; 952 return ENOTCONN; 953 } 954 955 bigtime_t 
timeout = 0; 956 if ((flags & MSG_DONTWAIT) == 0) { 957 timeout = absolute_timeout(socket->receive.timeout); 958 if (gStackModule->is_restarted_syscall()) 959 timeout = gStackModule->restore_syscall_restart_timeout(); 960 else 961 gStackModule->store_syscall_restart_timeout(timeout); 962 } 963 964 if (fState == SYNCHRONIZE_SENT || fState == SYNCHRONIZE_RECEIVED) { 965 if (flags & MSG_DONTWAIT) 966 return B_WOULD_BLOCK; 967 968 status_t status = _WaitForEstablished(locker, timeout); 969 if (status < B_OK) 970 return posix_error(status); 971 } 972 973 size_t dataNeeded = socket->receive.low_water_mark; 974 975 // When MSG_WAITALL is set then the function should block 976 // until the full amount of data can be returned. 977 if (flags & MSG_WAITALL) 978 dataNeeded = numBytes; 979 980 // TODO: add support for urgent data (MSG_OOB) 981 982 while (true) { 983 if (fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE 984 || fState == TIME_WAIT) { 985 // ``Connection closing''. 986 if (fReceiveQueue.Available() > 0) 987 break; 988 return B_OK; 989 } 990 991 if (fReceiveQueue.Available() > 0) { 992 if (fReceiveQueue.Available() >= dataNeeded 993 || (fReceiveQueue.PushedData() > 0 994 && fReceiveQueue.PushedData() >= fReceiveQueue.Available())) 995 break; 996 } else if (fState == FINISH_RECEIVED) { 997 // ``If no text is awaiting delivery, the RECEIVE will 998 // get a Connection closing''. 999 return B_OK; 1000 } 1001 1002 if (timeout == 0) 1003 return B_WOULD_BLOCK; 1004 1005 if ((fFlags & FLAG_NO_RECEIVE) != 0) 1006 return B_OK; 1007 1008 status_t status = _WaitForCondition(fReceiveCondition, locker, timeout); 1009 if (status < B_OK) { 1010 // The Open Group base specification mentions that EINTR should be 1011 // returned if the recv() is interrupted before _any data_ is 1012 // available. So we actually check if there is data, and if so, 1013 // push it to the user. 
1014 if ((status == B_TIMED_OUT || status == B_INTERRUPTED) 1015 && fReceiveQueue.Available() > 0) 1016 break; 1017 1018 return posix_error(status); 1019 } 1020 } 1021 1022 TRACE(" ReadData(): %" B_PRIuSIZE " are available.", 1023 fReceiveQueue.Available()); 1024 1025 if (numBytes < fReceiveQueue.Available()) 1026 fReceiveCondition.NotifyAll(); 1027 1028 bool clone = (flags & MSG_PEEK) != 0; 1029 1030 ssize_t receivedBytes = fReceiveQueue.Get(numBytes, !clone, _buffer); 1031 1032 TRACE(" ReadData(): %" B_PRIuSIZE " bytes kept.", 1033 fReceiveQueue.Available()); 1034 1035 if (fReceiveQueue.Available() == 0 && fState == FINISH_RECEIVED) 1036 socket->receive.low_water_mark = 0; 1037 1038 // if we opened the window, check if we should send a window update 1039 if (!clone) { 1040 // Only send if there's less than half the window size remaining. 1041 // TODO: This should use fReceiveWindow but that stays constant at present 1042 // due to another TODO. 1043 uint32 windowSizeRemaining = (fReceiveMaxAdvertised - fReceiveNext).Number(); 1044 if (fReceiveNext == (fReceiveMaxAdvertised + 1)) 1045 windowSizeRemaining = 0; 1046 if (windowSizeRemaining <= (fReceiveQueue.Size() / 2)) 1047 _SendAcknowledge(); 1048 } 1049 1050 return receivedBytes; 1051 } 1052 1053 1054 ssize_t 1055 TCPEndpoint::ReadAvailable() 1056 { 1057 MutexLocker locker(fLock); 1058 1059 TRACE("ReadAvailable(): %" B_PRIdSSIZE, _AvailableData()); 1060 T(APICall(this, "readavailable")); 1061 1062 return _AvailableData(); 1063 } 1064 1065 1066 status_t 1067 TCPEndpoint::SetSendBufferSize(size_t length) 1068 { 1069 MutexLocker _(fLock); 1070 fSendQueue.SetMaxBytes(length); 1071 return B_OK; 1072 } 1073 1074 1075 status_t 1076 TCPEndpoint::SetReceiveBufferSize(size_t length) 1077 { 1078 MutexLocker _(fLock); 1079 fReceiveQueue.SetMaxBytes(length); 1080 fFlags &= ~FLAG_AUTO_RECEIVE_BUFFER_SIZE; 1081 return B_OK; 1082 } 1083 1084 1085 status_t 1086 TCPEndpoint::GetOption(int option, void* _value, int* _length) 1087 { 
1088 if (*_length != sizeof(int)) 1089 return B_BAD_VALUE; 1090 1091 int* value = (int*)_value; 1092 1093 switch (option) { 1094 case TCP_NODELAY: 1095 if ((fOptions & TCP_NODELAY) != 0) 1096 *value = 1; 1097 else 1098 *value = 0; 1099 return B_OK; 1100 1101 case TCP_MAXSEG: 1102 *value = fReceiveMaxSegmentSize; 1103 return B_OK; 1104 1105 default: 1106 return B_BAD_VALUE; 1107 } 1108 } 1109 1110 1111 status_t 1112 TCPEndpoint::SetOption(int option, const void* _value, int length) 1113 { 1114 if (option != TCP_NODELAY) 1115 return B_BAD_VALUE; 1116 1117 if (length != sizeof(int)) 1118 return B_BAD_VALUE; 1119 1120 const int* value = (const int*)_value; 1121 1122 MutexLocker _(fLock); 1123 if (*value) 1124 fOptions |= TCP_NODELAY; 1125 else 1126 fOptions &= ~TCP_NODELAY; 1127 1128 return B_OK; 1129 } 1130 1131 1132 // #pragma mark - misc 1133 1134 1135 bool 1136 TCPEndpoint::IsBound() const 1137 { 1138 return !LocalAddress().IsEmpty(true); 1139 } 1140 1141 1142 bool 1143 TCPEndpoint::IsLocal() const 1144 { 1145 return (fFlags & FLAG_LOCAL) != 0; 1146 } 1147 1148 1149 status_t 1150 TCPEndpoint::DelayedAcknowledge() 1151 { 1152 // ACKs "MUST" be generated within 500ms of the first unACKed packet, and 1153 // "SHOULD" be for at least every second full-size segment. (RFC 5681 § 4.2) 1154 1155 bigtime_t delay = TCP_DELAYED_ACKNOWLEDGE_TIMEOUT; 1156 if ((fReceiveNext - fLastAcknowledgeSent) >= (fReceiveMaxSegmentSize * 2)) { 1157 // Trigger an immediate timeout rather than invoking Send directly, 1158 // allowing multiple invocations to be coalesced. 
1159 delay = 0; 1160 } else if (gStackModule->is_timer_active(&fDelayedAcknowledgeTimer)) { 1161 return B_OK; 1162 } 1163 1164 gStackModule->set_timer(&fDelayedAcknowledgeTimer, delay); 1165 T(TimerSet(this, "delayed ack", TCP_DELAYED_ACKNOWLEDGE_TIMEOUT)); 1166 return B_OK; 1167 } 1168 1169 1170 void 1171 TCPEndpoint::_StartPersistTimer() 1172 { 1173 gStackModule->set_timer(&fPersistTimer, TCP_PERSIST_TIMEOUT); 1174 T(TimerSet(this, "persist", TCP_PERSIST_TIMEOUT)); 1175 } 1176 1177 1178 void 1179 TCPEndpoint::_EnterTimeWait() 1180 { 1181 TRACE("_EnterTimeWait()"); 1182 1183 if (fState == TIME_WAIT) 1184 _CancelConnectionTimers(); 1185 1186 _UpdateTimeWait(); 1187 } 1188 1189 1190 void 1191 TCPEndpoint::_UpdateTimeWait() 1192 { 1193 gStackModule->set_timer(&fTimeWaitTimer, TCP_MAX_SEGMENT_LIFETIME << 1); 1194 T(TimerSet(this, "time-wait", TCP_MAX_SEGMENT_LIFETIME << 1)); 1195 } 1196 1197 1198 void 1199 TCPEndpoint::_CancelConnectionTimers() 1200 { 1201 gStackModule->cancel_timer(&fRetransmitTimer); 1202 T(TimerSet(this, "retransmit", -1)); 1203 gStackModule->cancel_timer(&fPersistTimer); 1204 T(TimerSet(this, "persist", -1)); 1205 gStackModule->cancel_timer(&fDelayedAcknowledgeTimer); 1206 T(TimerSet(this, "delayed ack", -1)); 1207 } 1208 1209 1210 /*! Sends the FIN flag to the peer when the connection is still open. 1211 Moves the endpoint to the next state depending on where it was. 
1212 */ 1213 status_t 1214 TCPEndpoint::_Disconnect(bool closing) 1215 { 1216 tcp_state previousState = fState; 1217 1218 if (fState == SYNCHRONIZE_RECEIVED || fState == ESTABLISHED) 1219 fState = FINISH_SENT; 1220 else if (fState == FINISH_RECEIVED) 1221 fState = WAIT_FOR_FINISH_ACKNOWLEDGE; 1222 else 1223 return B_OK; 1224 1225 T(State(this)); 1226 1227 status_t status = _SendQueued(); 1228 if (status != B_OK) { 1229 fState = previousState; 1230 T(State(this)); 1231 return status; 1232 } 1233 1234 return B_OK; 1235 } 1236 1237 1238 void 1239 TCPEndpoint::_MarkEstablished() 1240 { 1241 fState = ESTABLISHED; 1242 T(State(this)); 1243 1244 gSocketModule->set_connected(socket); 1245 if (gSocketModule->has_parent(socket)) 1246 release_sem_etc(fAcceptSemaphore, 1, B_DO_NOT_RESCHEDULE); 1247 1248 fSendCondition.NotifyAll(); 1249 gSocketModule->notify(socket, B_SELECT_WRITE, fSendQueue.Free()); 1250 } 1251 1252 1253 status_t 1254 TCPEndpoint::_WaitForEstablished(MutexLocker &locker, bigtime_t timeout) 1255 { 1256 // TODO: Checking for CLOSED seems correct, but breaks several neon tests. 1257 // When investigating this, also have a look at _Close() and _HandleReset(). 1258 while (fState < ESTABLISHED/* && fState != CLOSED*/) { 1259 if (socket->error != B_OK) 1260 return socket->error; 1261 1262 status_t status = _WaitForCondition(fSendCondition, locker, timeout); 1263 if (status < B_OK) 1264 return status; 1265 } 1266 1267 return B_OK; 1268 } 1269 1270 1271 // #pragma mark - receive 1272 1273 1274 void 1275 TCPEndpoint::_Close() 1276 { 1277 _CancelConnectionTimers(); 1278 fState = CLOSED; 1279 T(State(this)); 1280 1281 fFlags |= FLAG_DELETE_ON_CLOSE; 1282 1283 fSendCondition.NotifyAll(); 1284 _NotifyReader(); 1285 1286 if (gSocketModule->has_parent(socket)) { 1287 // We still have a parent - obviously, we haven't been accepted yet, 1288 // so no one could ever close us. 
1289 _CancelConnectionTimers(); 1290 gSocketModule->set_aborted(socket); 1291 } 1292 } 1293 1294 1295 void 1296 TCPEndpoint::_HandleReset(status_t error) 1297 { 1298 socket->error = error; 1299 _Close(); 1300 1301 gSocketModule->notify(socket, B_SELECT_WRITE, error); 1302 gSocketModule->notify(socket, B_SELECT_ERROR, error); 1303 } 1304 1305 1306 void 1307 TCPEndpoint::_DuplicateAcknowledge(tcp_segment_header &segment) 1308 { 1309 if (fDuplicateAcknowledgeCount == 0) 1310 fPreviousFlightSize = (fSendMax - fSendUnacknowledged).Number(); 1311 1312 if (++fDuplicateAcknowledgeCount < 3) { 1313 if (fSendQueue.Available(fSendMax) != 0 && fSendWindow != 0) { 1314 fSendNext = fSendMax; 1315 fCongestionWindow += fDuplicateAcknowledgeCount * fSendMaxSegmentSize; 1316 _SendQueued(); 1317 TRACE("_DuplicateAcknowledge(): packet sent under limited transmit on receipt of dup ack"); 1318 fCongestionWindow -= fDuplicateAcknowledgeCount * fSendMaxSegmentSize; 1319 } 1320 } 1321 1322 if (fDuplicateAcknowledgeCount == 3) { 1323 if ((segment.acknowledge - 1) > fRecover || (fCongestionWindow > fSendMaxSegmentSize && 1324 (fSendUnacknowledged - fPreviousHighestAcknowledge) <= 4 * fSendMaxSegmentSize)) { 1325 fFlags |= FLAG_RECOVERY; 1326 fRecover = fSendMax.Number() - 1; 1327 fSlowStartThreshold = max_c(fPreviousFlightSize / 2, 2 * fSendMaxSegmentSize); 1328 fCongestionWindow = fSlowStartThreshold + 3 * fSendMaxSegmentSize; 1329 fSendNext = segment.acknowledge; 1330 _SendQueued(); 1331 TRACE("_DuplicateAcknowledge(): packet sent under fast restransmit on the receipt of 3rd dup ack"); 1332 } 1333 } else if (fDuplicateAcknowledgeCount > 3) { 1334 uint32 flightSize = (fSendMax - fSendUnacknowledged).Number(); 1335 if ((fDuplicateAcknowledgeCount - 3) * fSendMaxSegmentSize <= flightSize) 1336 fCongestionWindow += fSendMaxSegmentSize; 1337 if (fSendQueue.Available(fSendMax) != 0) { 1338 fSendNext = fSendMax; 1339 _SendQueued(); 1340 } 1341 } 1342 } 1343 1344 1345 void 1346 
TCPEndpoint::_UpdateTimestamps(tcp_segment_header& segment, 1347 size_t segmentLength) 1348 { 1349 if (fFlags & FLAG_OPTION_TIMESTAMP) { 1350 tcp_sequence sequence(segment.sequence); 1351 1352 if (fLastAcknowledgeSent >= sequence 1353 && fLastAcknowledgeSent < (sequence + segmentLength)) 1354 fReceivedTimestamp = segment.timestamp_value; 1355 } 1356 } 1357 1358 1359 void 1360 TCPEndpoint::_UpdateReceiveBuffer() 1361 { 1362 if (fReceiveWindowShift == 0 || fSmoothedRoundTripTime < 0) 1363 return; 1364 if ((fFlags & FLAG_AUTO_RECEIVE_BUFFER_SIZE) == 0) 1365 return; 1366 1367 const uint64 maxWindowSize = UINT16_MAX << fReceiveWindowShift; 1368 if (fReceiveQueue.Size() == maxWindowSize) { 1369 // The window is already as large as it could be. 1370 return; 1371 } 1372 1373 if (fReceiveSizingTimestamp == 0) { 1374 fReceiveSizingTimestamp = tcp_now(); 1375 fReceiveSizingReference = fReceiveNext; 1376 return; 1377 } 1378 1379 const uint32 received = (fReceiveNext - fReceiveSizingReference).Number(); 1380 if (received < (fReceiveQueue.Size() / 2)) 1381 return; 1382 1383 const uint32 since = tcp_diff_timestamp(fReceiveSizingTimestamp); 1384 if (since == 0 || since < ((uint32)fSmoothedRoundTripTime * 2)) 1385 return; 1386 1387 fReceiveSizingTimestamp = tcp_now(); 1388 fReceiveSizingReference = fReceiveNext; 1389 1390 // TODO: add low_resource hook to reduce window sizes. 1391 if (low_resource_state(B_KERNEL_RESOURCE_MEMORY) != B_NO_LOW_RESOURCE) 1392 return; 1393 1394 // Target a window large enough to fit one second of traffic. 1395 const uint64 oneSecondReceive = (received * 1000LL) / since; 1396 if (fReceiveQueue.Size() >= oneSecondReceive) 1397 return; 1398 1399 // Don't increase the window size too rapidly. 1400 uint32 newWindowSize = min_c(oneSecondReceive, UINT32_MAX); 1401 if (newWindowSize > (fReceiveQueue.Size() * 2)) 1402 newWindowSize = fReceiveQueue.Size() * 2; 1403 1404 // Round to the window shift and max segment size. 
1405 uint32 newWindowShifted = newWindowSize >> fReceiveWindowShift; 1406 const uint32 maxSegmentShifted = fReceiveMaxSegmentSize >> fReceiveWindowShift; 1407 newWindowShifted = (newWindowShifted / maxSegmentShifted) * maxSegmentShifted; 1408 newWindowSize = newWindowShifted << fReceiveWindowShift; 1409 1410 if (newWindowSize > maxWindowSize) 1411 newWindowSize = maxWindowSize; 1412 if (fReceiveQueue.Size() >= newWindowSize) 1413 return; 1414 1415 fReceiveQueue.SetMaxBytes(newWindowSize); 1416 TRACE("TCPEndpoint: updated receive buffer size to %" B_PRIu32 "\n", newWindowSize); 1417 } 1418 1419 1420 ssize_t 1421 TCPEndpoint::_AvailableData() const 1422 { 1423 // TODO: Refer to the FLAG_NO_RECEIVE comment above regarding 1424 // the application of FLAG_NO_RECEIVE in listen()ing 1425 // sockets. 1426 if (fState == LISTEN) 1427 return gSocketModule->count_connected(socket); 1428 if (fState == SYNCHRONIZE_SENT) 1429 return 0; 1430 1431 ssize_t availableData = fReceiveQueue.Available(); 1432 1433 if (availableData == 0 && !_ShouldReceive()) 1434 return ENOTCONN; 1435 if (availableData == 0 && (fState == FINISH_RECEIVED || fState == WAIT_FOR_FINISH_ACKNOWLEDGE)) 1436 return ESHUTDOWN; 1437 return availableData; 1438 } 1439 1440 1441 void 1442 TCPEndpoint::_NotifyReader() 1443 { 1444 fReceiveCondition.NotifyAll(); 1445 gSocketModule->notify(socket, B_SELECT_READ, _AvailableData()); 1446 } 1447 1448 1449 bool 1450 TCPEndpoint::_AddData(tcp_segment_header& segment, net_buffer* buffer) 1451 { 1452 if ((segment.flags & TCP_FLAG_FINISH) != 0) { 1453 // Remember the position of the finish received flag 1454 fFinishReceived = true; 1455 fFinishReceivedAt = segment.sequence + buffer->size; 1456 } 1457 1458 fReceiveQueue.Add(buffer, segment.sequence); 1459 fReceiveNext = fReceiveQueue.NextSequence(); 1460 1461 if (fFinishReceived) { 1462 // Set or reset the finish flag on the current segment 1463 if (fReceiveNext < fFinishReceivedAt) 1464 segment.flags &= ~TCP_FLAG_FINISH; 1465 
else 1466 segment.flags |= TCP_FLAG_FINISH; 1467 } 1468 1469 TRACE(" _AddData(): adding data, receive next = %" B_PRIu32 ". Now have %" 1470 B_PRIuSIZE " bytes.", fReceiveNext.Number(), fReceiveQueue.Available()); 1471 1472 _UpdateReceiveBuffer(); 1473 1474 if ((segment.flags & TCP_FLAG_PUSH) != 0) 1475 fReceiveQueue.SetPushPointer(); 1476 1477 return fReceiveQueue.Available() > 0; 1478 } 1479 1480 1481 void 1482 TCPEndpoint::_PrepareReceivePath(tcp_segment_header& segment) 1483 { 1484 fInitialReceiveSequence = segment.sequence; 1485 fFinishReceived = false; 1486 fReceiveSizingTimestamp = 0; 1487 1488 // count the received SYN 1489 segment.sequence++; 1490 1491 fReceiveNext = segment.sequence; 1492 fReceiveQueue.SetInitialSequence(segment.sequence); 1493 1494 if ((fOptions & TCP_NOOPT) == 0) { 1495 if (segment.max_segment_size > 0) { 1496 // The maximum size of a segment that a TCP endpoint really sends, 1497 // the "effective send MSS", MUST be the smaller of the send MSS and 1498 // the largest transmission size permitted by the IP layer: 1499 fSendMaxSegmentSize = min_c(segment.max_segment_size, 1500 _MaxSegmentSize(*PeerAddress())); 1501 } 1502 1503 if (segment.options & TCP_HAS_WINDOW_SCALE) { 1504 fFlags |= FLAG_OPTION_WINDOW_SCALE; 1505 fSendWindowShift = segment.window_shift; 1506 } else { 1507 fFlags &= ~FLAG_OPTION_WINDOW_SCALE; 1508 fReceiveWindowShift = 0; 1509 } 1510 1511 if (segment.options & TCP_HAS_TIMESTAMPS) { 1512 fFlags |= FLAG_OPTION_TIMESTAMP; 1513 fReceivedTimestamp = segment.timestamp_value; 1514 } else 1515 fFlags &= ~FLAG_OPTION_TIMESTAMP; 1516 1517 if ((segment.options & TCP_SACK_PERMITTED) == 0) { 1518 fFlags &= ~FLAG_OPTION_SACK_PERMITTED; 1519 fFlags &= ~FLAG_AUTO_RECEIVE_BUFFER_SIZE; 1520 } 1521 } 1522 1523 if (fSendMaxSegmentSize > 2190) 1524 fCongestionWindow = 2 * fSendMaxSegmentSize; 1525 else if (fSendMaxSegmentSize > 1095) 1526 fCongestionWindow = 3 * fSendMaxSegmentSize; 1527 else 1528 fCongestionWindow = 4 * 
fSendMaxSegmentSize; 1529 1530 fSendMaxSegments = fCongestionWindow / fSendMaxSegmentSize; 1531 fSlowStartThreshold = (uint32)segment.advertised_window << fSendWindowShift; 1532 } 1533 1534 1535 bool 1536 TCPEndpoint::_ShouldReceive() const 1537 { 1538 if ((fFlags & FLAG_NO_RECEIVE) != 0) 1539 return false; 1540 1541 return fState == ESTABLISHED || fState == FINISH_SENT 1542 || fState == FINISH_ACKNOWLEDGED || fState == FINISH_RECEIVED; 1543 } 1544 1545 1546 int32 1547 TCPEndpoint::_Spawn(TCPEndpoint* parent, tcp_segment_header& segment, 1548 net_buffer* buffer) 1549 { 1550 MutexLocker _(fLock); 1551 1552 TRACE("Spawn()"); 1553 1554 // TODO: proper error handling! 1555 if (ProtocolSocket::Open() != B_OK) { 1556 T(Error(this, "opening failed", __LINE__)); 1557 return DROP; 1558 } 1559 1560 fState = SYNCHRONIZE_RECEIVED; 1561 T(Spawn(parent, this)); 1562 1563 fManager = parent->fManager; 1564 1565 if (fManager->BindChild(this, buffer->destination) != B_OK) { 1566 T(Error(this, "binding failed", __LINE__)); 1567 return DROP; 1568 } 1569 if (_PrepareSendPath(buffer->source) != B_OK) { 1570 T(Error(this, "prepare send faild", __LINE__)); 1571 return DROP; 1572 } 1573 1574 fOptions = parent->fOptions; 1575 fAcceptSemaphore = parent->fAcceptSemaphore; 1576 1577 _PrepareReceivePath(segment); 1578 1579 // send SYN+ACK 1580 if (_SendAcknowledge() != B_OK) { 1581 T(Error(this, "sending failed", __LINE__)); 1582 return DROP; 1583 } 1584 1585 segment.flags &= ~TCP_FLAG_SYNCHRONIZE; 1586 // we handled this flag now, it must not be set for further processing 1587 1588 return _Receive(segment, buffer); 1589 } 1590 1591 1592 int32 1593 TCPEndpoint::_ListenReceive(tcp_segment_header& segment, net_buffer* buffer) 1594 { 1595 TRACE("ListenReceive()"); 1596 1597 // Essentially, we accept only TCP_FLAG_SYNCHRONIZE in this state, 1598 // but the error behaviour differs 1599 if (segment.flags & TCP_FLAG_RESET) 1600 return DROP; 1601 if (segment.flags & TCP_FLAG_ACKNOWLEDGE) 1602 return 
DROP | RESET; 1603 if ((segment.flags & TCP_FLAG_SYNCHRONIZE) == 0) 1604 return DROP; 1605 1606 // TODO: drop broadcast/multicast 1607 1608 // spawn new endpoint for accept() 1609 net_socket* newSocket; 1610 if (gSocketModule->spawn_pending_socket(socket, &newSocket) < B_OK) { 1611 T(Error(this, "spawning failed", __LINE__)); 1612 return DROP; 1613 } 1614 1615 return ((TCPEndpoint *)newSocket->first_protocol)->_Spawn(this, 1616 segment, buffer); 1617 } 1618 1619 1620 int32 1621 TCPEndpoint::_SynchronizeSentReceive(tcp_segment_header &segment, 1622 net_buffer *buffer) 1623 { 1624 TRACE("_SynchronizeSentReceive()"); 1625 1626 if ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0 1627 && (fInitialSendSequence >= segment.acknowledge 1628 || fSendMax < segment.acknowledge)) 1629 return DROP | RESET; 1630 1631 if (segment.flags & TCP_FLAG_RESET) { 1632 _HandleReset(ECONNREFUSED); 1633 return DROP; 1634 } 1635 1636 if ((segment.flags & TCP_FLAG_SYNCHRONIZE) == 0) 1637 return DROP; 1638 1639 fSendUnacknowledged = segment.acknowledge; 1640 _PrepareReceivePath(segment); 1641 1642 if (segment.flags & TCP_FLAG_ACKNOWLEDGE) { 1643 _MarkEstablished(); 1644 } else { 1645 // simultaneous open 1646 fState = SYNCHRONIZE_RECEIVED; 1647 T(State(this)); 1648 } 1649 1650 segment.flags &= ~TCP_FLAG_SYNCHRONIZE; 1651 // we handled this flag now, it must not be set for further processing 1652 1653 return _Receive(segment, buffer) | IMMEDIATE_ACKNOWLEDGE; 1654 } 1655 1656 1657 int32 1658 TCPEndpoint::_Receive(tcp_segment_header& segment, net_buffer* buffer) 1659 { 1660 // PAWS processing takes precedence over regular TCP acceptability check 1661 if ((fFlags & FLAG_OPTION_TIMESTAMP) != 0 && (segment.flags & TCP_FLAG_RESET) == 0) { 1662 if ((segment.options & TCP_HAS_TIMESTAMPS) == 0) 1663 return DROP; 1664 if ((int32)(fReceivedTimestamp - segment.timestamp_value) > 0 1665 && (fReceivedTimestamp - segment.timestamp_value) <= INT32_MAX) 1666 return DROP | IMMEDIATE_ACKNOWLEDGE; 1667 } 1668 1669 
uint32 advertisedWindow = segment.AdvertisedWindow(fSendWindowShift); 1670 size_t segmentLength = buffer->size; 1671 1672 // First, handle the most common case for uni-directional data transfer 1673 // (known as header prediction - the segment must not change the window, 1674 // and must be the expected sequence, and contain no control flags) 1675 1676 if (fState == ESTABLISHED 1677 && segment.AcknowledgeOnly() 1678 && fReceiveNext == segment.sequence 1679 && advertisedWindow > 0 && advertisedWindow == fSendWindow 1680 && fSendNext == fSendMax) { 1681 _UpdateTimestamps(segment, segmentLength); 1682 1683 if (segmentLength == 0) { 1684 // this is a pure acknowledge segment - we're on the sending end 1685 if (fSendUnacknowledged < segment.acknowledge 1686 && fSendMax >= segment.acknowledge) { 1687 _Acknowledged(segment); 1688 return DROP; 1689 } 1690 } else if (segment.acknowledge == fSendUnacknowledged 1691 && fReceiveQueue.IsContiguous() 1692 && fReceiveQueue.Free() >= segmentLength 1693 && (fFlags & FLAG_NO_RECEIVE) == 0) { 1694 if (_AddData(segment, buffer)) 1695 _NotifyReader(); 1696 1697 return KEEP | ((segment.flags & TCP_FLAG_PUSH) != 0 1698 ? IMMEDIATE_ACKNOWLEDGE : ACKNOWLEDGE); 1699 } 1700 } 1701 1702 // The fast path was not applicable, so we continue with the standard 1703 // processing of the incoming segment 1704 1705 ASSERT(fState != SYNCHRONIZE_SENT && fState != LISTEN); 1706 1707 if (fState != CLOSED && fState != TIME_WAIT) { 1708 // Check sequence number 1709 if (!segment_in_sequence(segment, segmentLength, fReceiveNext, 1710 fReceiveWindow)) { 1711 TRACE(" Receive(): segment out of window, next: %" B_PRIu32 1712 " wnd: %" B_PRIu32, fReceiveNext.Number(), fReceiveWindow); 1713 if ((segment.flags & TCP_FLAG_RESET) != 0) { 1714 // TODO: this doesn't look right - review! 1715 return DROP; 1716 } 1717 return DROP | IMMEDIATE_ACKNOWLEDGE; 1718 } 1719 } 1720 1721 if ((segment.flags & TCP_FLAG_RESET) != 0) { 1722 // Is this a valid reset? 
1723 // We generally ignore resets in time wait state (see RFC 1337) 1724 if (fLastAcknowledgeSent <= segment.sequence 1725 && tcp_sequence(segment.sequence) < (fLastAcknowledgeSent 1726 + fReceiveWindow) 1727 && fState != TIME_WAIT) { 1728 status_t error; 1729 if (fState == SYNCHRONIZE_RECEIVED) 1730 error = ECONNREFUSED; 1731 else if (fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE) 1732 error = ENOTCONN; 1733 else 1734 error = ECONNRESET; 1735 1736 _HandleReset(error); 1737 } 1738 1739 return DROP; 1740 } 1741 1742 if ((segment.flags & TCP_FLAG_SYNCHRONIZE) != 0 1743 || (fState == SYNCHRONIZE_RECEIVED 1744 && (fInitialReceiveSequence > segment.sequence 1745 || ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0 1746 && (fSendUnacknowledged > segment.acknowledge 1747 || fSendMax < segment.acknowledge))))) { 1748 // reset the connection - either the initial SYN was faulty, or we 1749 // received a SYN within the data stream 1750 return DROP | RESET; 1751 } 1752 1753 // TODO: Check this! Why do we advertize a window outside of what we should 1754 // buffer? 
1755 fReceiveWindow = max_c(fReceiveQueue.Free(), fReceiveWindow); 1756 // the window must not shrink 1757 1758 // trim buffer to be within the receive window 1759 int32 drop = (int32)(fReceiveNext - segment.sequence).Number(); 1760 if (drop > 0) { 1761 if ((uint32)drop > buffer->size 1762 || ((uint32)drop == buffer->size 1763 && (segment.flags & TCP_FLAG_FINISH) == 0)) { 1764 // don't accidently remove a FIN we shouldn't remove 1765 segment.flags &= ~TCP_FLAG_FINISH; 1766 drop = buffer->size; 1767 } 1768 1769 // remove duplicate data at the start 1770 TRACE("* remove %" B_PRId32 " bytes from the start", drop); 1771 gBufferModule->remove_header(buffer, drop); 1772 segment.sequence += drop; 1773 } 1774 1775 int32 action = KEEP; 1776 1777 // Immediately acknowledge out-of-order segments, as well as any 1778 // in-order segments following out-of-order segments, to trigger 1779 // fast-retransmit and fast-recovery at the sender. 1780 if (drop != 0 || (drop == 0 && !fReceiveQueue.IsContiguous())) 1781 action |= IMMEDIATE_ACKNOWLEDGE; 1782 1783 drop = (int32)(segment.sequence + buffer->size 1784 - (fReceiveNext + fReceiveWindow)).Number(); 1785 if (drop > 0) { 1786 // remove data exceeding our window 1787 if ((uint32)drop >= buffer->size) { 1788 // if we can accept data, or the segment is not what we'd expect, 1789 // drop the segment (an immediate acknowledge is always triggered) 1790 if (fReceiveWindow != 0 || segment.sequence != fReceiveNext) 1791 return DROP | IMMEDIATE_ACKNOWLEDGE; 1792 1793 action |= IMMEDIATE_ACKNOWLEDGE; 1794 } 1795 1796 if ((segment.flags & TCP_FLAG_FINISH) != 0) { 1797 // we need to remove the finish, too, as part of the data 1798 drop--; 1799 } 1800 1801 segment.flags &= ~(TCP_FLAG_FINISH | TCP_FLAG_PUSH); 1802 TRACE("* remove %" B_PRId32 " bytes from the end", drop); 1803 gBufferModule->remove_trailer(buffer, drop); 1804 } 1805 1806 #ifdef TRACE_TCP 1807 if (advertisedWindow > fSendWindow) { 1808 TRACE(" Receive(): Window update %" B_PRIu32 " 
-> %" B_PRIu32, 1809 fSendWindow, advertisedWindow); 1810 } 1811 #endif 1812 1813 if (fSendWindow < fSendMaxSegmentSize 1814 && (advertisedWindow >= fSendMaxSegmentSize 1815 || advertisedWindow > (TCP_DEFAULT_MAX_SEGMENT_SIZE * 3))) { 1816 // Our current send window is less than a segment wide, and the new one 1817 // is larger, so trigger a send in case there's anything to be sent. 1818 action |= SEND_QUEUED; 1819 } 1820 1821 fSendWindow = advertisedWindow; 1822 if (advertisedWindow > fSendMaxWindow) 1823 fSendMaxWindow = advertisedWindow; 1824 1825 // Then look at the acknowledgement for any updates 1826 1827 if ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0) { 1828 // process acknowledged data 1829 if (fState == SYNCHRONIZE_RECEIVED) 1830 _MarkEstablished(); 1831 1832 if (fSendMax < segment.acknowledge) 1833 return DROP | IMMEDIATE_ACKNOWLEDGE; 1834 1835 if (segment.acknowledge == fSendUnacknowledged) { 1836 if (buffer->size == 0 && advertisedWindow == fSendWindow 1837 && (segment.flags & TCP_FLAG_FINISH) == 0 && fSendUnacknowledged != fSendMax) { 1838 TRACE("Receive(): duplicate ack!"); 1839 _DuplicateAcknowledge(segment); 1840 } 1841 } else if (segment.acknowledge < fSendUnacknowledged) { 1842 return DROP; 1843 } else { 1844 // this segment acknowledges in flight data 1845 1846 if (fDuplicateAcknowledgeCount >= 3) { 1847 // deflate the window. 
1848 if (segment.acknowledge > fRecover) { 1849 uint32 flightSize = (fSendMax - fSendUnacknowledged).Number(); 1850 fCongestionWindow = min_c(fSlowStartThreshold, 1851 max_c(flightSize, fSendMaxSegmentSize) + fSendMaxSegmentSize); 1852 fFlags &= ~FLAG_RECOVERY; 1853 } 1854 } 1855 1856 if (fSendMax == segment.acknowledge) 1857 TRACE("Receive(): all inflight data ack'd!"); 1858 1859 if (segment.acknowledge > fSendQueue.LastSequence() 1860 && fState > ESTABLISHED) { 1861 TRACE("Receive(): FIN has been acknowledged!"); 1862 1863 switch (fState) { 1864 case FINISH_SENT: 1865 fState = FINISH_ACKNOWLEDGED; 1866 T(State(this)); 1867 break; 1868 case CLOSING: 1869 fState = TIME_WAIT; 1870 T(State(this)); 1871 _EnterTimeWait(); 1872 return DROP; 1873 case WAIT_FOR_FINISH_ACKNOWLEDGE: 1874 _Close(); 1875 break; 1876 1877 default: 1878 break; 1879 } 1880 } 1881 1882 if (fState != CLOSED) { 1883 tcp_sequence last = fLastAcknowledgeSent; 1884 _Acknowledged(segment); 1885 // we just sent an acknowledge, remove from action 1886 if (last < fLastAcknowledgeSent) 1887 action &= ~IMMEDIATE_ACKNOWLEDGE; 1888 } 1889 } 1890 } 1891 1892 if (segment.flags & TCP_FLAG_URGENT) { 1893 if (fState == ESTABLISHED || fState == FINISH_SENT 1894 || fState == FINISH_ACKNOWLEDGED) { 1895 // TODO: Handle urgent data: 1896 // - RCV.UP <- max(RCV.UP, SEG.UP) 1897 // - signal the user that urgent data is available (SIGURG) 1898 } 1899 } 1900 1901 bool notify = false; 1902 1903 // The buffer may be freed if its data is added to the queue, so cache 1904 // the size as we still need it later. 
1905 const uint32 bufferSize = buffer->size; 1906 1907 if ((bufferSize > 0 || (segment.flags & TCP_FLAG_FINISH) != 0) 1908 && _ShouldReceive()) 1909 notify = _AddData(segment, buffer); 1910 else { 1911 if ((fFlags & FLAG_NO_RECEIVE) != 0) 1912 fReceiveNext += buffer->size; 1913 1914 action = (action & ~KEEP) | DROP; 1915 } 1916 1917 if ((segment.flags & TCP_FLAG_FINISH) != 0) { 1918 segmentLength++; 1919 if (fState != CLOSED && fState != LISTEN && fState != SYNCHRONIZE_SENT) { 1920 TRACE("Receive(): peer is finishing connection!"); 1921 fReceiveNext++; 1922 notify = true; 1923 1924 // FIN implies PUSH 1925 fReceiveQueue.SetPushPointer(); 1926 1927 // we'll reply immediately to the FIN if we are not 1928 // transitioning to TIME WAIT so we immediatly ACK it. 1929 action |= IMMEDIATE_ACKNOWLEDGE; 1930 1931 // other side is closing connection; change states 1932 switch (fState) { 1933 case ESTABLISHED: 1934 case SYNCHRONIZE_RECEIVED: 1935 fState = FINISH_RECEIVED; 1936 T(State(this)); 1937 break; 1938 case FINISH_SENT: 1939 // simultaneous close 1940 fState = CLOSING; 1941 T(State(this)); 1942 break; 1943 case FINISH_ACKNOWLEDGED: 1944 fState = TIME_WAIT; 1945 T(State(this)); 1946 _EnterTimeWait(); 1947 break; 1948 case TIME_WAIT: 1949 _UpdateTimeWait(); 1950 break; 1951 1952 default: 1953 break; 1954 } 1955 } 1956 } 1957 1958 if (notify) 1959 _NotifyReader(); 1960 1961 if (bufferSize > 0 || (segment.flags & TCP_FLAG_SYNCHRONIZE) != 0) 1962 action |= ACKNOWLEDGE; 1963 1964 _UpdateTimestamps(segment, segmentLength); 1965 1966 TRACE("Receive() Action %" B_PRId32, action); 1967 1968 return action; 1969 } 1970 1971 1972 int32 1973 TCPEndpoint::SegmentReceived(tcp_segment_header& segment, net_buffer* buffer) 1974 { 1975 MutexLocker locker(fLock); 1976 1977 TRACE("SegmentReceived(): buffer %p (%" B_PRIu32 " bytes) address %s " 1978 "to %s flags %#" B_PRIx8 ", seq %" B_PRIu32 ", ack %" B_PRIu32 1979 ", wnd %" B_PRIu32, buffer, buffer->size, PrintAddress(buffer->source), 1980 
PrintAddress(buffer->destination), segment.flags, segment.sequence, 1981 segment.acknowledge, 1982 (uint32)segment.advertised_window << fSendWindowShift); 1983 T(Receive(this, segment, 1984 (uint32)segment.advertised_window << fSendWindowShift, buffer)); 1985 int32 segmentAction = DROP; 1986 1987 switch (fState) { 1988 case LISTEN: 1989 segmentAction = _ListenReceive(segment, buffer); 1990 break; 1991 1992 case SYNCHRONIZE_SENT: 1993 segmentAction = _SynchronizeSentReceive(segment, buffer); 1994 break; 1995 1996 case SYNCHRONIZE_RECEIVED: 1997 case ESTABLISHED: 1998 case FINISH_RECEIVED: 1999 case WAIT_FOR_FINISH_ACKNOWLEDGE: 2000 case FINISH_SENT: 2001 case FINISH_ACKNOWLEDGED: 2002 case CLOSING: 2003 case TIME_WAIT: 2004 case CLOSED: 2005 segmentAction = _Receive(segment, buffer); 2006 break; 2007 } 2008 2009 // process acknowledge action as asked for by the *Receive() method 2010 if (segmentAction & IMMEDIATE_ACKNOWLEDGE) 2011 _SendAcknowledge(true); 2012 else if (segmentAction & ACKNOWLEDGE) 2013 DelayedAcknowledge(); 2014 2015 if (segmentAction & SEND_QUEUED) 2016 _SendQueued(); 2017 2018 if ((fFlags & (FLAG_CLOSED | FLAG_DELETE_ON_CLOSE)) 2019 == (FLAG_CLOSED | FLAG_DELETE_ON_CLOSE)) { 2020 2021 locker.Unlock(); 2022 if (gSocketModule->release_socket(socket)) 2023 segmentAction |= DELETED_ENDPOINT; 2024 } 2025 2026 return segmentAction; 2027 } 2028 2029 2030 // #pragma mark - send 2031 2032 2033 tcp_segment_header 2034 TCPEndpoint::_PrepareSendSegment() 2035 { 2036 // we don't set FLAG_FINISH here, instead we do it 2037 // conditionally below depending if we are sending 2038 // the last bytes of the send queue. 
2039 2040 uint8 flags = 0; 2041 switch (fState) { 2042 case CLOSED: 2043 flags = TCP_FLAG_RESET | TCP_FLAG_ACKNOWLEDGE; 2044 break; 2045 2046 case SYNCHRONIZE_SENT: 2047 flags = TCP_FLAG_SYNCHRONIZE; 2048 break; 2049 2050 case SYNCHRONIZE_RECEIVED: 2051 flags = TCP_FLAG_SYNCHRONIZE | TCP_FLAG_ACKNOWLEDGE; 2052 break; 2053 2054 case ESTABLISHED: 2055 case FINISH_RECEIVED: 2056 case FINISH_ACKNOWLEDGED: 2057 case TIME_WAIT: 2058 case WAIT_FOR_FINISH_ACKNOWLEDGE: 2059 case FINISH_SENT: 2060 case CLOSING: 2061 flags = TCP_FLAG_ACKNOWLEDGE; 2062 2063 default: 2064 break; 2065 } 2066 2067 tcp_segment_header segment(flags); 2068 2069 if ((fOptions & TCP_NOOPT) == 0) { 2070 if ((fFlags & FLAG_OPTION_TIMESTAMP) != 0) { 2071 segment.options |= TCP_HAS_TIMESTAMPS; 2072 segment.timestamp_reply = fReceivedTimestamp; 2073 segment.timestamp_value = tcp_now(); 2074 } 2075 2076 if ((segment.flags & TCP_FLAG_SYNCHRONIZE) != 0 2077 && fSendNext == fInitialSendSequence) { 2078 // add connection establishment options 2079 segment.max_segment_size = fReceiveMaxSegmentSize; 2080 if (fFlags & FLAG_OPTION_WINDOW_SCALE) { 2081 segment.options |= TCP_HAS_WINDOW_SCALE; 2082 segment.window_shift = fReceiveWindowShift; 2083 } 2084 if ((fFlags & FLAG_OPTION_SACK_PERMITTED) != 0) 2085 segment.options |= TCP_SACK_PERMITTED; 2086 } 2087 2088 if (!fReceiveQueue.IsContiguous() 2089 && (fFlags & FLAG_OPTION_SACK_PERMITTED) != 0) { 2090 segment.options |= TCP_HAS_SACK; 2091 int maxSackCount = MAX_SACK_BLKS 2092 - (((fFlags & FLAG_OPTION_TIMESTAMP) != 0) ? 
1 : 0); 2093 memset(segment.sacks, 0, sizeof(segment.sacks)); 2094 segment.sackCount = fReceiveQueue.PopulateSackInfo(fReceiveNext, 2095 maxSackCount, segment.sacks); 2096 } 2097 } 2098 2099 size_t availableBytes = fReceiveQueue.Free(); 2100 // window size must remain same for duplicate acknowledgements 2101 if (!fReceiveQueue.IsContiguous()) 2102 availableBytes = (fReceiveMaxAdvertised - fReceiveNext).Number(); 2103 segment.SetAdvertisedWindow(availableBytes, fReceiveWindowShift); 2104 2105 segment.acknowledge = fReceiveNext.Number(); 2106 2107 // Process urgent data 2108 if (fSendUrgentOffset > fSendNext) { 2109 segment.flags |= TCP_FLAG_URGENT; 2110 segment.urgent_offset = (fSendUrgentOffset - fSendNext).Number(); 2111 } else { 2112 fSendUrgentOffset = fSendUnacknowledged.Number(); 2113 // Keep urgent offset updated, so that it doesn't reach into our 2114 // send window on overlap 2115 segment.urgent_offset = 0; 2116 } 2117 2118 return segment; 2119 } 2120 2121 2122 status_t 2123 TCPEndpoint::_PrepareAndSend(tcp_segment_header& segment, net_buffer* buffer, 2124 bool isRetransmit) 2125 { 2126 LocalAddress().CopyTo(buffer->source); 2127 PeerAddress().CopyTo(buffer->destination); 2128 2129 uint32 size = buffer->size, segmentLength = size; 2130 segment.sequence = fSendNext.Number(); 2131 2132 TRACE("_PrepareAndSend(): buffer %p (%" B_PRIu32 " bytes) address %s to " 2133 "%s flags %#" B_PRIx8 ", seq %" B_PRIu32 ", ack %" B_PRIu32 2134 ", rwnd %" B_PRIu16 ", cwnd %" B_PRIu32 ", ssthresh %" B_PRIu32 2135 ", len %" B_PRIu32 ", first %" B_PRIu32 ", last %" B_PRIu32, 2136 buffer, buffer->size, PrintAddress(buffer->source), 2137 PrintAddress(buffer->destination), segment.flags, segment.sequence, 2138 segment.acknowledge, segment.advertised_window, 2139 fCongestionWindow, fSlowStartThreshold, segmentLength, 2140 fSendQueue.FirstSequence().Number(), 2141 fSendQueue.LastSequence().Number()); 2142 T(Send(this, segment, buffer, fSendQueue.FirstSequence(), 2143 
fSendQueue.LastSequence())); 2144 2145 PROBE(buffer, sendWindow); 2146 2147 status_t status = add_tcp_header(AddressModule(), segment, buffer); 2148 if (status != B_OK) { 2149 gBufferModule->free(buffer); 2150 return status; 2151 } 2152 2153 if (segment.flags & TCP_FLAG_SYNCHRONIZE) { 2154 segment.options &= ~TCP_HAS_WINDOW_SCALE; 2155 segment.max_segment_size = 0; 2156 size++; 2157 } 2158 2159 if (segment.flags & TCP_FLAG_FINISH) 2160 size++; 2161 2162 status = next->module->send_routed_data(next, fRoute, buffer); 2163 if (status < B_OK) { 2164 gBufferModule->free(buffer); 2165 return status; 2166 } 2167 2168 fSendNext += size; 2169 if (fSendMax < fSendNext) 2170 fSendMax = fSendNext; 2171 2172 fReceiveMaxAdvertised = fReceiveNext + segment.AdvertisedWindow(fReceiveWindowShift); 2173 2174 if (segmentLength != 0 && fState == ESTABLISHED) 2175 --fSendMaxSegments; 2176 2177 if (fSendTime == 0 && !isRetransmit 2178 && (segmentLength != 0 || (segment.flags & TCP_FLAG_SYNCHRONIZE) != 0)) { 2179 fSendTime = tcp_now(); 2180 fRoundTripStartSequence = segment.sequence; 2181 } 2182 2183 if (segment.flags & TCP_FLAG_ACKNOWLEDGE) { 2184 fLastAcknowledgeSent = segment.acknowledge; 2185 gStackModule->cancel_timer(&fDelayedAcknowledgeTimer); 2186 } 2187 2188 return B_OK; 2189 } 2190 2191 2192 inline bool 2193 TCPEndpoint::_ShouldSendSegment(tcp_segment_header& segment, uint32 length, 2194 uint32 segmentMaxSize, uint32 flightSize) 2195 { 2196 if (fState == ESTABLISHED && fSendMaxSegments == 0) 2197 return false; 2198 2199 if (length > 0) { 2200 // Avoid the silly window syndrome - we only send a segment in case: 2201 // - we have a full segment to send, or 2202 // - we're at the end of our buffer queue, or 2203 // - the buffer is at least larger than half of the maximum send window, 2204 // or 2205 // - we're retransmitting data 2206 if (length == segmentMaxSize 2207 || (fOptions & TCP_NODELAY) != 0 2208 || tcp_sequence(fSendNext + length) == fSendQueue.LastSequence() 2209 || 
(fSendMaxWindow > 0 && length >= fSendMaxWindow / 2)) 2210 return true; 2211 } 2212 2213 // check if we need to send a window update to the peer 2214 if (segment.advertised_window > 0) { 2215 // correct the window to take into account what already has been advertised 2216 uint32 window = segment.AdvertisedWindow(fReceiveWindowShift) 2217 - (fReceiveMaxAdvertised - fReceiveNext).Number(); 2218 if (fReceiveNext == (fReceiveMaxAdvertised + 1)) 2219 window = 0; 2220 2221 // if we can advertise a window larger than twice the maximum segment 2222 // size, or half the maximum buffer size we send a window update 2223 if (window >= (fReceiveMaxSegmentSize << 1) 2224 || window >= (socket->receive.buffer_size >> 1)) 2225 return true; 2226 } 2227 2228 if ((segment.flags & (TCP_FLAG_SYNCHRONIZE | TCP_FLAG_FINISH 2229 | TCP_FLAG_RESET)) != 0) 2230 return true; 2231 2232 // We do have urgent data pending 2233 if (fSendUrgentOffset > fSendNext) 2234 return true; 2235 2236 // there is no reason to send a segment just now 2237 return false; 2238 } 2239 2240 2241 status_t 2242 TCPEndpoint::_SendAcknowledge(bool force) 2243 { 2244 if (fRoute == NULL || fState == LISTEN) 2245 return B_ERROR; 2246 2247 tcp_segment_header segment = _PrepareSendSegment(); 2248 2249 // Is there actually anything to do? 2250 if (!force && fState == ESTABLISHED 2251 && fLastAcknowledgeSent == fReceiveNext 2252 && fReceiveQueue.IsContiguous() 2253 && !_ShouldSendSegment(segment, 0, 0, 0)) 2254 return B_OK; 2255 2256 net_buffer* buffer = gBufferModule->create(256); 2257 if (buffer == NULL) 2258 return B_NO_MEMORY; 2259 2260 return _PrepareAndSend(segment, buffer, false); 2261 } 2262 2263 2264 /*! Sends one or more TCP segments with the data waiting in the queue. 
*/
status_t
TCPEndpoint::_SendQueued(bool force)
{
	// Returns B_ERROR without a route or before the connection is
	// established, B_NO_MEMORY if a buffer cannot be created, the error of
	// the send queue or of _PrepareAndSend() if one of them fails, and B_OK
	// otherwise - including the case that nothing was sent.
	if (fRoute == NULL || fState < ESTABLISHED)
		return B_ERROR;

	tcp_segment_header segment = _PrepareSendSegment();

	// The usable window is the smaller one of the peer's window and the
	// congestion window (a congestion window of 0 is ignored).
	uint32 sendWindow = fSendWindow;
	if (fCongestionWindow > 0 && fCongestionWindow < sendWindow)
		sendWindow = fCongestionWindow;

	// fSendUnacknowledged
	//  |    fSendNext      fSendMax
	//  |        |              |
	//  v        v              v
	//  -----------------------------------
	//  | effective window                |
	//  -----------------------------------

	// Flight size represents the window of data which is currently in the
	// ether. We should never send data such as the flight size becomes larger
	// than the effective window. Note however that the effective window may be
	// reduced (by congestion for instance), so at some point in time flight
	// size may be larger than the currently calculated window.

	uint32 flightSize = (fSendMax - fSendUnacknowledged).Number();
	uint32 consumedWindow = (fSendNext - fSendUnacknowledged).Number();

	if (consumedWindow > sendWindow) {
		sendWindow = 0;
		// TODO: enter persist state? try to get a window update.
	} else
		sendWindow -= consumedWindow;

	// Never send more than what is queued from fSendNext on, or than what
	// is left of the window.
	uint32 length = min_c(fSendQueue.Available(fSendNext), sendWindow);
	if (length == 0 && !state_needs_finish(fState)) {
		// Nothing to send.
		return B_OK;
	}

	// The retransmit timer is only started if nothing was in flight before;
	// sending from below fSendMax means we are sending data again.
	bool shouldStartRetransmitTimer = fSendNext == fSendUnacknowledged;
	bool retransmit = fSendNext < fSendMax;

	if (fDuplicateAcknowledgeCount != 0) {
		// send at most 1 SMSS of data when under limited transmit, fast transmit/recovery
		length = min_c(length, fSendMaxSegmentSize);
	}

	do {
		// The options of this segment reduce the room left for payload.
		uint32 segmentMaxSize = fSendMaxSegmentSize
			- tcp_options_length(segment);
		uint32 segmentLength = min_c(length, segmentMaxSize);

		// If this segment reaches the end of the send queue, it carries the
		// FIN (if the state asks for one) and PUSH (if it has payload) -
		// unless the send is forced.
		if ((fSendNext + segmentLength) == fSendQueue.LastSequence() && !force) {
			if (state_needs_finish(fState))
				segment.flags |= TCP_FLAG_FINISH;
			if (length > 0)
				segment.flags |= TCP_FLAG_PUSH;
		}

		// Determine if we should really send this segment
		if (!force && !retransmit && !_ShouldSendSegment(segment, segmentLength,
				segmentMaxSize, flightSize)) {
			// Data stays behind in the queue: make sure some timer is
			// running that will get us back here.
			if (fSendQueue.Available()
				&& !gStackModule->is_timer_active(&fPersistTimer)
				&& !gStackModule->is_timer_active(&fRetransmitTimer))
				_StartPersistTimer();
			break;
		}

		net_buffer *buffer = gBufferModule->create(256);
		if (buffer == NULL)
			return B_NO_MEMORY;

		status_t status = B_OK;
		if (segmentLength > 0)
			status = fSendQueue.Get(buffer, fSendNext, segmentLength);
		if (status < B_OK) {
			gBufferModule->free(buffer);
			return status;
		}

		sendWindow -= buffer->size;

		// On failure, _PrepareAndSend() has already freed the buffer.
		status = _PrepareAndSend(segment, buffer, retransmit);
		if (status != B_OK)
			return status;

		if (shouldStartRetransmitTimer) {
			TRACE("starting initial retransmit timer of: %" B_PRIdBIGTIME,
				fRetransmitTimeout);
			gStackModule->set_timer(&fRetransmitTimer, fRetransmitTimeout);
			T(TimerSet(this, "retransmit", fRetransmitTimeout));
			shouldStartRetransmitTimer = false;
		}

		length -= segmentLength;
		// These flags must only go out with a single segment.
		segment.flags &= ~(TCP_FLAG_SYNCHRONIZE | TCP_FLAG_RESET
			| TCP_FLAG_FINISH);

		// Only a single segment is sent again per call.
		if (retransmit)
			break;

	} while (length > 0);

	return B_OK;
}


/*!	Returns the largest TCP payload that fits into a packet to \a address:
	the MTU reported by the next protocol layer minus the size of the TCP
	header.
*/
int
TCPEndpoint::_MaxSegmentSize(const sockaddr* address) const
{
	return next->module->get_mtu(next, address) - sizeof(tcp_header);
}


status_t
TCPEndpoint::_PrepareSendPath(const sockaddr* peer)
{
	if (fRoute == NULL) {
		fRoute = gDatalinkModule->get_route(Domain(), peer);
		if (fRoute == NULL)
			return ENETUNREACH;

		if ((fRoute->flags & RTF_LOCAL) != 0)
			fFlags |= FLAG_LOCAL;
	}

	// make sure connection does not already exist
	status_t status = fManager->SetConnection(this, *LocalAddress(), peer,
		fRoute->interface_address->local);
	if (status < B_OK)
		return status;

	fInitialSendSequence = system_time() >> 4;
	fSendNext = fInitialSendSequence;
	fSendUnacknowledged = fInitialSendSequence;
	fSendMax = fInitialSendSequence;
	fSendUrgentOffset = fInitialSendSequence;
	fRecover = fInitialSendSequence.Number();

	// we are counting the SYN here
	fSendQueue.SetInitialSequence(fSendNext + 1);

	fReceiveMaxSegmentSize = _MaxSegmentSize(peer);

	// Compute the window shift we advertise to our peer - if it doesn't support
	// this option, this will be reset to 0 (when its SYN is received)
	fReceiveWindowShift = 0;
	while (fReceiveWindowShift < TCP_MAX_WINDOW_SHIFT
		&& (0xffffUL << fReceiveWindowShift) < socket->receive.buffer_size) {
		fReceiveWindowShift++;
	}

	// Increase to a default of 8 (window minimum 256 bytes, maximum 15 MB.)
2421 if (fReceiveWindowShift < 8 && !IsLocal()) 2422 fReceiveWindowShift = 8; 2423 2424 return B_OK; 2425 } 2426 2427 2428 void 2429 TCPEndpoint::_Acknowledged(tcp_segment_header& segment) 2430 { 2431 TRACE("_Acknowledged(): ack %" B_PRIu32 "; uack %" B_PRIu32 "; next %" 2432 B_PRIu32 "; max %" B_PRIu32, segment.acknowledge, 2433 fSendUnacknowledged.Number(), fSendNext.Number(), fSendMax.Number()); 2434 2435 ASSERT(fSendUnacknowledged <= segment.acknowledge); 2436 2437 if (fSendUnacknowledged < segment.acknowledge) { 2438 fSendQueue.RemoveUntil(segment.acknowledge); 2439 2440 uint32 bytesAcknowledged = segment.acknowledge - fSendUnacknowledged.Number(); 2441 fPreviousHighestAcknowledge = fSendUnacknowledged; 2442 fSendUnacknowledged = segment.acknowledge; 2443 uint32 flightSize = (fSendMax - fSendUnacknowledged).Number(); 2444 int32 expectedSamples = flightSize / (fSendMaxSegmentSize << 1); 2445 2446 if (fPreviousHighestAcknowledge > fSendUnacknowledged) { 2447 // need to update the recover variable upon a sequence wraparound 2448 fRecover = segment.acknowledge - 1; 2449 } 2450 2451 // the acknowledgment of the SYN/ACK MUST NOT increase the size of the congestion window 2452 if (fSendUnacknowledged != fInitialSendSequence) { 2453 if (fCongestionWindow < fSlowStartThreshold) 2454 fCongestionWindow += min_c(bytesAcknowledged, fSendMaxSegmentSize); 2455 else { 2456 uint32 increment = fSendMaxSegmentSize * fSendMaxSegmentSize; 2457 2458 if (increment < fCongestionWindow) 2459 increment = 1; 2460 else 2461 increment /= fCongestionWindow; 2462 2463 fCongestionWindow += increment; 2464 } 2465 2466 fSendMaxSegments = UINT32_MAX; 2467 } 2468 2469 if ((fFlags & FLAG_RECOVERY) != 0) { 2470 fSendNext = fSendUnacknowledged; 2471 _SendQueued(); 2472 fCongestionWindow -= bytesAcknowledged; 2473 2474 if (bytesAcknowledged > fSendMaxSegmentSize) 2475 fCongestionWindow += fSendMaxSegmentSize; 2476 2477 fSendNext = fSendMax; 2478 } else 2479 fDuplicateAcknowledgeCount = 0; 2480 2481 
if (fSendNext < fSendUnacknowledged) 2482 fSendNext = fSendUnacknowledged; 2483 2484 if (fFlags & FLAG_OPTION_TIMESTAMP) { 2485 _UpdateRoundTripTime(tcp_diff_timestamp(segment.timestamp_reply), 2486 expectedSamples > 0 ? expectedSamples : 1); 2487 } else if (fSendTime != 0 && fRoundTripStartSequence < segment.acknowledge) { 2488 _UpdateRoundTripTime(tcp_diff_timestamp(fSendTime), 1); 2489 fSendTime = 0; 2490 } 2491 2492 if (fSendUnacknowledged == fSendMax) { 2493 TRACE("all acknowledged, cancelling retransmission timer."); 2494 gStackModule->cancel_timer(&fRetransmitTimer); 2495 T(TimerSet(this, "retransmit", -1)); 2496 } else { 2497 TRACE("data acknowledged, resetting retransmission timer to: %" 2498 B_PRIdBIGTIME, fRetransmitTimeout); 2499 gStackModule->set_timer(&fRetransmitTimer, fRetransmitTimeout); 2500 T(TimerSet(this, "retransmit", fRetransmitTimeout)); 2501 } 2502 2503 if (is_writable(fState)) { 2504 // notify threads waiting on the socket to become writable again 2505 fSendCondition.NotifyAll(); 2506 gSocketModule->notify(socket, B_SELECT_WRITE, fSendQueue.Free()); 2507 } 2508 } 2509 2510 // if there is data left to be sent, send it now 2511 if (fSendQueue.Used() > 0) 2512 _SendQueued(); 2513 } 2514 2515 2516 void 2517 TCPEndpoint::_Retransmit() 2518 { 2519 TRACE("Retransmit()"); 2520 2521 if (fState < ESTABLISHED) { 2522 fRetransmitTimeout = TCP_SYN_RETRANSMIT_TIMEOUT; 2523 fCongestionWindow = fSendMaxSegmentSize; 2524 } else { 2525 _ResetSlowStart(); 2526 fDuplicateAcknowledgeCount = 0; 2527 // Do exponential back off of the retransmit timeout 2528 fRetransmitTimeout *= 2; 2529 if (fRetransmitTimeout > TCP_MAX_RETRANSMIT_TIMEOUT) 2530 fRetransmitTimeout = TCP_MAX_RETRANSMIT_TIMEOUT; 2531 } 2532 2533 fSendNext = fSendUnacknowledged; 2534 _SendQueued(); 2535 2536 fRecover = fSendNext.Number() - 1; 2537 if ((fFlags & FLAG_RECOVERY) != 0) 2538 fFlags &= ~FLAG_RECOVERY; 2539 } 2540 2541 2542 void 2543 TCPEndpoint::_UpdateRoundTripTime(int32 roundTripTime, 
int32 expectedSamples) 2544 { 2545 if (roundTripTime < 0) 2546 return; 2547 2548 if (fSmoothedRoundTripTime <= 0) { 2549 fSmoothedRoundTripTime = roundTripTime; 2550 fRoundTripVariation = roundTripTime / 2; 2551 fRetransmitTimeout = (fSmoothedRoundTripTime + max_c(100, fRoundTripVariation * 4)) 2552 * kTimestampFactor; 2553 } else { 2554 int32 delta = fSmoothedRoundTripTime - roundTripTime; 2555 if (delta < 0) 2556 delta = -delta; 2557 fRoundTripVariation += (delta - fRoundTripVariation) / (expectedSamples * 4); 2558 fSmoothedRoundTripTime += (roundTripTime - fSmoothedRoundTripTime) / (expectedSamples * 8); 2559 fRetransmitTimeout = (fSmoothedRoundTripTime + max_c(100, fRoundTripVariation * 4)) 2560 * kTimestampFactor; 2561 } 2562 2563 if (fRetransmitTimeout > TCP_MAX_RETRANSMIT_TIMEOUT) 2564 fRetransmitTimeout = TCP_MAX_RETRANSMIT_TIMEOUT; 2565 2566 if (fRetransmitTimeout < TCP_MIN_RETRANSMIT_TIMEOUT) 2567 fRetransmitTimeout = TCP_MIN_RETRANSMIT_TIMEOUT; 2568 2569 TRACE(" RTO is now %" B_PRIdBIGTIME " (after rtt %" B_PRId32 "ms)", 2570 fRetransmitTimeout, roundTripTime); 2571 } 2572 2573 2574 void 2575 TCPEndpoint::_ResetSlowStart() 2576 { 2577 fSlowStartThreshold = max_c((fSendMax - fSendUnacknowledged).Number() / 2, 2578 2 * fSendMaxSegmentSize); 2579 fCongestionWindow = fSendMaxSegmentSize; 2580 } 2581 2582 2583 // #pragma mark - timer 2584 2585 2586 /*static*/ void 2587 TCPEndpoint::_RetransmitTimer(net_timer* timer, void* _endpoint) 2588 { 2589 TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint; 2590 T(TimerTriggered(endpoint, "retransmit")); 2591 2592 MutexLocker locker(endpoint->fLock); 2593 if (!locker.IsLocked() || gStackModule->is_timer_active(timer)) 2594 return; 2595 2596 endpoint->_Retransmit(); 2597 } 2598 2599 2600 /*static*/ void 2601 TCPEndpoint::_PersistTimer(net_timer* timer, void* _endpoint) 2602 { 2603 TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint; 2604 T(TimerTriggered(endpoint, "persist")); 2605 2606 MutexLocker locker(endpoint->fLock); 2607 
if (!locker.IsLocked()) 2608 return; 2609 2610 // the timer might not have been canceled early enough 2611 if (endpoint->State() == CLOSED) 2612 return; 2613 2614 endpoint->_SendQueued(true); 2615 } 2616 2617 2618 /*static*/ void 2619 TCPEndpoint::_DelayedAcknowledgeTimer(net_timer* timer, void* _endpoint) 2620 { 2621 TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint; 2622 T(TimerTriggered(endpoint, "delayed ack")); 2623 2624 MutexLocker locker(endpoint->fLock); 2625 if (!locker.IsLocked()) 2626 return; 2627 2628 // the timer might not have been canceled early enough 2629 if (endpoint->State() == CLOSED) 2630 return; 2631 2632 endpoint->_SendAcknowledge(); 2633 } 2634 2635 2636 /*static*/ void 2637 TCPEndpoint::_TimeWaitTimer(net_timer* timer, void* _endpoint) 2638 { 2639 TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint; 2640 T(TimerTriggered(endpoint, "time-wait")); 2641 2642 MutexLocker locker(endpoint->fLock); 2643 if (!locker.IsLocked()) 2644 return; 2645 2646 if ((endpoint->fFlags & FLAG_CLOSED) == 0) { 2647 endpoint->fFlags |= FLAG_DELETE_ON_CLOSE; 2648 return; 2649 } 2650 2651 locker.Unlock(); 2652 2653 gSocketModule->release_socket(endpoint->socket); 2654 } 2655 2656 2657 /*static*/ status_t 2658 TCPEndpoint::_WaitForCondition(ConditionVariable& condition, 2659 MutexLocker& locker, bigtime_t timeout) 2660 { 2661 ConditionVariableEntry entry; 2662 condition.Add(&entry); 2663 2664 locker.Unlock(); 2665 status_t result = entry.Wait(B_ABSOLUTE_TIMEOUT | B_CAN_INTERRUPT, timeout); 2666 locker.Lock(); 2667 2668 return result; 2669 } 2670 2671 2672 // #pragma mark - 2673 2674 2675 void 2676 TCPEndpoint::Dump() const 2677 { 2678 kprintf("TCP endpoint %p\n", this); 2679 kprintf(" socket: %p\n", socket); 2680 kprintf(" state: %s\n", name_for_state(fState)); 2681 kprintf(" flags: 0x%" B_PRIx32 "\n", fFlags); 2682 #if KDEBUG 2683 kprintf(" lock: { %p, holder: %" B_PRId32 " }\n", &fLock, fLock.holder); 2684 #endif 2685 kprintf(" accept sem: %" B_PRId32 "\n", 
fAcceptSemaphore); 2686 kprintf(" options: 0x%" B_PRIx32 "\n", (uint32)fOptions); 2687 kprintf(" send\n"); 2688 kprintf(" window shift: %" B_PRIu8 "\n", fSendWindowShift); 2689 kprintf(" unacknowledged: %" B_PRIu32 "\n", 2690 fSendUnacknowledged.Number()); 2691 kprintf(" next: %" B_PRIu32 "\n", fSendNext.Number()); 2692 kprintf(" max: %" B_PRIu32 "\n", fSendMax.Number()); 2693 kprintf(" urgent offset: %" B_PRIu32 "\n", fSendUrgentOffset.Number()); 2694 kprintf(" window: %" B_PRIu32 "\n", fSendWindow); 2695 kprintf(" max window: %" B_PRIu32 "\n", fSendMaxWindow); 2696 kprintf(" max segment size: %" B_PRIu32 "\n", fSendMaxSegmentSize); 2697 kprintf(" queue: %" B_PRIuSIZE " / %" B_PRIuSIZE "\n", fSendQueue.Used(), 2698 fSendQueue.Size()); 2699 #if DEBUG_TCP_BUFFER_QUEUE 2700 fSendQueue.Dump(); 2701 #endif 2702 kprintf(" last acknowledge sent: %" B_PRIu32 "\n", 2703 fLastAcknowledgeSent.Number()); 2704 kprintf(" initial sequence: %" B_PRIu32 "\n", 2705 fInitialSendSequence.Number()); 2706 kprintf(" receive\n"); 2707 kprintf(" window shift: %" B_PRIu8 "\n", fReceiveWindowShift); 2708 kprintf(" next: %" B_PRIu32 "\n", fReceiveNext.Number()); 2709 kprintf(" max advertised: %" B_PRIu32 "\n", 2710 fReceiveMaxAdvertised.Number()); 2711 kprintf(" window: %" B_PRIu32 "\n", fReceiveWindow); 2712 kprintf(" max segment size: %" B_PRIu32 "\n", fReceiveMaxSegmentSize); 2713 kprintf(" queue: %" B_PRIuSIZE " / %" B_PRIuSIZE "\n", 2714 fReceiveQueue.Available(), fReceiveQueue.Size()); 2715 #if DEBUG_TCP_BUFFER_QUEUE 2716 fReceiveQueue.Dump(); 2717 #endif 2718 kprintf(" initial sequence: %" B_PRIu32 "\n", 2719 fInitialReceiveSequence.Number()); 2720 kprintf(" duplicate acknowledge count: %" B_PRIu32 "\n", 2721 fDuplicateAcknowledgeCount); 2722 kprintf(" smoothed round trip time: %" B_PRId32 " (deviation %" B_PRId32 ")\n", 2723 fSmoothedRoundTripTime, fRoundTripVariation); 2724 kprintf(" retransmit timeout: %" B_PRId64 "\n", fRetransmitTimeout); 2725 kprintf(" congestion window: %" 
B_PRIu32 "\n", fCongestionWindow); 2726 kprintf(" slow start threshold: %" B_PRIu32 "\n", fSlowStartThreshold); 2727 } 2728 2729