1 /* 2 * Copyright 2006-2008, Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Andrew Galante, haiku.galante@gmail.com 7 * Axel Dörfler, axeld@pinc-software.de 8 * Hugo Santos, hugosantos@gmail.com 9 */ 10 11 12 #include "TCPEndpoint.h" 13 14 #include <netinet/in.h> 15 #include <netinet/ip.h> 16 #include <netinet/tcp.h> 17 #include <new> 18 #include <signal.h> 19 #include <stdlib.h> 20 #include <string.h> 21 22 #include <KernelExport.h> 23 #include <Select.h> 24 25 #include <net_buffer.h> 26 #include <net_datalink.h> 27 #include <net_stat.h> 28 #include <NetBufferUtilities.h> 29 #include <NetUtilities.h> 30 31 #include <lock.h> 32 #include <tracing.h> 33 #include <util/AutoLock.h> 34 #include <util/khash.h> 35 #include <util/list.h> 36 37 #include "EndpointManager.h" 38 39 40 // References: 41 // - RFC 793 - Transmission Control Protocol 42 // - RFC 813 - Window and Acknowledgement Strategy in TCP 43 // - RFC 1337 - TIME_WAIT Assassination Hazards in TCP 44 // 45 // Things this implementation currently doesn't implement: 46 // - TCP Slow Start, Congestion Avoidance, Fast Retransmit, and Fast Recovery, 47 // RFC 2001, RFC 2581, RFC 3042 48 // - NewReno Modification to TCP's Fast Recovery, RFC 2582 49 // - Explicit Congestion Notification (ECN), RFC 3168 50 // - SYN-Cache 51 // - TCP Extensions for High Performance, RFC 1323 52 // - SACK, Selective Acknowledgment - RFC 2018, RFC 2883, RFC 3517 53 // - Forward RTO-Recovery, RFC 4138 54 // - Time-Wait hash instead of keeping sockets alive 55 56 #define PrintAddress(address) \ 57 AddressString(Domain(), address, true).Data() 58 59 //#define TRACE_TCP 60 //#define PROBE_TCP 61 62 #ifdef TRACE_TCP 63 // the space before ', ##args' is important in order for this to work with cpp 2.95 64 # define TRACE(format, args...) 
dprintf("%ld: TCP [%llu] %p (%12s) " \ 65 format "\n", find_thread(NULL), system_time(), this, \ 66 name_for_state(fState) , ##args) 67 #else 68 # define TRACE(args...) do { } while (0) 69 #endif 70 71 #ifdef PROBE_TCP 72 # define PROBE(buffer, window) \ 73 dprintf("TCP PROBE %llu %s %s %ld snxt %lu suna %lu cw %lu sst %lu win %lu swin %lu smax-suna %lu savail %lu sqused %lu rto %llu\n", \ 74 system_time(), PrintAddress(buffer->source), \ 75 PrintAddress(buffer->destination), buffer->size, (uint32)fSendNext, \ 76 (uint32)fSendUnacknowledged, fCongestionWindow, fSlowStartThreshold, \ 77 window, fSendWindow, (uint32)(fSendMax - fSendUnacknowledged), \ 78 fSendQueue.Available(fSendNext), fSendQueue.Used(), fRetransmitTimeout) 79 #else 80 # define PROBE(buffer, window) do { } while (0) 81 #endif 82 83 #if TCP_TRACING 84 namespace TCPTracing { 85 86 class Receive : public AbstractTraceEntry { 87 public: 88 Receive(TCPEndpoint* endpoint, tcp_segment_header& segment, uint32 window, 89 net_buffer* buffer) 90 : 91 fEndpoint(endpoint), 92 fBuffer(buffer), 93 fBufferSize(buffer->size), 94 fSequence(segment.sequence), 95 fAcknowledge(segment.acknowledge), 96 fWindow(window), 97 fState(endpoint->State()), 98 fFlags(segment.flags) 99 { 100 Initialized(); 101 } 102 103 virtual void AddDump(TraceOutput& out) 104 { 105 out.Print("tcp:%p (%12s) receive buffer %p (%lu bytes), flags %x, " 106 "seq %lu, ack %lu, wnd %lu", fEndpoint, name_for_state(fState), 107 fBuffer, fBufferSize, fFlags, fSequence, fAcknowledge, fWindow); 108 } 109 110 protected: 111 TCPEndpoint* fEndpoint; 112 net_buffer* fBuffer; 113 uint32 fBufferSize; 114 uint32 fSequence; 115 uint32 fAcknowledge; 116 uint32 fWindow; 117 tcp_state fState; 118 uint8 fFlags; 119 }; 120 121 class Send : public AbstractTraceEntry { 122 public: 123 Send(TCPEndpoint* endpoint, tcp_segment_header& segment, net_buffer* buffer, 124 uint32 firstSequence, uint32 lastSequence) 125 : 126 fEndpoint(endpoint), 127 fBuffer(buffer), 128 
fBufferSize(buffer->size), 129 fSequence(segment.sequence), 130 fAcknowledge(segment.acknowledge), 131 fFirstSequence(firstSequence), 132 fLastSequence(lastSequence), 133 fState(endpoint->State()), 134 fFlags(segment.flags) 135 { 136 Initialized(); 137 } 138 139 virtual void AddDump(TraceOutput& out) 140 { 141 out.Print("tcp:%p (%12s) send buffer %p (%lu bytes), flags %x, " 142 "seq %lu, ack %lu, first %lu, last %lu", 143 fEndpoint, name_for_state(fState), fBuffer, fBufferSize, fFlags, 144 fSequence, fAcknowledge, fFirstSequence, fLastSequence); 145 } 146 147 protected: 148 TCPEndpoint* fEndpoint; 149 net_buffer* fBuffer; 150 uint32 fBufferSize; 151 uint32 fSequence; 152 uint32 fAcknowledge; 153 uint32 fFirstSequence; 154 uint32 fLastSequence; 155 tcp_state fState; 156 uint8 fFlags; 157 }; 158 159 class State : public AbstractTraceEntry { 160 public: 161 State(TCPEndpoint* endpoint) 162 : 163 fEndpoint(endpoint), 164 fState(endpoint->State()) 165 { 166 Initialized(); 167 } 168 169 virtual void AddDump(TraceOutput& out) 170 { 171 out.Print("tcp:%p (%12s) state change", fEndpoint, 172 name_for_state(fState)); 173 } 174 175 protected: 176 TCPEndpoint* fEndpoint; 177 tcp_state fState; 178 }; 179 180 class Timer : public AbstractTraceEntry { 181 public: 182 Timer(TCPEndpoint* endpoint, const char* which) 183 : 184 fEndpoint(endpoint), 185 fWhich(which), 186 fState(endpoint->State()) 187 { 188 Initialized(); 189 } 190 191 virtual void AddDump(TraceOutput& out) 192 { 193 out.Print("tcp:%p (%12s) %s timer", fEndpoint, 194 name_for_state(fState), fWhich); 195 } 196 197 protected: 198 TCPEndpoint* fEndpoint; 199 const char* fWhich; 200 tcp_state fState; 201 }; 202 203 } // namespace TCPTracing 204 205 # define T(x) new(std::nothrow) TCPTracing::x 206 #else 207 # define T(x) 208 #endif // TCP_TRACING 209 210 // Initial estimate for packet round trip time (RTT) 211 #define TCP_INITIAL_RTT 2000000 212 213 // constants for the fFlags field 214 enum { 215 FLAG_OPTION_WINDOW_SCALE 
= 0x01, 216 FLAG_OPTION_TIMESTAMP = 0x02, 217 // TODO: Should FLAG_NO_RECEIVE apply as well to received connections? 218 // That is, what is expected from accept() after a shutdown() 219 // is performed on a listen()ing socket. 220 FLAG_NO_RECEIVE = 0x04, 221 FLAG_CLOSED = 0x08, 222 FLAG_DELETE_ON_CLOSE = 0x10, 223 }; 224 225 226 static const int kTimestampFactor = 1024; 227 228 229 static inline bigtime_t 230 absolute_timeout(bigtime_t timeout) 231 { 232 if (timeout == 0 || timeout == B_INFINITE_TIMEOUT) 233 return timeout; 234 235 return timeout + system_time(); 236 } 237 238 239 static inline status_t 240 posix_error(status_t error) 241 { 242 if (error == B_TIMED_OUT) 243 return B_WOULD_BLOCK; 244 245 return error; 246 } 247 248 249 static inline bool 250 in_window(const tcp_sequence& sequence, const tcp_sequence& receiveNext, 251 uint32 receiveWindow) 252 { 253 return sequence >= receiveNext && sequence < (receiveNext + receiveWindow); 254 } 255 256 257 static inline bool 258 segment_in_sequence(const tcp_segment_header& segment, int size, 259 const tcp_sequence& receiveNext, uint32 receiveWindow) 260 { 261 tcp_sequence sequence(segment.sequence); 262 if (size == 0) { 263 if (receiveWindow == 0) 264 return sequence == receiveNext; 265 return in_window(sequence, receiveNext, receiveWindow); 266 } else { 267 if (receiveWindow == 0) 268 return false; 269 return in_window(sequence, receiveNext, receiveWindow) 270 || in_window(sequence + size - 1, receiveNext, receiveWindow); 271 } 272 } 273 274 275 static inline bool 276 is_writable(tcp_state state) 277 { 278 return state == SYNCHRONIZE_SENT || state == SYNCHRONIZE_RECEIVED 279 || state == ESTABLISHED || state == FINISH_RECEIVED; 280 } 281 282 283 static inline uint32 tcp_now() 284 { 285 return system_time() / kTimestampFactor; 286 } 287 288 289 static inline uint32 tcp_diff_timestamp(uint32 base) 290 { 291 uint32 now = tcp_now(); 292 293 if (now > base) 294 return now - base; 295 296 return now + UINT_MAX - base; 
297 } 298 299 300 static inline bool 301 state_needs_finish(int32 state) 302 { 303 return state == WAIT_FOR_FINISH_ACKNOWLEDGE 304 || state == FINISH_SENT || state == CLOSING; 305 } 306 307 308 // #pragma mark - 309 310 311 WaitList::WaitList(const char* name) 312 { 313 fCondition = 0; 314 fSem = create_sem(0, name); 315 } 316 317 318 WaitList::~WaitList() 319 { 320 delete_sem(fSem); 321 } 322 323 324 status_t 325 WaitList::InitCheck() const 326 { 327 return fSem; 328 } 329 330 331 status_t 332 WaitList::Wait(MutexLocker& locker, bigtime_t timeout) 333 { 334 locker.Unlock(); 335 336 status_t status = B_OK; 337 338 while (!atomic_test_and_set(&fCondition, 0, 1)) { 339 status = acquire_sem_etc(fSem, 1, B_ABSOLUTE_TIMEOUT | B_CAN_INTERRUPT, 340 timeout); 341 if (status != B_OK) 342 break; 343 } 344 345 locker.Lock(); 346 return status; 347 } 348 349 350 void 351 WaitList::Signal() 352 { 353 atomic_or(&fCondition, 1); 354 #ifdef __HAIKU__ 355 release_sem_etc(fSem, 1, B_DO_NOT_RESCHEDULE | B_RELEASE_ALL); 356 #else 357 int32 count; 358 if (get_sem_count(fSem, &count) == B_OK && count < 0) 359 release_sem_etc(fSem, -count, B_DO_NOT_RESCHEDULE); 360 #endif 361 } 362 363 364 // #pragma mark - 365 366 367 TCPEndpoint::TCPEndpoint(net_socket* socket) 368 : ProtocolSocket(socket), 369 fManager(NULL), 370 fReceiveList("tcp receive"), 371 fSendList("tcp send"), 372 fOptions(0), 373 fSendWindowShift(0), 374 fReceiveWindowShift(0), 375 fSendUnacknowledged(0), 376 fSendNext(0), 377 fSendMax(0), 378 fSendUrgentOffset(0), 379 fSendWindow(0), 380 fSendMaxWindow(0), 381 fSendMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE), 382 fSendQueue(socket->send.buffer_size), 383 fInitialSendSequence(0), 384 fDuplicateAcknowledgeCount(0), 385 fRoute(NULL), 386 fReceiveNext(0), 387 fReceiveMaxAdvertised(0), 388 fReceiveWindow(socket->receive.buffer_size), 389 fReceiveMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE), 390 fReceiveQueue(socket->receive.buffer_size), 391 fRoundTripTime(TCP_INITIAL_RTT / 
kTimestampFactor), 392 fRoundTripDeviation(TCP_INITIAL_RTT / kTimestampFactor), 393 fRetransmitTimeout(TCP_INITIAL_RTT), 394 fReceivedTimestamp(0), 395 fCongestionWindow(0), 396 fSlowStartThreshold(0), 397 fState(CLOSED), 398 fFlags(FLAG_OPTION_WINDOW_SCALE | FLAG_OPTION_TIMESTAMP) 399 { 400 // TODO: to be replaced with a real read/write locking strategy! 401 mutex_init(&fLock, "tcp lock"); 402 403 gStackModule->init_timer(&fPersistTimer, TCPEndpoint::_PersistTimer, this); 404 gStackModule->init_timer(&fRetransmitTimer, TCPEndpoint::_RetransmitTimer, 405 this); 406 gStackModule->init_timer(&fDelayedAcknowledgeTimer, 407 TCPEndpoint::_DelayedAcknowledgeTimer, this); 408 gStackModule->init_timer(&fTimeWaitTimer, TCPEndpoint::_TimeWaitTimer, this); 409 } 410 411 412 TCPEndpoint::~TCPEndpoint() 413 { 414 mutex_lock(&fLock); 415 416 _CancelConnectionTimers(); 417 gStackModule->cancel_timer(&fTimeWaitTimer); 418 419 if (fManager != NULL) { 420 fManager->Unbind(this); 421 put_endpoint_manager(fManager); 422 } 423 424 mutex_destroy(&fLock); 425 426 // TODO: we need to wait for all timers to return 427 //_WaitForTimers(); 428 } 429 430 431 status_t 432 TCPEndpoint::InitCheck() const 433 { 434 if (fReceiveList.InitCheck() < B_OK) 435 return fReceiveList.InitCheck(); 436 437 if (fSendList.InitCheck() < B_OK) 438 return fSendList.InitCheck(); 439 440 return B_OK; 441 } 442 443 444 // #pragma mark - protocol API 445 446 447 status_t 448 TCPEndpoint::Open() 449 { 450 TRACE("Open()"); 451 452 status_t status = ProtocolSocket::Open(); 453 if (status < B_OK) 454 return status; 455 456 fManager = get_endpoint_manager(Domain()); 457 if (fManager == NULL) 458 return EAFNOSUPPORT; 459 460 return B_OK; 461 } 462 463 464 status_t 465 TCPEndpoint::Close() 466 { 467 TRACE("Close()"); 468 469 MutexLocker lock(fLock); 470 471 if (fState == LISTEN) 472 delete_sem(fAcceptSemaphore); 473 474 if (fState == SYNCHRONIZE_SENT || fState == LISTEN) { 475 // TODO: what about linger in case of 
SYNCHRONIZE_SENT? 476 fState = CLOSED; 477 T(State(this)); 478 return B_OK; 479 } 480 481 status_t status = _Disconnect(true); 482 if (status != B_OK) 483 return status; 484 485 if (socket->options & SO_LINGER) { 486 TRACE("Close(): Lingering for %i secs", socket->linger); 487 488 bigtime_t maximum = absolute_timeout(socket->linger * 1000000LL); 489 490 while (fSendQueue.Used() > 0) { 491 status = fSendList.Wait(lock, maximum); 492 if (status == B_TIMED_OUT || status == B_WOULD_BLOCK) 493 break; 494 else if (status < B_OK) 495 return status; 496 } 497 498 TRACE("Close(): after waiting, the SendQ was left with %lu bytes.", 499 fSendQueue.Used()); 500 } 501 502 return B_OK; 503 } 504 505 506 status_t 507 TCPEndpoint::Free() 508 { 509 TRACE("Free()"); 510 511 MutexLocker _(fLock); 512 513 if (fState <= SYNCHRONIZE_SENT) 514 return B_OK; 515 516 // we are only interested in the timer, not in changing state 517 _EnterTimeWait(); 518 519 fFlags |= FLAG_CLOSED; 520 if ((fFlags & FLAG_DELETE_ON_CLOSE) != 0) 521 return B_OK; 522 523 return B_BUSY; 524 // we'll be freed later when the 2MSL timer expires 525 } 526 527 528 /*! Creates and sends a synchronize packet to /a address, and then waits 529 until the connection has been established or refused. 
530 */ 531 status_t 532 TCPEndpoint::Connect(const sockaddr* address) 533 { 534 TRACE("Connect() on address %s", PrintAddress(address)); 535 536 MutexLocker locker(fLock); 537 538 if (gStackModule->is_restarted_syscall()) { 539 bigtime_t timeout = gStackModule->restore_syscall_restart_timeout(); 540 status_t status = _WaitForEstablished(locker, timeout); 541 TRACE(" Connect(): Connection complete: %s (timeout was %llu)", 542 strerror(status), timeout); 543 return posix_error(status); 544 } 545 546 // Can only call connect() from CLOSED or LISTEN states 547 // otherwise endpoint is considered already connected 548 if (fState == LISTEN) { 549 // this socket is about to connect; remove pending connections in the backlog 550 gSocketModule->set_max_backlog(socket, 0); 551 } else if (fState == ESTABLISHED) { 552 return EISCONN; 553 } else if (fState != CLOSED) 554 return EINPROGRESS; 555 556 // TODO: this is IPv4 specific, and doesn't belong here! 557 // consider destination address INADDR_ANY as INADDR_LOOPBACK 558 sockaddr_in _address; 559 if (((sockaddr_in*)address)->sin_addr.s_addr == INADDR_ANY) { 560 memcpy(&_address, address, sizeof(sockaddr_in)); 561 _address.sin_len = sizeof(sockaddr_in); 562 _address.sin_addr.s_addr = INADDR_LOOPBACK; 563 address = (sockaddr*)&_address; 564 } 565 566 status_t status = _PrepareSendPath(address); 567 if (status < B_OK) 568 return status; 569 570 TRACE(" Connect(): starting 3-way handshake..."); 571 572 fState = SYNCHRONIZE_SENT; 573 T(State(this)); 574 575 // send SYN 576 status = _SendQueued(); 577 if (status != B_OK) { 578 _Close(); 579 return status; 580 } 581 582 // If we are running over Loopback, after _SendQueued() returns we 583 // may be in ESTABLISHED already. 
584 if (fState == ESTABLISHED) { 585 TRACE(" Connect() completed after _SendQueued()"); 586 return B_OK; 587 } 588 589 // wait until 3-way handshake is complete (if needed) 590 bigtime_t timeout = min_c(socket->send.timeout, TCP_CONNECTION_TIMEOUT); 591 if (timeout == 0) { 592 // we're a non-blocking socket 593 TRACE(" Connect() delayed, return EINPROGRESS"); 594 return EINPROGRESS; 595 } 596 597 bigtime_t absoluteTimeout = absolute_timeout(timeout); 598 gStackModule->store_syscall_restart_timeout(absoluteTimeout); 599 600 status = _WaitForEstablished(locker, absoluteTimeout); 601 TRACE(" Connect(): Connection complete: %s (timeout was %llu)", 602 strerror(status), timeout); 603 return posix_error(status); 604 } 605 606 607 status_t 608 TCPEndpoint::Accept(struct net_socket** _acceptedSocket) 609 { 610 TRACE("Accept()"); 611 612 MutexLocker locker(fLock); 613 614 status_t status; 615 bigtime_t timeout = absolute_timeout(socket->receive.timeout); 616 if (gStackModule->is_restarted_syscall()) 617 timeout = gStackModule->restore_syscall_restart_timeout(); 618 else 619 gStackModule->store_syscall_restart_timeout(timeout); 620 621 do { 622 locker.Unlock(); 623 624 status = acquire_sem_etc(fAcceptSemaphore, 1, B_ABSOLUTE_TIMEOUT 625 | B_CAN_INTERRUPT, timeout); 626 if (status < B_OK) 627 return status; 628 629 locker.Lock(); 630 status = gSocketModule->dequeue_connected(socket, _acceptedSocket); 631 #ifdef TRACE_TCP 632 if (status == B_OK) 633 TRACE(" Accept() returning %p", (*_acceptedSocket)->first_protocol); 634 #endif 635 } while (status < B_OK); 636 637 return status; 638 } 639 640 641 status_t 642 TCPEndpoint::Bind(const sockaddr *address) 643 { 644 if (address == NULL) 645 return B_BAD_VALUE; 646 647 MutexLocker lock(fLock); 648 649 TRACE("Bind() on address %s", PrintAddress(address)); 650 651 if (fState != CLOSED) 652 return EISCONN; 653 654 return fManager->Bind(this, address); 655 } 656 657 658 status_t 659 TCPEndpoint::Unbind(struct sockaddr *address) 660 { 
661 TRACE("Unbind()"); 662 663 MutexLocker lock(fLock); 664 return fManager->Unbind(this); 665 } 666 667 668 status_t 669 TCPEndpoint::Listen(int count) 670 { 671 TRACE("Listen()"); 672 673 MutexLocker lock(fLock); 674 675 if (fState != CLOSED) 676 return B_BAD_VALUE; 677 678 fAcceptSemaphore = create_sem(0, "tcp accept"); 679 if (fAcceptSemaphore < B_OK) 680 return ENOBUFS; 681 682 status_t status = fManager->SetPassive(this); 683 if (status < B_OK) { 684 delete_sem(fAcceptSemaphore); 685 fAcceptSemaphore = -1; 686 return status; 687 } 688 689 fState = LISTEN; 690 T(State(this)); 691 return B_OK; 692 } 693 694 695 status_t 696 TCPEndpoint::Shutdown(int direction) 697 { 698 TRACE("Shutdown(%i)", direction); 699 700 MutexLocker lock(fLock); 701 702 if (direction == SHUT_RD || direction == SHUT_RDWR) 703 fFlags |= FLAG_NO_RECEIVE; 704 705 if (direction == SHUT_WR || direction == SHUT_RDWR) 706 _Disconnect(false); 707 708 return B_OK; 709 } 710 711 712 /*! Puts data contained in \a buffer into send buffer */ 713 status_t 714 TCPEndpoint::SendData(net_buffer *buffer) 715 { 716 MutexLocker lock(fLock); 717 718 TRACE("SendData(buffer %p, size %lu, flags %lx) [total %lu bytes, has %lu]", 719 buffer, buffer->size, buffer->flags, fSendQueue.Size(), 720 fSendQueue.Free()); 721 722 if (fState == CLOSED) 723 return ENOTCONN; 724 if (fState == LISTEN) 725 return EDESTADDRREQ; 726 if (fState == FINISH_SENT || fState == FINISH_ACKNOWLEDGED 727 || fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE 728 || fState == TIME_WAIT) { 729 // we only send signals when called from userland 730 if (gStackModule->is_syscall) 731 send_signal(find_thread(NULL), SIGPIPE); 732 return EPIPE; 733 } 734 735 uint32 flags = buffer->flags; 736 size_t left = buffer->size; 737 738 bigtime_t timeout = absolute_timeout(socket->send.timeout); 739 if (gStackModule->is_restarted_syscall()) 740 timeout = gStackModule->restore_syscall_restart_timeout(); 741 else 742 
gStackModule->store_syscall_restart_timeout(timeout); 743 744 while (left > 0) { 745 while (fSendQueue.Free() < socket->send.low_water_mark) { 746 // wait until enough space is available 747 status_t status = fSendList.Wait(lock, timeout); 748 if (status < B_OK) { 749 TRACE(" SendData() returning %s (%d)", 750 strerror(posix_error(status)), (int)posix_error(status)); 751 return posix_error(status); 752 } 753 } 754 755 size_t size = fSendQueue.Free(); 756 if (size < left) { 757 // we need to split the original buffer 758 net_buffer* clone = gBufferModule->clone(buffer, false); 759 // TODO: add offset/size parameter to net_buffer::clone() or 760 // even a move_data() function, as this is a bit inefficient 761 if (clone == NULL) 762 return ENOBUFS; 763 764 status_t status = gBufferModule->trim(clone, size); 765 if (status != B_OK) { 766 gBufferModule->free(clone); 767 return status; 768 } 769 770 gBufferModule->remove_header(buffer, size); 771 left -= size; 772 fSendQueue.Add(clone); 773 } else { 774 left -= buffer->size; 775 fSendQueue.Add(buffer); 776 } 777 } 778 779 TRACE(" SendData(): %lu bytes used.", fSendQueue.Used()); 780 781 bool force = false; 782 if ((flags & MSG_OOB) != 0) { 783 fSendUrgentOffset = fSendQueue.LastSequence(); 784 // RFC 961 specifies that the urgent offset points to the last 785 // byte of urgent data. However, this is commonly implemented as 786 // here, ie. it points to the first byte after the urgent data. 
787 force = true; 788 } 789 if ((flags & MSG_EOF) != 0) 790 _Disconnect(false); 791 792 if (fState == ESTABLISHED || fState == FINISH_RECEIVED) 793 _SendQueued(force); 794 795 return B_OK; 796 } 797 798 799 ssize_t 800 TCPEndpoint::SendAvailable() 801 { 802 MutexLocker locker(fLock); 803 804 ssize_t available; 805 806 if (is_writable(fState)) 807 available = fSendQueue.Free(); 808 else 809 available = EPIPE; 810 811 TRACE("SendAvailable(): %li", available); 812 return available; 813 } 814 815 816 status_t 817 TCPEndpoint::FillStat(net_stat *stat) 818 { 819 MutexLocker _(fLock); 820 821 strlcpy(stat->state, name_for_state(fState), sizeof(stat->state)); 822 stat->receive_queue_size = fReceiveQueue.Available(); 823 stat->send_queue_size = fSendQueue.Used(); 824 825 return B_OK; 826 } 827 828 829 status_t 830 TCPEndpoint::ReadData(size_t numBytes, uint32 flags, net_buffer** _buffer) 831 { 832 TRACE("ReadData(%lu bytes, flags 0x%x)", numBytes, (unsigned int)flags); 833 834 MutexLocker locker(fLock); 835 836 *_buffer = NULL; 837 838 if (fState == CLOSED) 839 return ENOTCONN; 840 841 bigtime_t timeout = absolute_timeout(socket->receive.timeout); 842 if (gStackModule->is_restarted_syscall()) 843 timeout = gStackModule->restore_syscall_restart_timeout(); 844 else 845 gStackModule->store_syscall_restart_timeout(timeout); 846 847 if (fState == SYNCHRONIZE_SENT || fState == SYNCHRONIZE_RECEIVED) { 848 if (flags & MSG_DONTWAIT) 849 return B_WOULD_BLOCK; 850 851 status_t status = _WaitForEstablished(locker, timeout); 852 if (status < B_OK) 853 return posix_error(status); 854 } 855 856 size_t dataNeeded = socket->receive.low_water_mark; 857 858 // When MSG_WAITALL is set then the function should block 859 // until the full amount of data can be returned. 
860 if (flags & MSG_WAITALL) 861 dataNeeded = numBytes; 862 863 // TODO: add support for urgent data (MSG_OOB) 864 865 while (true) { 866 if (fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE 867 || fState == TIME_WAIT) { 868 // ``Connection closing''. 869 return B_OK; 870 } 871 872 if (fReceiveQueue.Available() > 0) { 873 if (fReceiveQueue.Available() >= dataNeeded 874 || (fReceiveQueue.PushedData() > 0 875 && fReceiveQueue.PushedData() >= fReceiveQueue.Available())) 876 break; 877 } else if (fState == FINISH_RECEIVED) { 878 // ``If no text is awaiting delivery, the RECEIVE will 879 // get a Connection closing''. 880 return B_OK; 881 } 882 883 if ((flags & MSG_DONTWAIT) != 0 || socket->receive.timeout == 0) 884 return B_WOULD_BLOCK; 885 886 status_t status = fReceiveList.Wait(locker, timeout); 887 if (status < B_OK) { 888 // The Open Group base specification mentions that EINTR should be 889 // returned if the recv() is interrupted before _any data_ is 890 // available. So we actually check if there is data, and if so, 891 // push it to the user. 
892 if ((status == B_TIMED_OUT || status == B_INTERRUPTED) 893 && fReceiveQueue.Available() > 0) 894 break; 895 896 return posix_error(status); 897 } 898 } 899 900 TRACE(" ReadData(): %lu are available.", fReceiveQueue.Available()); 901 902 if (numBytes < fReceiveQueue.Available()) 903 fReceiveList.Signal(); 904 905 bool clone = (flags & MSG_PEEK) != 0; 906 907 ssize_t receivedBytes = fReceiveQueue.Get(numBytes, !clone, _buffer); 908 909 TRACE(" ReadData(): %lu bytes kept.", fReceiveQueue.Available()); 910 911 // if we are opening the window, check if we should send an ACK 912 if (!clone) 913 SendAcknowledge(false); 914 915 return receivedBytes; 916 } 917 918 919 ssize_t 920 TCPEndpoint::ReadAvailable() 921 { 922 MutexLocker locker(fLock); 923 924 TRACE("ReadAvailable(): %li", _AvailableData()); 925 926 return _AvailableData(); 927 } 928 929 930 status_t 931 TCPEndpoint::SetSendBufferSize(size_t length) 932 { 933 MutexLocker _(fLock); 934 fSendQueue.SetMaxBytes(length); 935 return B_OK; 936 } 937 938 939 status_t 940 TCPEndpoint::SetReceiveBufferSize(size_t length) 941 { 942 MutexLocker _(fLock); 943 fReceiveQueue.SetMaxBytes(length); 944 return B_OK; 945 } 946 947 948 status_t 949 TCPEndpoint::GetOption(int option, void* _value, int* _length) 950 { 951 if (*_length != sizeof(int)) 952 return B_BAD_VALUE; 953 954 int* value = (int*)_value; 955 956 switch (option) { 957 case TCP_NODELAY: 958 if ((fOptions & TCP_NODELAY) != 0) 959 *value = 1; 960 else 961 *value = 0; 962 return B_OK; 963 964 case TCP_MAXSEG: 965 *value = fReceiveMaxSegmentSize; 966 return B_OK; 967 968 default: 969 return B_BAD_VALUE; 970 } 971 } 972 973 974 status_t 975 TCPEndpoint::SetOption(int option, const void* _value, int length) 976 { 977 if (option != TCP_NODELAY) 978 return B_BAD_VALUE; 979 980 if (length != sizeof(int)) 981 return B_BAD_VALUE; 982 983 const int* value = (const int*)_value; 984 985 MutexLocker _(fLock); 986 if (*value) 987 fOptions |= TCP_NODELAY; 988 else 989 fOptions &= 
~TCP_NODELAY; 990 991 return B_OK; 992 } 993 994 995 // #pragma mark - misc 996 997 998 bool 999 TCPEndpoint::IsBound() const 1000 { 1001 return !LocalAddress().IsEmpty(true); 1002 } 1003 1004 1005 status_t 1006 TCPEndpoint::DelayedAcknowledge() 1007 { 1008 if (gStackModule->cancel_timer(&fDelayedAcknowledgeTimer)) { 1009 // timer was active, send an ACK now (with the exception above, 1010 // we send every other ACK) 1011 return SendAcknowledge(true); 1012 } 1013 1014 gStackModule->set_timer(&fDelayedAcknowledgeTimer, 1015 TCP_DELAYED_ACKNOWLEDGE_TIMEOUT); 1016 return B_OK; 1017 } 1018 1019 1020 status_t 1021 TCPEndpoint::SendAcknowledge(bool force) 1022 { 1023 return _SendQueued(force, 0); 1024 } 1025 1026 1027 void 1028 TCPEndpoint::_StartPersistTimer() 1029 { 1030 gStackModule->set_timer(&fPersistTimer, 1000000LL); 1031 } 1032 1033 1034 void 1035 TCPEndpoint::_EnterTimeWait() 1036 { 1037 TRACE("_EnterTimeWait()\n"); 1038 1039 _CancelConnectionTimers(); 1040 1041 if (fState == TIME_WAIT && fRoute != NULL 1042 && (fRoute->flags & RTF_LOCAL) != 0) { 1043 // we do not use TIME_WAIT state for local connections 1044 fFlags |= FLAG_DELETE_ON_CLOSE; 1045 return; 1046 } 1047 1048 _UpdateTimeWait(); 1049 } 1050 1051 1052 void 1053 TCPEndpoint::_UpdateTimeWait() 1054 { 1055 gStackModule->set_timer(&fTimeWaitTimer, TCP_MAX_SEGMENT_LIFETIME << 1); 1056 } 1057 1058 1059 void 1060 TCPEndpoint::_CancelConnectionTimers() 1061 { 1062 gStackModule->cancel_timer(&fRetransmitTimer); 1063 gStackModule->cancel_timer(&fPersistTimer); 1064 gStackModule->cancel_timer(&fDelayedAcknowledgeTimer); 1065 } 1066 1067 1068 /*! Sends the FIN flag to the peer when the connection is still open. 1069 Moves the endpoint to the next state depending on where it was. 
1070 */ 1071 status_t 1072 TCPEndpoint::_Disconnect(bool closing) 1073 { 1074 tcp_state previousState = fState; 1075 1076 if (fState == SYNCHRONIZE_RECEIVED || fState == ESTABLISHED) 1077 fState = FINISH_SENT; 1078 else if (fState == FINISH_RECEIVED) 1079 fState = WAIT_FOR_FINISH_ACKNOWLEDGE; 1080 else 1081 return B_OK; 1082 1083 T(State(this)); 1084 1085 status_t status = _SendQueued(); 1086 if (status != B_OK) { 1087 fState = previousState; 1088 T(State(this)); 1089 return status; 1090 } 1091 1092 return B_OK; 1093 } 1094 1095 1096 void 1097 TCPEndpoint::_MarkEstablished() 1098 { 1099 fState = ESTABLISHED; 1100 T(State(this)); 1101 1102 if (socket->parent != NULL) { 1103 gSocketModule->set_connected(socket); 1104 release_sem_etc(fAcceptSemaphore, 1, B_DO_NOT_RESCHEDULE); 1105 } 1106 1107 fSendList.Signal(); 1108 } 1109 1110 1111 status_t 1112 TCPEndpoint::_WaitForEstablished(MutexLocker &locker, bigtime_t timeout) 1113 { 1114 while (fState != ESTABLISHED) { 1115 if (socket->error != B_OK) 1116 return socket->error; 1117 1118 status_t status = fSendList.Wait(locker, timeout); 1119 if (status < B_OK) 1120 return status; 1121 } 1122 1123 return B_OK; 1124 } 1125 1126 1127 // #pragma mark - receive 1128 1129 1130 void 1131 TCPEndpoint::_Close() 1132 { 1133 _CancelConnectionTimers(); 1134 fState = CLOSED; 1135 T(State(this)); 1136 1137 fFlags |= FLAG_DELETE_ON_CLOSE; 1138 } 1139 1140 1141 void 1142 TCPEndpoint::_HandleReset(status_t error) 1143 { 1144 socket->error = error; 1145 _Close(); 1146 1147 fSendList.Signal(); 1148 _NotifyReader(); 1149 1150 gSocketModule->notify(socket, B_SELECT_WRITE, error); 1151 gSocketModule->notify(socket, B_SELECT_ERROR, error); 1152 } 1153 1154 1155 void 1156 TCPEndpoint::_DuplicateAcknowledge(tcp_segment_header &segment) 1157 { 1158 if (++fDuplicateAcknowledgeCount < 3) 1159 return; 1160 1161 if (fDuplicateAcknowledgeCount == 3) { 1162 _ResetSlowStart(); 1163 fCongestionWindow = fSlowStartThreshold + 3 1164 * fSendMaxSegmentSize; 1165 
fSendNext = segment.acknowledge;
	} else if (fDuplicateAcknowledgeCount > 3)
		fCongestionWindow += fSendMaxSegmentSize;

	_SendQueued();
}


/*!	Remembers the peer's timestamp value (it is later sent back as the
	timestamp reply by _SendQueued()). This only happens when the timestamp
	option is enabled for this endpoint, and the last acknowledgement we sent
	lies within [segment.sequence, segment.sequence + segmentLength).
*/
void
TCPEndpoint::_UpdateTimestamps(tcp_segment_header& segment,
	size_t segmentLength)
{
	if (fFlags & FLAG_OPTION_TIMESTAMP) {
		tcp_sequence sequence(segment.sequence);

		if (fLastAcknowledgeSent >= sequence
			&& fLastAcknowledgeSent < (sequence + segmentLength))
			fReceivedTimestamp = segment.timestamp_value;
	}
}


/*!	Returns what a reader can currently get from this endpoint:
	- in LISTEN state, whatever count_connected() reports for the socket,
	- in SYNCHRONIZE_SENT state, 0,
	- otherwise the number of bytes available in the receive queue, or
	  ENOTCONN if nothing is available and _ShouldReceive() is \c false.
*/
ssize_t
TCPEndpoint::_AvailableData() const
{
	// TODO: Refer to the FLAG_NO_RECEIVE comment above regarding
	//	the application of FLAG_NO_RECEIVE in listen()ing
	//	sockets.
	if (fState == LISTEN)
		return gSocketModule->count_connected(socket);
	if (fState == SYNCHRONIZE_SENT)
		return 0;

	ssize_t availableData = fReceiveQueue.Available();

	if (availableData == 0 && !_ShouldReceive())
		return ENOTCONN;

	return availableData;
}


/*!	Signals fReceiveList and notifies the socket module of a B_SELECT_READ
	event, passing along the current result of _AvailableData().
*/
void
TCPEndpoint::_NotifyReader()
{
	fReceiveList.Signal();
	gSocketModule->notify(socket, B_SELECT_READ, _AvailableData());
}


/*!	Adds \a buffer to the receive queue at the segment's sequence number, and
	updates fReceiveNext from the queue's next sequence. If the segment carries
	the PUSH flag, the queue's push pointer is set as well.
	\return \c true if the receive queue reports available data afterwards.
*/
bool
TCPEndpoint::_AddData(tcp_segment_header& segment, net_buffer* buffer)
{
	fReceiveQueue.Add(buffer, segment.sequence);
	fReceiveNext = fReceiveQueue.NextSequence();

	TRACE(" _AddData(): adding data, receive next = %lu. Now have %lu bytes.",
		(uint32)fReceiveNext, fReceiveQueue.Available());

	if (segment.flags & TCP_FLAG_PUSH)
		fReceiveQueue.SetPushPointer();

	return fReceiveQueue.Available() > 0;
}


/*!	Initializes the receive side of the connection from the peer's SYN
	\a segment: stores the initial receive sequence, advances
	\a segment.sequence by one for the SYN, and - unless TCP_NOOPT is set -
	picks up the peer's maximum segment size, window scale, and timestamp
	options. Finally, the congestion window and the slow start threshold are
	set up.
	Note that \a segment.sequence is modified by this method.
*/
void
TCPEndpoint::_PrepareReceivePath(tcp_segment_header& segment)
{
	fInitialReceiveSequence = segment.sequence;

	// count the received SYN
	segment.sequence++;

	fReceiveNext = segment.sequence;
	fReceiveQueue.SetInitialSequence(segment.sequence);

	if ((fOptions & TCP_NOOPT) == 0) {
		if (segment.max_segment_size > 0)
			fSendMaxSegmentSize = segment.max_segment_size;

		if (segment.options & TCP_HAS_WINDOW_SCALE) {
			fFlags |= FLAG_OPTION_WINDOW_SCALE;
			fSendWindowShift = segment.window_shift;
		} else {
			// the peer did not send the option, so we must not scale the
			// window we advertise either
			fFlags &= ~FLAG_OPTION_WINDOW_SCALE;
			fReceiveWindowShift = 0;
		}

		if (segment.options & TCP_HAS_TIMESTAMPS) {
			fFlags |= FLAG_OPTION_TIMESTAMP;
			fReceivedTimestamp = segment.timestamp_value;
		} else
			fFlags &= ~FLAG_OPTION_TIMESTAMP;
	}

	fCongestionWindow = 2 * fSendMaxSegmentSize;
	fSlowStartThreshold = (uint32)segment.advertised_window << fSendWindowShift;
}


/*!	Returns whether incoming data should be accepted: FLAG_NO_RECEIVE must
	not be set, and the state must be one of ESTABLISHED, FINISH_SENT, or
	FINISH_ACKNOWLEDGED.
*/
bool
TCPEndpoint::_ShouldReceive() const
{
	if (fFlags & FLAG_NO_RECEIVE)
		return false;

	return fState == ESTABLISHED || fState == FINISH_SENT
		|| fState == FINISH_ACKNOWLEDGED;
}


/*!	Sets up this (newly spawned) endpoint for the connection requested by the
	SYN \a segment that arrived at the listening \a parent: it takes over the
	parent's manager, options and accept semaphore, binds to the addresses
	found in \a buffer, sends the SYN+ACK, and then hands the segment (with
	the SYN flag cleared) to _Receive().
	\return the segment action; DROP if any of the setup steps fails.
*/
int32
TCPEndpoint::_Spawn(TCPEndpoint* parent, tcp_segment_header& segment,
	net_buffer* buffer)
{
	MutexLocker _(fLock);

	// TODO error checking
	ProtocolSocket::Open();

	fState = SYNCHRONIZE_RECEIVED;
	T(State(this));
	fManager = parent->fManager;

	LocalAddress().SetTo(buffer->destination);
	PeerAddress().SetTo(buffer->source);

	TRACE("Spawn()");

	// TODO: proper error handling!
	if (fManager->BindChild(this) < B_OK)
		return DROP;

	if (_PrepareSendPath(*PeerAddress()) < B_OK)
		return DROP;

	fOptions = parent->fOptions;
	fAcceptSemaphore = parent->fAcceptSemaphore;

	_PrepareReceivePath(segment);

	// send SYN+ACK
	if (_SendQueued() < B_OK)
		return DROP;

	segment.flags &= ~TCP_FLAG_SYNCHRONIZE;
		// we handled this flag now, it must not be set for further processing

	return _Receive(segment, buffer);
}


/*!	Handles a segment that arrived while in LISTEN state. Only a plain SYN is
	accepted: a RST is dropped, an ACK is dropped and answered with a reset,
	and anything without the SYN flag is dropped. For an accepted SYN, a
	pending socket is spawned, and the segment is passed on to the new
	endpoint's _Spawn().
*/
int32
TCPEndpoint::_ListenReceive(tcp_segment_header& segment, net_buffer* buffer)
{
	TRACE("ListenReceive()");

	// Essentially, we accept only TCP_FLAG_SYNCHRONIZE in this state,
	// but the error behaviour differs
	if (segment.flags & TCP_FLAG_RESET)
		return DROP;
	if (segment.flags & TCP_FLAG_ACKNOWLEDGE)
		return DROP | RESET;
	if ((segment.flags & TCP_FLAG_SYNCHRONIZE) == 0)
		return DROP;

	// TODO: drop broadcast/multicast

	// spawn new endpoint for accept()
	net_socket *newSocket;
	if (gSocketModule->spawn_pending_socket(socket, &newSocket) < B_OK)
		return DROP;

	return ((TCPEndpoint *)newSocket->first_protocol)->_Spawn(this,
		segment, buffer);
}


/*!	Handles a segment that arrived while in SYNCHRONIZE_SENT state, ie. after
	we have sent our SYN.
	An ACK outside of (fInitialSendSequence, fSendMax] is answered with a
	reset, a RST resets the connection with ECONNREFUSED, and segments without
	SYN are dropped. Otherwise the receive path is prepared; with an ACK the
	connection is marked established, without one the state changes to
	SYNCHRONIZE_RECEIVED (simultaneous open). The remaining processing is
	done by _Receive(), and an immediate acknowledge is always requested.
*/
int32
TCPEndpoint::_SynchronizeSentReceive(tcp_segment_header &segment,
	net_buffer *buffer)
{
	TRACE("_SynchronizeSentReceive()");

	if ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0
		&& (fInitialSendSequence >= segment.acknowledge
			|| fSendMax < segment.acknowledge))
		return DROP | RESET;

	if (segment.flags & TCP_FLAG_RESET) {
		_HandleReset(ECONNREFUSED);
		return DROP;
	}

	if ((segment.flags & TCP_FLAG_SYNCHRONIZE) == 0)
		return DROP;

	// NOTE(review): this is also executed when the segment does not carry
	// the ACK flag (simultaneous open), in which case the acknowledge field
	// has not been validated above - confirm this is intended.
	fSendUnacknowledged = segment.acknowledge;
	_PrepareReceivePath(segment);

	if (segment.flags & TCP_FLAG_ACKNOWLEDGE) {
		_MarkEstablished();
	} else {
		// simultaneous open
		fState = SYNCHRONIZE_RECEIVED;
		T(State(this));
	}

	segment.flags &= ~TCP_FLAG_SYNCHRONIZE;
		// we handled this flag now, it must not be set for further processing

	return _Receive(segment, buffer) | IMMEDIATE_ACKNOWLEDGE;
}


/*!	The main segment processing for all states but LISTEN and
	SYNCHRONIZE_SENT (see the ASSERT() below).
	After trying the header prediction fast path, the segment is checked
	against the receive window, resets and unexpected SYNs are handled, the
	buffer is trimmed to the receive window, and then the acknowledgement,
	the data, and finally a FIN are processed in that order.
	\return a combination of KEEP/DROP with the acknowledge/reset actions the
		caller is supposed to carry out.
*/
int32
TCPEndpoint::_Receive(tcp_segment_header& segment, net_buffer* buffer)
{
	uint32 advertisedWindow = (uint32)segment.advertised_window
		<< fSendWindowShift;
	size_t segmentLength = buffer->size;

	// First, handle the most common case for uni-directional data transfer
	// (known as header prediction - the segment must not change the window,
	// and must be the expected sequence, and contain no control flags)

	if (fState == ESTABLISHED
		&& segment.AcknowledgeOnly()
		&& fReceiveNext == segment.sequence
		&& advertisedWindow > 0 && advertisedWindow == fSendWindow
		&& fSendNext == fSendMax) {
		_UpdateTimestamps(segment, segmentLength);

		if (segmentLength == 0) {
			// this is a pure acknowledge segment - we're on the sending end
			if (fSendUnacknowledged < segment.acknowledge
				&& fSendMax >= segment.acknowledge) {
				_Acknowledged(segment);
				return DROP;
			}
		} else if (segment.acknowledge == fSendUnacknowledged
			&& fReceiveQueue.IsContiguous()
			&& fReceiveQueue.Free() >= segmentLength
			&& !(fFlags & FLAG_NO_RECEIVE)) {
			// this is a pure in-sequence data segment - we're on the
			// receiving end
			if (_AddData(segment, buffer))
				_NotifyReader();

			return KEEP | ((segment.flags & TCP_FLAG_PUSH) != 0
				? IMMEDIATE_ACKNOWLEDGE : ACKNOWLEDGE);
		}
	}

	// The fast path was not applicable, so we continue with the standard
	// processing of the incoming segment

	ASSERT(fState != SYNCHRONIZE_SENT && fState != LISTEN);

	if (fState != CLOSED && fState != TIME_WAIT) {
		// Check sequence number
		if (!segment_in_sequence(segment, segmentLength, fReceiveNext,
				fReceiveWindow)) {
			TRACE(" Receive(): segment out of window, next: %lu wnd: %lu",
				(uint32)fReceiveNext, fReceiveWindow);
			if (segment.flags & TCP_FLAG_RESET) {
				// TODO: this doesn't look right - review!
				return DROP;
			}
			return DROP | IMMEDIATE_ACKNOWLEDGE;
		}
	}

	if (segment.flags & TCP_FLAG_RESET) {
		// Is this a valid reset?
		// We generally ignore resets in time wait state (see RFC 1337)
		if (fLastAcknowledgeSent <= segment.sequence
			&& tcp_sequence(segment.sequence) < (fLastAcknowledgeSent
				+ fReceiveWindow)
			&& fState != TIME_WAIT) {
			status_t error;
			if (fState == SYNCHRONIZE_RECEIVED)
				error = ECONNREFUSED;
			else if (fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE)
				error = ENOTCONN;
			else
				error = ECONNRESET;

			_HandleReset(error);
		}

		return DROP;
	}

	// Note: the "&&" below binds tighter than the "||" preceding it, ie. the
	// acknowledge range is only checked if the ACK flag is set.
	if ((segment.flags & TCP_FLAG_SYNCHRONIZE) != 0
		|| (fState == SYNCHRONIZE_RECEIVED
			&& (fInitialReceiveSequence > segment.sequence
				|| (segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0
					&& (fSendUnacknowledged > segment.acknowledge
						|| fSendMax < segment.acknowledge)))) {
		// reset the connection - either the initial SYN was faulty, or we
		// received a SYN within the data stream
		return DROP | RESET;
	}

	fReceiveWindow = max_c(fReceiveQueue.Free(), fReceiveWindow);
		// the window must not shrink

	// trim buffer to be within the receive window
	int32 drop = fReceiveNext - segment.sequence;
	if (drop > 0) {
		if ((uint32)drop > buffer->size
			|| ((uint32)drop == buffer->size
				&& (segment.flags & TCP_FLAG_FINISH) == 0)) {
			// don't accidentally remove a FIN we shouldn't remove
			segment.flags &= ~TCP_FLAG_FINISH;
			drop = buffer->size;
		}

		// remove duplicate data at the start
		TRACE("* remove %ld bytes from the start", drop);
		gBufferModule->remove_header(buffer, drop);
		segment.sequence += drop;
	}

	int32 action = KEEP;

	drop = segment.sequence + buffer->size - (fReceiveNext + fReceiveWindow);
	if (drop > 0) {
		// remove data exceeding our window
		if ((uint32)drop >= buffer->size) {
			// if we can accept data, or the segment is not what we'd expect,
			// drop the segment (an immediate acknowledge is always triggered)
			if (fReceiveWindow != 0 || segment.sequence != fReceiveNext)
				return DROP | IMMEDIATE_ACKNOWLEDGE;

			action |= IMMEDIATE_ACKNOWLEDGE;
		}

		if ((segment.flags & TCP_FLAG_FINISH) != 0) {
			// we need to remove the finish, too, as part of the data
			drop--;
		}

		segment.flags &= ~(TCP_FLAG_FINISH | TCP_FLAG_PUSH);
		TRACE("* remove %ld bytes from the end", drop);
		gBufferModule->remove_trailer(buffer, drop);
	}

#ifdef TRACE_TCP
	if (advertisedWindow > fSendWindow) {
		TRACE(" Receive(): Window update %lu -> %lu", fSendWindow,
			advertisedWindow);
	}
#endif

	fSendWindow = advertisedWindow;
	if (advertisedWindow > fSendMaxWindow)
		fSendMaxWindow = advertisedWindow;

	// Then look at the acknowledgement for any updates

	if ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0) {
		// process acknowledged data
		if (fState == SYNCHRONIZE_RECEIVED)
			_MarkEstablished();

		if (fSendMax < segment.acknowledge)
			return DROP | IMMEDIATE_ACKNOWLEDGE;

		if (segment.acknowledge < fSendUnacknowledged) {
			// NOTE(review): fSendWindow has already been set to
			// advertisedWindow above, so the window comparison below looks
			// like it always holds here - confirm whether it was meant to
			// compare against the previously advertised window.
			if (buffer->size == 0 && advertisedWindow == fSendWindow
				&& (segment.flags & TCP_FLAG_FINISH) == 0) {
				TRACE("Receive(): duplicate ack!");

				_DuplicateAcknowledge(segment);
			}

			return DROP;
		} else {
			// this segment acknowledges in flight data

			if (fDuplicateAcknowledgeCount >= 3) {
				// deflate the window.
				fCongestionWindow = fSlowStartThreshold;
			}

			fDuplicateAcknowledgeCount = 0;

			if (fSendMax == segment.acknowledge)
				TRACE("Receive(): all inflight data ack'd!");

			if (segment.acknowledge > fSendQueue.LastSequence()
				&& fState > ESTABLISHED) {
				TRACE("Receive(): FIN has been acknowledged!");

				switch (fState) {
					case FINISH_SENT:
						fState = FINISH_ACKNOWLEDGED;
						T(State(this));
						break;
					case CLOSING:
						fState = TIME_WAIT;
						T(State(this));
						_EnterTimeWait();
						return DROP;
					case WAIT_FOR_FINISH_ACKNOWLEDGE:
						_Close();
						break;

					default:
						break;
				}
			}

			if (fState != CLOSED)
				_Acknowledged(segment);
		}
	}

	if (segment.flags & TCP_FLAG_URGENT) {
		if (fState == ESTABLISHED || fState == FINISH_SENT
			|| fState == FINISH_ACKNOWLEDGED) {
			// TODO: Handle urgent data:
			//  - RCV.UP <- max(RCV.UP, SEG.UP)
			//  - signal the user that urgent data is available (SIGURG)
		}
	}

	bool notify = false;

	// Queue the data if there is any left after trimming, and we are
	// supposed to receive it - otherwise the buffer is no longer needed
	if (buffer->size > 0 && _ShouldReceive())
		notify = _AddData(segment, buffer);
	else
		action = (action & ~KEEP) | DROP;

	if (segment.flags & TCP_FLAG_FINISH) {
		// the FIN is counted as part of the segment
		segmentLength++;
		if (fState != CLOSED && fState != LISTEN && fState != SYNCHRONIZE_SENT) {
			TRACE("Receive(): peer is finishing connection!");
			fReceiveNext++;
			notify = true;

			// FIN implies PSH
			fReceiveQueue.SetPushPointer();

			// we'll reply immediately to the FIN if we are not
			// transitioning to TIME WAIT so we immediately ACK it.
			action |= IMMEDIATE_ACKNOWLEDGE;

			// other side is closing connection; change states
			switch (fState) {
				case ESTABLISHED:
				case SYNCHRONIZE_RECEIVED:
					fState = FINISH_RECEIVED;
					T(State(this));
					break;
				case FINISH_SENT:
					// simultaneous close
					fState = CLOSING;
					T(State(this));
					break;
				case FINISH_ACKNOWLEDGED:
					fState = TIME_WAIT;
					T(State(this));
					_EnterTimeWait();
					break;
				case TIME_WAIT:
					_UpdateTimeWait();
					break;

				default:
					break;
			}
		}
	}

	if (notify)
		_NotifyReader();

	if (buffer->size > 0 || (segment.flags & TCP_FLAG_SYNCHRONIZE) != 0)
		action |= ACKNOWLEDGE;

	_UpdateTimestamps(segment, segmentLength);

	TRACE("Receive() Action %ld", action);

	return action;
}


/*!	Entry point for incoming segments. With the endpoint's lock held, it
	dispatches to the receive method for the current state, and then carries
	out the acknowledge action this method asked for.
	If the endpoint is flagged both FLAG_CLOSED and FLAG_DELETE_ON_CLOSE
	afterwards, the lock is released, and the socket (and with it this
	endpoint) is deleted.
	\return the segment action as returned by the state's receive method.
*/
int32
TCPEndpoint::SegmentReceived(tcp_segment_header& segment, net_buffer* buffer)
{
	MutexLocker locker(fLock);

	TRACE("SegmentReceived(): buffer %p (%lu bytes) address %s to %s\n"
		"\tflags 0x%x, seq %lu, ack %lu, wnd %lu",
		buffer, buffer->size, PrintAddress(buffer->source),
		PrintAddress(buffer->destination), segment.flags, segment.sequence,
		segment.acknowledge,
		(uint32)segment.advertised_window << fSendWindowShift);
	T(Receive(this, segment,
		(uint32)segment.advertised_window << fSendWindowShift, buffer));
	int32 segmentAction = DROP;

	switch (fState) {
		case LISTEN:
			segmentAction = _ListenReceive(segment, buffer);
			break;

		case SYNCHRONIZE_SENT:
			segmentAction = _SynchronizeSentReceive(segment, buffer);
			break;

		case SYNCHRONIZE_RECEIVED:
		case ESTABLISHED:
		case FINISH_RECEIVED:
		case WAIT_FOR_FINISH_ACKNOWLEDGE:
		case FINISH_SENT:
		case FINISH_ACKNOWLEDGED:
		case CLOSING:
		case TIME_WAIT:
		case CLOSED:
			segmentAction = _Receive(segment, buffer);
			break;
	}

	// process acknowledge action as asked for by the *Receive() method
	if (segmentAction & IMMEDIATE_ACKNOWLEDGE)
		SendAcknowledge(true);
	else if (segmentAction & ACKNOWLEDGE)
		DelayedAcknowledge();

	if ((fFlags & (FLAG_CLOSED | FLAG_DELETE_ON_CLOSE))
			== (FLAG_CLOSED | FLAG_DELETE_ON_CLOSE)) {
		locker.Unlock();
		gSocketModule->delete_socket(socket);
			// this will also delete us
	}

	return segmentAction;
}


//	#pragma mark - send


/*!	Returns the TCP flags an outgoing segment carries by default in the
	current state (0 for states that are not listed).
*/
inline uint8
TCPEndpoint::_CurrentFlags()
{
	// we don't set FLAG_FINISH here, instead we do it
	// conditionally below depending if we are sending
	// the last bytes of the send queue.

	switch (fState) {
		case CLOSED:
			return TCP_FLAG_RESET | TCP_FLAG_ACKNOWLEDGE;

		case SYNCHRONIZE_SENT:
			return TCP_FLAG_SYNCHRONIZE;
		case SYNCHRONIZE_RECEIVED:
			return TCP_FLAG_SYNCHRONIZE | TCP_FLAG_ACKNOWLEDGE;

		case ESTABLISHED:
		case FINISH_RECEIVED:
		case FINISH_ACKNOWLEDGED:
		case TIME_WAIT:
		case WAIT_FOR_FINISH_ACKNOWLEDGE:
		case FINISH_SENT:
		case CLOSING:
			return TCP_FLAG_ACKNOWLEDGE;

		default:
			return 0;
	}
}


/*!	Decides whether the prepared \a segment with \a length bytes of payload
	should be sent right now.
	This is the case if the payload passes the silly window avoidance checks
	below, if a substantial window update can be advertised, if the segment
	carries one of the SYN, FIN, or RST flags, or if urgent data is pending.
	The \a flightSize argument is currently not used by this method.
*/
inline bool
TCPEndpoint::_ShouldSendSegment(tcp_segment_header& segment, uint32 length,
	uint32 segmentMaxSize, uint32 flightSize)
{
	if (length > 0) {
		// Avoid the silly window syndrome - we only send a segment in case:
		// - we have a full segment to send, or
		// - TCP_NODELAY is set, or
		// - we're at the end of our buffer queue, or
		// - the buffer is at least larger than half of the maximum send window,
		//	 or
		// - we're retransmitting data
		if (length == segmentMaxSize
			|| (fOptions & TCP_NODELAY) != 0
			|| tcp_sequence(fSendNext + length) == fSendQueue.LastSequence()
			|| (fSendMaxWindow > 0 && length >= fSendMaxWindow / 2))
			return true;
	}

	// check if we need to send a window update to the peer
	if (segment.advertised_window > 0) {
		// correct the window to take into account what already has been advertised
		uint32 window = (segment.advertised_window << fReceiveWindowShift)
			- (fReceiveMaxAdvertised - fReceiveNext);

		// if we can advertise a window larger than twice the maximum segment
		// size, or half the maximum buffer size we send a window update
		if (window >= (fReceiveMaxSegmentSize << 1)
			|| window >= (socket->receive.buffer_size >> 1))
			return true;
	}

	if ((segment.flags & (TCP_FLAG_SYNCHRONIZE | TCP_FLAG_FINISH
			| TCP_FLAG_RESET)) != 0)
		return true;

	// We do have urgent data pending
	if (fSendUrgentOffset > fSendNext)
		return true;

	// there is no reason to send a segment just now
	return false;
}


/*!	Convenience version of _SendQueued() that uses the current send window
	(fSendWindow).
*/
status_t
TCPEndpoint::_SendQueued(bool force)
{
	return _SendQueued(force, fSendWindow);
}


/*!	Sends one or more TCP segments with the data waiting in the queue, or some
	specific flags that need to be sent.
	\param force If \c true, the segment is sent without consulting
		_ShouldSendSegment(); if the usable window is zero and there is still
		something to send, a single byte is sent as a window probe.
	\param sendWindow The send window to use; it is further limited by the
		congestion window, and by the data that is already in flight.
	\return \c B_OK on success, \c B_ERROR if there is no route or the
		endpoint is listening, \c B_NO_MEMORY if no buffer could be created,
		or the error of the failed operation.
*/
status_t
TCPEndpoint::_SendQueued(bool force, uint32 sendWindow)
{
	if (fRoute == NULL)
		return B_ERROR;

	// in passive state?
	if (fState == LISTEN)
		return B_ERROR;

	tcp_segment_header segment(_CurrentFlags());

	if ((fOptions & TCP_NOOPT) == 0) {
		if ((fFlags & FLAG_OPTION_TIMESTAMP) != 0) {
			segment.options |= TCP_HAS_TIMESTAMPS;
			segment.timestamp_reply = fReceivedTimestamp;
			segment.timestamp_value = tcp_now();
		}

		if ((segment.flags & TCP_FLAG_SYNCHRONIZE) != 0
			&& fSendNext == fInitialSendSequence) {
			// add connection establishment options
			segment.max_segment_size = fReceiveMaxSegmentSize;
			if (fFlags & FLAG_OPTION_WINDOW_SCALE) {
				segment.options |= TCP_HAS_WINDOW_SCALE;
				segment.window_shift = fReceiveWindowShift;
			}
		}
	}

	// the window we advertise is the free space in our receive queue
	size_t availableBytes = fReceiveQueue.Free();
	if (fFlags & FLAG_OPTION_WINDOW_SCALE)
		segment.advertised_window = availableBytes >> fReceiveWindowShift;
	else
		segment.advertised_window = min_c(TCP_MAX_WINDOW, availableBytes);

	segment.acknowledge = fReceiveNext;

	// Process urgent data
	if (fSendUrgentOffset > fSendNext) {
		segment.flags |= TCP_FLAG_URGENT;
		segment.urgent_offset = fSendUrgentOffset - fSendNext;
	} else {
		fSendUrgentOffset = fSendUnacknowledged;
			// Keep urgent offset updated, so that it doesn't reach into our
			// send window on overlap
		segment.urgent_offset = 0;
	}

	if (fCongestionWindow > 0 && fCongestionWindow < sendWindow)
		sendWindow = fCongestionWindow;

	// fSendUnacknowledged
	//  |    fSendNext      fSendMax
	//  |        |              |
	//  v        v              v
	//  -----------------------------------
	//  | effective window           |
	//  -----------------------------------

	// Flight size represents the window of data which is currently in the
	// ether. We should never send data such as the flight size becomes larger
	// than the effective window. Note however that the effective window may be
	// reduced (by congestion for instance), so at some point in time flight
	// size may be larger than the currently calculated window.

	uint32 flightSize = fSendMax - fSendUnacknowledged;
	uint32 consumedWindow = fSendNext - fSendUnacknowledged;

	if (consumedWindow > sendWindow) {
		sendWindow = 0;
		// TODO enter persist state? try to get a window update.
	} else
		sendWindow -= consumedWindow;

	if (force && sendWindow == 0 && fSendNext <= fSendQueue.LastSequence()) {
		// send one byte of data to ask for a window update
		// (triggered by the persist timer)
		sendWindow = 1;
	}

	uint32 length = min_c(fSendQueue.Available(fSendNext), sendWindow);
	tcp_sequence previousSendNext = fSendNext;

	// Each iteration sends at most one segment; the body is executed at
	// least once, so that segments without payload can be sent, too.
	do {
		uint32 segmentMaxSize = fSendMaxSegmentSize
			- tcp_options_length(segment);
		uint32 segmentLength = min_c(length, segmentMaxSize);

		if (fSendNext + segmentLength == fSendQueue.LastSequence()) {
			// this segment reaches the end of the send queue
			if (state_needs_finish(fState))
				segment.flags |= TCP_FLAG_FINISH;
			if (length > 0)
				segment.flags |= TCP_FLAG_PUSH;
		}

		// Determine if we should really send this segment
		if (!force && !_ShouldSendSegment(segment, segmentLength,
				segmentMaxSize, flightSize)) {
			if (fSendQueue.Available()
				&& !gStackModule->is_timer_active(&fPersistTimer)
				&& !gStackModule->is_timer_active(&fRetransmitTimer))
				_StartPersistTimer();
			break;
		}

		net_buffer *buffer = gBufferModule->create(256);
		if (buffer == NULL)
			return B_NO_MEMORY;

		status_t status = B_OK;
		if (segmentLength > 0)
			status = fSendQueue.Get(buffer, fSendNext, segmentLength);
		if (status < B_OK) {
			gBufferModule->free(buffer);
			return status;
		}

		LocalAddress().CopyTo(buffer->source);
		PeerAddress().CopyTo(buffer->destination);

		// remember the payload size before the TCP header is added
		uint32 size = buffer->size;
		segment.sequence = fSendNext;

		TRACE("SendQueued(): buffer %p (%lu bytes) address %s to %s\n"
			"\tflags 0x%x, seq %lu, ack %lu, rwnd %hu, cwnd %lu, ssthresh %lu\n"
			"\tlen %lu first %lu last %lu",
			buffer, buffer->size, PrintAddress(buffer->source),
			PrintAddress(buffer->destination), segment.flags, segment.sequence,
			segment.acknowledge, segment.advertised_window,
			fCongestionWindow, fSlowStartThreshold, segmentLength,
			(uint32)fSendQueue.FirstSequence(),
			(uint32)fSendQueue.LastSequence());
		T(Send(this, segment, buffer, fSendQueue.FirstSequence(),
			fSendQueue.LastSequence()));

		PROBE(buffer, sendWindow);
		sendWindow -= buffer->size;

		status = add_tcp_header(AddressModule(), segment, buffer);
		if (status != B_OK) {
			gBufferModule->free(buffer);
			return status;
		}

		// Update send status - we need to do this before we send the data
		// for local connections as the answer is directly handled

		if (segment.flags & TCP_FLAG_SYNCHRONIZE) {
			// the establishment options must not be part of any further
			// segment; the SYN takes up one sequence number
			segment.options &= ~TCP_HAS_WINDOW_SCALE;
			segment.max_segment_size = 0;
			size++;
		}

		if (segment.flags & TCP_FLAG_FINISH)
			size++;
				// the FIN takes up one sequence number as well

		uint32 sendMax = fSendMax;
		fSendNext += size;
		if (fSendMax < fSendNext)
			fSendMax = fSendNext;

		fReceiveMaxAdvertised = fReceiveNext
			+ ((uint32)segment.advertised_window << fReceiveWindowShift);

		status = next->module->send_routed_data(next, fRoute, buffer);
		if (status < B_OK) {
			gBufferModule->free(buffer);

			fSendNext = segment.sequence;
			fSendMax = sendMax;
				// restore send status
			return status;
		}

		if (segment.flags & TCP_FLAG_ACKNOWLEDGE)
			fLastAcknowledgeSent = segment.acknowledge;

		length -= segmentLength;
		segment.flags &= ~(TCP_FLAG_SYNCHRONIZE | TCP_FLAG_RESET
			| TCP_FLAG_FINISH);
	} while (length > 0);

	// if we sent data from the beginning of the send queue,
	// start the retransmission timer
	if (previousSendNext == fSendUnacknowledged
		&& fSendNext > previousSendNext) {
		TRACE(" SendQueue(): set retransmit timer with rto %llu",
			fRetransmitTimeout);

		gStackModule->set_timer(&fRetransmitTimer, fRetransmitTimeout);
	}

	return B_OK;
}


/*!	Returns the MTU the next protocol reports for \a address, minus the size
	of the TCP header.
*/
int
TCPEndpoint::_MaxSegmentSize(const sockaddr* address) const
{
	return next->module->get_mtu(next, address) - sizeof(tcp_header);
}


/*!	Prepares the endpoint for sending to \a peer: retrieves a route if there
	is none yet, registers the connection with the endpoint manager, chooses
	the initial send sequence, and computes the receive maximum segment size
	as well as the window shift we are going to advertise.
	\return \c B_OK on success, \c ENETUNREACH if no route could be found, or
		the error returned by the endpoint manager.
*/
status_t
TCPEndpoint::_PrepareSendPath(const sockaddr* peer)
{
	if (fRoute == NULL) {
		fRoute = gDatalinkModule->get_route(Domain(), peer);
		if (fRoute == NULL)
			return ENETUNREACH;
	}

	// make sure connection does not already exist
	status_t status = fManager->SetConnection(this, *LocalAddress(), peer,
		fRoute->interface->address);
	if (status < B_OK)
		return status;

	// the initial sequence number is derived from the system time
	fInitialSendSequence = system_time() >> 4;
	fSendNext = fInitialSendSequence;
	fSendUnacknowledged = fInitialSendSequence;
	fSendMax = fInitialSendSequence;
	fSendUrgentOffset = fInitialSendSequence;

	// we are counting the SYN here
	fSendQueue.SetInitialSequence(fSendNext + 1);

	fReceiveMaxSegmentSize = _MaxSegmentSize(peer);

	// Compute the window shift we advertise to our peer - if it doesn't support
	// this option, this will be reset to 0 (when its SYN is received)
	fReceiveWindowShift = 0;
	while (fReceiveWindowShift < TCP_MAX_WINDOW_SHIFT
		&& (0xffffUL << fReceiveWindowShift) < socket->receive.buffer_size) {
		fReceiveWindowShift++;
	}

	return B_OK;
}


/*!	Processes the acknowledgement carried by \a segment: removes the
	acknowledged data from the send queue, advances fSendUnacknowledged (and
	fSendNext, if it fell behind), and cancels the retransmit timer once
	everything sent has been acknowledged.
	If data has actually been removed from the queue, the round trip time is
	updated (timestamp option only), waiting writers are notified, and the
	congestion window is opened. Finally, remaining queued data is sent.
*/
void
TCPEndpoint::_Acknowledged(tcp_segment_header& segment)
{
	size_t previouslyUsed = fSendQueue.Used();

	fSendQueue.RemoveUntil(segment.acknowledge);
	fSendUnacknowledged = segment.acknowledge;

	if (fSendNext < fSendUnacknowledged)
		fSendNext = fSendUnacknowledged;

	if (fSendUnacknowledged == fSendMax)
		gStackModule->cancel_timer(&fRetransmitTimer);

	if (fSendQueue.Used() < previouslyUsed) {
		// this ACK acknowledged data

		if (segment.options & TCP_HAS_TIMESTAMPS)
			_UpdateRoundTripTime(tcp_diff_timestamp(segment.timestamp_reply));
		else {
			// TODO: Fallback to RFC 793 type estimation
		}

		if (is_writable(fState)) {
			// notify threads waiting on the socket to become writable again
			fSendList.Signal();
			gSocketModule->notify(socket, B_SELECT_WRITE, fSendQueue.Used());
		}

		// below the slow start threshold, the window grows by one segment
		// for each ACK that acknowledged data
		if (fCongestionWindow < fSlowStartThreshold)
			fCongestionWindow += fSendMaxSegmentSize;
	}

	if (fCongestionWindow >= fSlowStartThreshold) {
		// at or above the threshold, the window grows by
		// (max segment size)^2 / congestion window, but at least by 1
		uint32 increment = fSendMaxSegmentSize * fSendMaxSegmentSize;

		if (increment < fCongestionWindow)
			increment = 1;
		else
			increment /= fCongestionWindow;

		fCongestionWindow += increment;
	}

	// if there is data left to be sent, send it now
	if (fSendQueue.Used() > 0)
		_SendQueued();
}


/*!	Resets the slow start state, rewinds fSendNext to the first
	unacknowledged sequence, and sends the queued data again from there.
*/
void
TCPEndpoint::_Retransmit()
{
	TRACE("Retransmit()");
	_ResetSlowStart();
	fSendNext = fSendUnacknowledged;
	_SendQueued();
}


/*!	Feeds the measured \a roundTripTime into the smoothed round trip time
	and its deviation, and recomputes the retransmit timeout from both.
*/
void
TCPEndpoint::_UpdateRoundTripTime(int32 roundTripTime)
{
	int32 rtt = roundTripTime;

	// "smooth" round trip time as per Van Jacobson
	rtt -= fRoundTripTime / 8;
	fRoundTripTime += rtt;
	if (rtt < 0)
		rtt = -rtt;
	rtt -= fRoundTripDeviation / 4;
	fRoundTripDeviation += rtt;

	fRetransmitTimeout = ((fRoundTripTime / 4 + fRoundTripDeviation) / 2)
		* kTimestampFactor;

	TRACE(" RTO is now %llu (after rtt %ldms)", fRetransmitTimeout,
		roundTripTime);
}


/*!	Sets the slow start threshold to half of the data currently in flight
	(but at least two maximum sized segments), and shrinks the congestion
	window to a single maximum sized segment.
*/
void
TCPEndpoint::_ResetSlowStart()
{
	fSlowStartThreshold = max_c((fSendMax - fSendUnacknowledged) / 2,
		2 * fSendMaxSegmentSize);
	fCongestionWindow = fSendMaxSegmentSize;
}


//	#pragma mark - timer


/*!	Timer hook of the retransmit timer: retransmits with the endpoint's lock
	held. Does nothing if the lock could not be acquired.
*/
/*static*/ void
TCPEndpoint::_RetransmitTimer(net_timer* timer, void* _endpoint)
{
	TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint;
	T(Timer(endpoint, "retransmit"));

	MutexLocker locker(endpoint->fLock);
	if (!locker.IsLocked())
		return;

	endpoint->_Retransmit();
}


/*!	Timer hook of the persist timer: forces a send with the endpoint's lock
	held, unless the endpoint is closed already. Does nothing if the lock
	could not be acquired.
*/
/*static*/ void
TCPEndpoint::_PersistTimer(net_timer* timer, void* _endpoint)
{
	TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint;
	T(Timer(endpoint, "persist"));

	MutexLocker locker(endpoint->fLock);
	if (!locker.IsLocked())
		return;

	// the timer might not have been canceled early enough
	if (endpoint->State() == CLOSED)
		return;

	endpoint->_SendQueued(true);
}


/*!	Timer hook of the delayed acknowledge timer: sends the acknowledgement
	with the endpoint's lock held, unless the endpoint is closed already.
	Does nothing if the lock could not be acquired.
*/
/*static*/ void
TCPEndpoint::_DelayedAcknowledgeTimer(net_timer* timer, void* _endpoint)
{
	TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint;
	T(Timer(endpoint, "delayed ack"));

	MutexLocker locker(endpoint->fLock);
	if (!locker.IsLocked())
		return;

	// the timer might not have been canceled early enough
	if (endpoint->State() == CLOSED)
		return;

	endpoint->SendAcknowledge(true);
}


/*!	Timer hook of the time-wait timer. If the endpoint has not been flagged
	FLAG_CLOSED yet, it is only marked FLAG_DELETE_ON_CLOSE; otherwise the
	lock is released, and the socket is deleted.
	Does nothing if the lock could not be acquired.
*/
/*static*/ void
TCPEndpoint::_TimeWaitTimer(net_timer* timer, void* _endpoint)
{
	TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint;
	T(Timer(endpoint, "time-wait"));

	MutexLocker locker(endpoint->fLock);
	if (!locker.IsLocked())
		return;

	if ((endpoint->fFlags & FLAG_CLOSED) == 0) {
		endpoint->fFlags |= FLAG_DELETE_ON_CLOSE;
		return;
	}

	// the lock must not be held anymore when the socket is deleted
	locker.Unlock();

	gSocketModule->delete_socket(endpoint->socket);
}


//	#pragma mark -


/*!	Prints the state of this endpoint (flags, options, send and receive
	sequence/window values, queue usage, round trip and congestion values)
	using kprintf().
*/
void
TCPEndpoint::Dump() const
{
	kprintf("TCP endpoint %p\n", this);
	kprintf(" state: %s\n", name_for_state(fState));
	kprintf(" flags: 0x%lx\n", fFlags);
#ifdef KDEBUG
	kprintf(" lock: { %p, holder: %ld }\n", &fLock, fLock.holder);
#endif
	kprintf(" accept sem: %ld\n", fAcceptSemaphore);
	kprintf(" options: 0x%lx\n", (uint32)fOptions);
	kprintf(" send\n");
	kprintf(" window shift: %lu\n", (uint32)fSendWindowShift);
	kprintf(" unacknowledged: %lu\n", (uint32)fSendUnacknowledged);
	kprintf(" next: %lu\n", (uint32)fSendNext);
	kprintf(" max: %lu\n", (uint32)fSendMax);
	kprintf(" urgent offset: %lu\n", (uint32)fSendUrgentOffset);
	kprintf(" window: %lu\n", fSendWindow);
	kprintf(" max window: %lu\n", fSendMaxWindow);
	kprintf(" max segment size: %lu\n", fSendMaxSegmentSize);
	kprintf(" queue: %lu / %lu\n", fSendQueue.Used(), fSendQueue.Size());
	kprintf(" last acknowledge sent: %lu\n", (uint32)fLastAcknowledgeSent);
	kprintf(" initial sequence: %lu\n", (uint32)fInitialSendSequence);
	kprintf(" receive\n");
	kprintf(" window shift: %lu\n", (uint32)fReceiveWindowShift);
	kprintf(" next: %lu\n", (uint32)fReceiveNext);
	kprintf(" max advertised: %lu\n", (uint32)fReceiveMaxAdvertised);
	kprintf(" window: %lu\n", (uint32)fReceiveWindow);
	kprintf(" max segment size: %lu\n", (uint32)fReceiveMaxSegmentSize);
	kprintf(" queue: %lu / %lu\n", fReceiveQueue.Available(),
		fReceiveQueue.Size());
	kprintf(" initial sequence: %lu\n", (uint32)fInitialReceiveSequence);
	kprintf(" duplicate acknowledge count: %lu\n",
		fDuplicateAcknowledgeCount);
	kprintf(" round trip time: %ld (deviation %ld)\n", fRoundTripTime,
		fRoundTripDeviation);
	kprintf(" retransmit timeout: %llu\n", (uint64)fRetransmitTimeout);
	kprintf(" congestion window: %lu\n", fCongestionWindow);
	kprintf(" slow start threshold: %lu\n", fSlowStartThreshold);
}