1 /* 2 * Copyright 2006-2008, Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Andrew Galante, haiku.galante@gmail.com 7 * Axel Dörfler, axeld@pinc-software.de 8 * Hugo Santos, hugosantos@gmail.com 9 */ 10 11 12 #include "TCPEndpoint.h" 13 14 #include <netinet/in.h> 15 #include <netinet/ip.h> 16 #include <netinet/tcp.h> 17 #include <new> 18 #include <signal.h> 19 #include <stdlib.h> 20 #include <string.h> 21 22 #include <KernelExport.h> 23 #include <Select.h> 24 25 #include <net_buffer.h> 26 #include <net_datalink.h> 27 #include <net_stat.h> 28 #include <NetBufferUtilities.h> 29 #include <NetUtilities.h> 30 31 #include <lock.h> 32 #include <tracing.h> 33 #include <util/AutoLock.h> 34 #include <util/khash.h> 35 #include <util/list.h> 36 37 #include "EndpointManager.h" 38 39 40 // References: 41 // - RFC 793 - Transmission Control Protocol 42 // - RFC 813 - Window and Acknowledgement Strategy in TCP 43 // - RFC 1337 - TIME_WAIT Assassination Hazards in TCP 44 // 45 // Things this implementation currently doesn't implement: 46 // - TCP Slow Start, Congestion Avoidance, Fast Retransmit, and Fast Recovery, 47 // RFC 2001, RFC 2581, RFC 3042 48 // - NewReno Modification to TCP's Fast Recovery, RFC 2582 49 // - Explicit Congestion Notification (ECN), RFC 3168 50 // - SYN-Cache 51 // - TCP Extensions for High Performance, RFC 1323 52 // - SACK, Selective Acknowledgment - RFC 2018, RFC 2883, RFC 3517 53 // - Forward RTO-Recovery, RFC 4138 54 // - Time-Wait hash instead of keeping sockets alive 55 56 #define PrintAddress(address) \ 57 AddressString(Domain(), address, true).Data() 58 59 //#define TRACE_TCP 60 //#define PROBE_TCP 61 62 #ifdef TRACE_TCP 63 // the space before ', ##args' is important in order for this to work with cpp 2.95 64 # define TRACE(format, args...) dprintf("%ld: TCP [%llu] %p (%12s) " \ 65 format "\n", find_thread(NULL), system_time(), this, \ 66 name_for_state(fState) , ##args) 67 #else 68 # define TRACE(args...) do { } while (0) 69 #endif 70 71 #ifdef PROBE_TCP 72 # define PROBE(buffer, window) \ 73 dprintf("TCP PROBE %llu %s %s %ld snxt %lu suna %lu cw %lu sst %lu win %lu swin %lu smax-suna %lu savail %lu sqused %lu rto %llu\n", \ 74 system_time(), PrintAddress(buffer->source), \ 75 PrintAddress(buffer->destination), buffer->size, (uint32)fSendNext, \ 76 (uint32)fSendUnacknowledged, fCongestionWindow, fSlowStartThreshold, \ 77 window, fSendWindow, (uint32)(fSendMax - fSendUnacknowledged), \ 78 fSendQueue.Available(fSendNext), fSendQueue.Used(), fRetransmitTimeout) 79 #else 80 # define PROBE(buffer, window) do { } while (0) 81 #endif 82 83 #if TCP_TRACING 84 namespace TCPTracing { 85 86 class Receive : public AbstractTraceEntry { 87 public: 88 Receive(TCPEndpoint* endpoint, tcp_segment_header& segment, uint32 window, 89 net_buffer* buffer) 90 : 91 fEndpoint(endpoint), 92 fBuffer(buffer), 93 fBufferSize(buffer->size), 94 fSequence(segment.sequence), 95 fAcknowledge(segment.acknowledge), 96 fWindow(window), 97 fState(endpoint->State()), 98 fFlags(segment.flags) 99 { 100 Initialized(); 101 } 102 103 virtual void AddDump(TraceOutput& out) 104 { 105 out.Print("tcp:%p (%12s) receive buffer %p (%lu bytes), flags %x, " 106 "seq %lu, ack %lu, wnd %lu", fEndpoint, name_for_state(fState), 107 fBuffer, fBufferSize, fFlags, fSequence, fAcknowledge, fWindow); 108 } 109 110 protected: 111 TCPEndpoint* fEndpoint; 112 net_buffer* fBuffer; 113 uint32 fBufferSize; 114 uint32 fSequence; 115 uint32 fAcknowledge; 116 uint32 fWindow; 117 tcp_state fState; 118 uint8 fFlags; 119 }; 120 121 class Send : public AbstractTraceEntry { 122 public: 123 Send(TCPEndpoint* endpoint, tcp_segment_header& segment, net_buffer* buffer, 124 uint32 firstSequence, uint32 lastSequence) 125 : 126 fEndpoint(endpoint), 127 fBuffer(buffer), 128 fBufferSize(buffer->size), 129 fSequence(segment.sequence), 130 fAcknowledge(segment.acknowledge), 131 fFirstSequence(firstSequence), 132 fLastSequence(lastSequence), 133 fState(endpoint->State()), 134 fFlags(segment.flags) 135 { 136 Initialized(); 137 } 138 139 virtual void AddDump(TraceOutput& out) 140 { 141 out.Print("tcp:%p (%12s) send buffer %p (%lu bytes), flags %x, " 142 "seq %lu, ack %lu, first %lu, last %lu", 143 fEndpoint, name_for_state(fState), fBuffer, fBufferSize, fFlags, 144 fSequence, fAcknowledge, fFirstSequence, fLastSequence); 145 } 146 147 protected: 148 TCPEndpoint* fEndpoint; 149 net_buffer* fBuffer; 150 uint32 fBufferSize; 151 uint32 fSequence; 152 uint32 fAcknowledge; 153 uint32 fFirstSequence; 154 uint32 fLastSequence; 155 tcp_state fState; 156 uint8 fFlags; 157 }; 158 159 class State : public AbstractTraceEntry { 160 public: 161 State(TCPEndpoint* endpoint) 162 : 163 fEndpoint(endpoint), 164 fState(endpoint->State()) 165 { 166 Initialized(); 167 } 168 169 virtual void AddDump(TraceOutput& out) 170 { 171 out.Print("tcp:%p (%12s) state change", fEndpoint, 172 name_for_state(fState)); 173 } 174 175 protected: 176 TCPEndpoint* fEndpoint; 177 tcp_state fState; 178 }; 179 180 class Timer : public AbstractTraceEntry { 181 public: 182 Timer(TCPEndpoint* endpoint, const char* which) 183 : 184 fEndpoint(endpoint), 185 fWhich(which), 186 fState(endpoint->State()) 187 { 188 Initialized(); 189 } 190 191 virtual void AddDump(TraceOutput& out) 192 { 193 out.Print("tcp:%p (%12s) %s timer", fEndpoint, 194 name_for_state(fState), fWhich); 195 } 196 197 protected: 198 TCPEndpoint* fEndpoint; 199 const char* fWhich; 200 tcp_state fState; 201 }; 202 203 } // namespace TCPTracing 204 205 # define T(x) new(std::nothrow) TCPTracing::x 206 #else 207 # define T(x) 208 #endif // TCP_TRACING 209 210 // Initial estimate for packet round trip time (RTT) 211 #define TCP_INITIAL_RTT 2000000 212 213 // constants for the fFlags field 214 enum { 215 FLAG_OPTION_WINDOW_SCALE = 0x01, 216 FLAG_OPTION_TIMESTAMP = 0x02, 217 // TODO: Should FLAG_NO_RECEIVE apply as well to received connections? 218 // That is, what is expected from accept() after a shutdown() 219 // is performed on a listen()ing socket. 220 FLAG_NO_RECEIVE = 0x04, 221 FLAG_CLOSED = 0x08, 222 FLAG_DELETE_ON_CLOSE = 0x10, 223 FLAG_LOCAL = 0x20 224 }; 225 226 227 static const int kTimestampFactor = 1024; 228 229 230 static inline bigtime_t 231 absolute_timeout(bigtime_t timeout) 232 { 233 if (timeout == 0 || timeout == B_INFINITE_TIMEOUT) 234 return timeout; 235 236 return timeout + system_time(); 237 } 238 239 240 static inline status_t 241 posix_error(status_t error) 242 { 243 if (error == B_TIMED_OUT) 244 return B_WOULD_BLOCK; 245 246 return error; 247 } 248 249 250 static inline bool 251 in_window(const tcp_sequence& sequence, const tcp_sequence& receiveNext, 252 uint32 receiveWindow) 253 { 254 return sequence >= receiveNext && sequence < (receiveNext + receiveWindow); 255 } 256 257 258 static inline bool 259 segment_in_sequence(const tcp_segment_header& segment, int size, 260 const tcp_sequence& receiveNext, uint32 receiveWindow) 261 { 262 tcp_sequence sequence(segment.sequence); 263 if (size == 0) { 264 if (receiveWindow == 0) 265 return sequence == receiveNext; 266 return in_window(sequence, receiveNext, receiveWindow); 267 } else { 268 if (receiveWindow == 0) 269 return false; 270 return in_window(sequence, receiveNext, receiveWindow) 271 || in_window(sequence + size - 1, receiveNext, receiveWindow); 272 } 273 } 274 275 276 static inline bool 277 is_writable(tcp_state state) 278 { 279 return state == SYNCHRONIZE_SENT || state == SYNCHRONIZE_RECEIVED 280 || state == ESTABLISHED || state == FINISH_RECEIVED; 281 } 282 283 284 static inline uint32 tcp_now() 285 { 286 return system_time() / kTimestampFactor; 287 } 288 289 290 static inline uint32 tcp_diff_timestamp(uint32 base) 291 { 292 uint32 now = tcp_now(); 293 294 if (now > base) 295 return now - base; 296 297 return now + UINT_MAX - base; 298 } 299 300 301 static inline bool 302 state_needs_finish(int32 state) 303 { 304 return state == WAIT_FOR_FINISH_ACKNOWLEDGE 305 || state == FINISH_SENT || state == CLOSING; 306 } 307 308 309 // #pragma mark - 310 311 312 WaitList::WaitList(const char* name) 313 { 314 fCondition = 0; 315 fSem = create_sem(0, name); 316 } 317 318 319 WaitList::~WaitList() 320 { 321 delete_sem(fSem); 322 } 323 324 325 status_t 326 WaitList::InitCheck() const 327 { 328 return fSem; 329 } 330 331 332 status_t 333 WaitList::Wait(MutexLocker& locker, bigtime_t timeout) 334 { 335 locker.Unlock(); 336 337 status_t status = B_OK; 338 339 while (!atomic_test_and_set(&fCondition, 0, 1)) { 340 status = acquire_sem_etc(fSem, 1, B_ABSOLUTE_TIMEOUT | B_CAN_INTERRUPT, 341 timeout); 342 if (status != B_OK) 343 break; 344 } 345 346 locker.Lock(); 347 return status; 348 } 349 350 351 void 352 WaitList::Signal() 353 { 354 atomic_or(&fCondition, 1); 355 #ifdef __HAIKU__ 356 release_sem_etc(fSem, 1, B_DO_NOT_RESCHEDULE | B_RELEASE_ALL); 357 #else 358 int32 count; 359 if (get_sem_count(fSem, &count) == B_OK && count < 0) 360 release_sem_etc(fSem, -count, B_DO_NOT_RESCHEDULE); 361 #endif 362 } 363 364 365 // #pragma mark - 366 367 368 TCPEndpoint::TCPEndpoint(net_socket* socket) 369 : ProtocolSocket(socket), 370 fManager(NULL), 371 fReceiveList("tcp receive"), 372 fSendList("tcp send"), 373 fOptions(0), 374 fSendWindowShift(0), 375 fReceiveWindowShift(0), 376 fSendUnacknowledged(0), 377 fSendNext(0), 378 fSendMax(0), 379 fSendUrgentOffset(0), 380 fSendWindow(0), 381 fSendMaxWindow(0), 382 fSendMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE), 383 fSendQueue(socket->send.buffer_size), 384 fInitialSendSequence(0), 385 fDuplicateAcknowledgeCount(0), 386 fRoute(NULL), 387 fReceiveNext(0), 388 fReceiveMaxAdvertised(0), 389 fReceiveWindow(socket->receive.buffer_size), 390 fReceiveMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE), 391 fReceiveQueue(socket->receive.buffer_size), 392 fRoundTripTime(TCP_INITIAL_RTT / kTimestampFactor), 393 fRoundTripDeviation(TCP_INITIAL_RTT / kTimestampFactor), 394 fRetransmitTimeout(TCP_INITIAL_RTT), 395 fReceivedTimestamp(0), 396 fCongestionWindow(0), 397 fSlowStartThreshold(0), 398 fState(CLOSED), 399 fFlags(FLAG_OPTION_WINDOW_SCALE | FLAG_OPTION_TIMESTAMP) 400 { 401 // TODO: to be replaced with a real read/write locking strategy! 402 mutex_init(&fLock, "tcp lock"); 403 404 gStackModule->init_timer(&fPersistTimer, TCPEndpoint::_PersistTimer, this); 405 gStackModule->init_timer(&fRetransmitTimer, TCPEndpoint::_RetransmitTimer, 406 this); 407 gStackModule->init_timer(&fDelayedAcknowledgeTimer, 408 TCPEndpoint::_DelayedAcknowledgeTimer, this); 409 gStackModule->init_timer(&fTimeWaitTimer, TCPEndpoint::_TimeWaitTimer, 410 this); 411 } 412 413 414 TCPEndpoint::~TCPEndpoint() 415 { 416 mutex_lock(&fLock); 417 418 _CancelConnectionTimers(); 419 gStackModule->cancel_timer(&fTimeWaitTimer); 420 421 if (fManager != NULL) { 422 fManager->Unbind(this); 423 put_endpoint_manager(fManager); 424 } 425 426 mutex_destroy(&fLock); 427 428 // we need to wait for all timers to return 429 gStackModule->wait_for_timer(&fRetransmitTimer); 430 gStackModule->wait_for_timer(&fPersistTimer); 431 gStackModule->wait_for_timer(&fDelayedAcknowledgeTimer); 432 gStackModule->wait_for_timer(&fTimeWaitTimer); 433 } 434 435 436 status_t 437 TCPEndpoint::InitCheck() const 438 { 439 if (fReceiveList.InitCheck() < B_OK) 440 return fReceiveList.InitCheck(); 441 442 if (fSendList.InitCheck() < B_OK) 443 return fSendList.InitCheck(); 444 445 return B_OK; 446 } 447 448 449 // #pragma mark - protocol API 450 451 452 status_t 453 TCPEndpoint::Open() 454 { 455 TRACE("Open()"); 456 457 status_t status = ProtocolSocket::Open(); 458 if (status < B_OK) 459 return status; 460 461 fManager = get_endpoint_manager(Domain()); 462 if (fManager == NULL) 463 return EAFNOSUPPORT; 464 465 return B_OK; 466 } 467 468 469 status_t 470 TCPEndpoint::Close() 471 { 472 TRACE("Close()"); 473 474 MutexLocker locker(fLock); 475 476 if (fState == LISTEN) 477 delete_sem(fAcceptSemaphore); 478 479 if (fState == SYNCHRONIZE_SENT || fState == LISTEN) { 480 // TODO: what about linger in case of SYNCHRONIZE_SENT? 481 fState = CLOSED; 482 T(State(this)); 483 return B_OK; 484 } 485 486 status_t status = _Disconnect(true); 487 if (status != B_OK) 488 return status; 489 490 if (socket->options & SO_LINGER) { 491 TRACE("Close(): Lingering for %i secs", socket->linger); 492 493 bigtime_t maximum = absolute_timeout(socket->linger * 1000000LL); 494 495 while (fSendQueue.Used() > 0) { 496 status = fSendList.Wait(locker, maximum); 497 if (status == B_TIMED_OUT || status == B_WOULD_BLOCK) 498 break; 499 else if (status < B_OK) 500 return status; 501 } 502 503 TRACE("Close(): after waiting, the SendQ was left with %lu bytes.", 504 fSendQueue.Used()); 505 } 506 return B_OK; 507 } 508 509 510 status_t 511 TCPEndpoint::Free() 512 { 513 TRACE("Free()"); 514 515 MutexLocker _(fLock); 516 517 if (fState <= SYNCHRONIZE_SENT) 518 return B_OK; 519 520 // we are only interested in the timer, not in changing state 521 _EnterTimeWait(); 522 523 fFlags |= FLAG_CLOSED; 524 if ((fFlags & FLAG_DELETE_ON_CLOSE) != 0) 525 return B_OK; 526 527 return B_BUSY; 528 // we'll be freed later when the 2MSL timer expires 529 } 530 531 532 /*! Creates and sends a synchronize packet to /a address, and then waits 533 until the connection has been established or refused. 534 */ 535 status_t 536 TCPEndpoint::Connect(const sockaddr* address) 537 { 538 TRACE("Connect() on address %s", PrintAddress(address)); 539 540 MutexLocker locker(fLock); 541 542 if (gStackModule->is_restarted_syscall()) { 543 bigtime_t timeout = gStackModule->restore_syscall_restart_timeout(); 544 status_t status = _WaitForEstablished(locker, timeout); 545 TRACE(" Connect(): Connection complete: %s (timeout was %llu)", 546 strerror(status), timeout); 547 return posix_error(status); 548 } 549 550 // Can only call connect() from CLOSED or LISTEN states 551 // otherwise endpoint is considered already connected 552 if (fState == LISTEN) { 553 // this socket is about to connect; remove pending connections in the backlog 554 gSocketModule->set_max_backlog(socket, 0); 555 } else if (fState == ESTABLISHED) { 556 return EISCONN; 557 } else if (fState != CLOSED) 558 return EINPROGRESS; 559 560 // TODO: this is IPv4 specific, and doesn't belong here! 561 // consider destination address INADDR_ANY as INADDR_LOOPBACK 562 sockaddr_in _address; 563 if (((sockaddr_in*)address)->sin_addr.s_addr == INADDR_ANY) { 564 memcpy(&_address, address, sizeof(sockaddr_in)); 565 _address.sin_len = sizeof(sockaddr_in); 566 _address.sin_addr.s_addr = INADDR_LOOPBACK; 567 address = (sockaddr*)&_address; 568 } 569 570 status_t status = _PrepareSendPath(address); 571 if (status < B_OK) 572 return status; 573 574 TRACE(" Connect(): starting 3-way handshake..."); 575 576 fState = SYNCHRONIZE_SENT; 577 T(State(this)); 578 579 // send SYN 580 status = _SendQueued(); 581 if (status != B_OK) { 582 _Close(); 583 return status; 584 } 585 586 // If we are running over Loopback, after _SendQueued() returns we 587 // may be in ESTABLISHED already. 588 if (fState == ESTABLISHED) { 589 TRACE(" Connect() completed after _SendQueued()"); 590 return B_OK; 591 } 592 593 // wait until 3-way handshake is complete (if needed) 594 bigtime_t timeout = min_c(socket->send.timeout, TCP_CONNECTION_TIMEOUT); 595 if (timeout == 0) { 596 // we're a non-blocking socket 597 TRACE(" Connect() delayed, return EINPROGRESS"); 598 return EINPROGRESS; 599 } 600 601 bigtime_t absoluteTimeout = absolute_timeout(timeout); 602 gStackModule->store_syscall_restart_timeout(absoluteTimeout); 603 604 status = _WaitForEstablished(locker, absoluteTimeout); 605 TRACE(" Connect(): Connection complete: %s (timeout was %llu)", 606 strerror(status), timeout); 607 return posix_error(status); 608 } 609 610 611 status_t 612 TCPEndpoint::Accept(struct net_socket** _acceptedSocket) 613 { 614 TRACE("Accept()"); 615 616 MutexLocker locker(fLock); 617 618 status_t status; 619 bigtime_t timeout = absolute_timeout(socket->receive.timeout); 620 if (gStackModule->is_restarted_syscall()) 621 timeout = gStackModule->restore_syscall_restart_timeout(); 622 else 623 gStackModule->store_syscall_restart_timeout(timeout); 624 625 do { 626 locker.Unlock(); 627 628 status = acquire_sem_etc(fAcceptSemaphore, 1, B_ABSOLUTE_TIMEOUT 629 | B_CAN_INTERRUPT, timeout); 630 if (status < B_OK) 631 return status; 632 633 locker.Lock(); 634 status = gSocketModule->dequeue_connected(socket, _acceptedSocket); 635 #ifdef TRACE_TCP 636 if (status == B_OK) 637 TRACE(" Accept() returning %p", (*_acceptedSocket)->first_protocol); 638 #endif 639 } while (status < B_OK); 640 641 return status; 642 } 643 644 645 status_t 646 TCPEndpoint::Bind(const sockaddr *address) 647 { 648 if (address == NULL) 649 return B_BAD_VALUE; 650 651 MutexLocker lock(fLock); 652 653 TRACE("Bind() on address %s", PrintAddress(address)); 654 655 if (fState != CLOSED) 656 return EISCONN; 657 658 return fManager->Bind(this, address); 659 } 660 661 662 status_t 663 TCPEndpoint::Unbind(struct sockaddr *address) 664 { 665 TRACE("Unbind()"); 666 667 MutexLocker lock(fLock); 668 return fManager->Unbind(this); 669 } 670 671 672 status_t 673 TCPEndpoint::Listen(int count) 674 { 675 TRACE("Listen()"); 676 677 MutexLocker lock(fLock); 678 679 if (fState != CLOSED) 680 return B_BAD_VALUE; 681 682 fAcceptSemaphore = create_sem(0, "tcp accept"); 683 if (fAcceptSemaphore < B_OK) 684 return ENOBUFS; 685 686 status_t status = fManager->SetPassive(this); 687 if (status < B_OK) { 688 delete_sem(fAcceptSemaphore); 689 fAcceptSemaphore = -1; 690 return status; 691 } 692 693 fState = LISTEN; 694 T(State(this)); 695 return B_OK; 696 } 697 698 699 status_t 700 TCPEndpoint::Shutdown(int direction) 701 { 702 TRACE("Shutdown(%i)", direction); 703 704 MutexLocker lock(fLock); 705 706 if (direction == SHUT_RD || direction == SHUT_RDWR) 707 fFlags |= FLAG_NO_RECEIVE; 708 709 if (direction == SHUT_WR || direction == SHUT_RDWR) { 710 // TODO: That's not correct. After read/write shutting down the socket 711 // one should still be able to read previously arrived data. 712 _Disconnect(false); 713 } 714 715 return B_OK; 716 } 717 718 719 /*! Puts data contained in \a buffer into send buffer */ 720 status_t 721 TCPEndpoint::SendData(net_buffer *buffer) 722 { 723 MutexLocker lock(fLock); 724 725 TRACE("SendData(buffer %p, size %lu, flags %lx) [total %lu bytes, has %lu]", 726 buffer, buffer->size, buffer->flags, fSendQueue.Size(), 727 fSendQueue.Free()); 728 729 if (fState == CLOSED) 730 return ENOTCONN; 731 if (fState == LISTEN) 732 return EDESTADDRREQ; 733 if (!is_writable(fState)) { 734 // we only send signals when called from userland 735 if (gStackModule->is_syscall) 736 send_signal(find_thread(NULL), SIGPIPE); 737 return EPIPE; 738 } 739 740 uint32 flags = buffer->flags; 741 size_t left = buffer->size; 742 743 bigtime_t timeout = absolute_timeout(socket->send.timeout); 744 if (gStackModule->is_restarted_syscall()) 745 timeout = gStackModule->restore_syscall_restart_timeout(); 746 else 747 gStackModule->store_syscall_restart_timeout(timeout); 748 749 while (left > 0) { 750 while (fSendQueue.Free() < socket->send.low_water_mark) { 751 // wait until enough space is available 752 status_t status = fSendList.Wait(lock, timeout); 753 if (status < B_OK) { 754 TRACE(" SendData() returning %s (%d)", 755 strerror(posix_error(status)), (int)posix_error(status)); 756 return posix_error(status); 757 } 758 759 if (!is_writable(fState)) { 760 // we only send signals when called from userland 761 if (gStackModule->is_syscall) 762 send_signal(find_thread(NULL), SIGPIPE); 763 return EPIPE; 764 } 765 } 766 767 size_t size = fSendQueue.Free(); 768 if (size < left) { 769 // we need to split the original buffer 770 net_buffer* clone = gBufferModule->clone(buffer, false); 771 // TODO: add offset/size parameter to net_buffer::clone() or 772 // even a move_data() function, as this is a bit inefficient 773 if (clone == NULL) 774 return ENOBUFS; 775 776 status_t status = gBufferModule->trim(clone, size); 777 if (status != B_OK) { 778 gBufferModule->free(clone); 779 return status; 780 } 781 782 gBufferModule->remove_header(buffer, size); 783 left -= size; 784 fSendQueue.Add(clone); 785 } else { 786 left -= buffer->size; 787 fSendQueue.Add(buffer); 788 } 789 } 790 791 TRACE(" SendData(): %lu bytes used.", fSendQueue.Used()); 792 793 bool force = false; 794 if ((flags & MSG_OOB) != 0) { 795 fSendUrgentOffset = fSendQueue.LastSequence(); 796 // RFC 961 specifies that the urgent offset points to the last 797 // byte of urgent data. However, this is commonly implemented as 798 // here, ie. it points to the first byte after the urgent data. 799 force = true; 800 } 801 if ((flags & MSG_EOF) != 0) 802 _Disconnect(false); 803 804 if (fState == ESTABLISHED || fState == FINISH_RECEIVED) 805 _SendQueued(force); 806 807 return B_OK; 808 } 809 810 811 ssize_t 812 TCPEndpoint::SendAvailable() 813 { 814 MutexLocker locker(fLock); 815 816 ssize_t available; 817 818 if (is_writable(fState)) 819 available = fSendQueue.Free(); 820 else 821 available = EPIPE; 822 823 TRACE("SendAvailable(): %li", available); 824 return available; 825 } 826 827 828 status_t 829 TCPEndpoint::FillStat(net_stat *stat) 830 { 831 MutexLocker _(fLock); 832 833 strlcpy(stat->state, name_for_state(fState), sizeof(stat->state)); 834 stat->receive_queue_size = fReceiveQueue.Available(); 835 stat->send_queue_size = fSendQueue.Used(); 836 837 return B_OK; 838 } 839 840 841 status_t 842 TCPEndpoint::ReadData(size_t numBytes, uint32 flags, net_buffer** _buffer) 843 { 844 TRACE("ReadData(%lu bytes, flags 0x%x)", numBytes, (unsigned int)flags); 845 846 MutexLocker locker(fLock); 847 848 *_buffer = NULL; 849 850 if (fState == CLOSED) 851 return ENOTCONN; 852 853 bigtime_t timeout = absolute_timeout(socket->receive.timeout); 854 if (gStackModule->is_restarted_syscall()) 855 timeout = gStackModule->restore_syscall_restart_timeout(); 856 else 857 gStackModule->store_syscall_restart_timeout(timeout); 858 859 if (fState == SYNCHRONIZE_SENT || fState == SYNCHRONIZE_RECEIVED) { 860 if (flags & MSG_DONTWAIT) 861 return B_WOULD_BLOCK; 862 863 status_t status = _WaitForEstablished(locker, timeout); 864 if (status < B_OK) 865 return posix_error(status); 866 } 867 868 size_t dataNeeded = socket->receive.low_water_mark; 869 870 // When MSG_WAITALL is set then the function should block 871 // until the full amount of data can be returned. 872 if (flags & MSG_WAITALL) 873 dataNeeded = numBytes; 874 875 // TODO: add support for urgent data (MSG_OOB) 876 877 while (true) { 878 if (fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE 879 || fState == TIME_WAIT) { 880 // ``Connection closing''. 881 return B_OK; 882 } 883 884 if (fReceiveQueue.Available() > 0) { 885 if (fReceiveQueue.Available() >= dataNeeded 886 || (fReceiveQueue.PushedData() > 0 887 && fReceiveQueue.PushedData() >= fReceiveQueue.Available())) 888 break; 889 } else if (fState == FINISH_RECEIVED) { 890 // ``If no text is awaiting delivery, the RECEIVE will 891 // get a Connection closing''. 892 return B_OK; 893 } 894 895 if ((flags & MSG_DONTWAIT) != 0 || socket->receive.timeout == 0) 896 return B_WOULD_BLOCK; 897 898 if ((fFlags & FLAG_NO_RECEIVE) != 0) 899 return B_OK; 900 901 status_t status = fReceiveList.Wait(locker, timeout); 902 if (status < B_OK) { 903 // The Open Group base specification mentions that EINTR should be 904 // returned if the recv() is interrupted before _any data_ is 905 // available. So we actually check if there is data, and if so, 906 // push it to the user. 907 if ((status == B_TIMED_OUT || status == B_INTERRUPTED) 908 && fReceiveQueue.Available() > 0) 909 break; 910 911 return posix_error(status); 912 } 913 } 914 915 TRACE(" ReadData(): %lu are available.", fReceiveQueue.Available()); 916 917 if (numBytes < fReceiveQueue.Available()) 918 fReceiveList.Signal(); 919 920 bool clone = (flags & MSG_PEEK) != 0; 921 922 ssize_t receivedBytes = fReceiveQueue.Get(numBytes, !clone, _buffer); 923 924 TRACE(" ReadData(): %lu bytes kept.", fReceiveQueue.Available()); 925 926 // if we are opening the window, check if we should send an ACK 927 if (!clone) 928 SendAcknowledge(false); 929 930 return receivedBytes; 931 } 932 933 934 ssize_t 935 TCPEndpoint::ReadAvailable() 936 { 937 MutexLocker locker(fLock); 938 939 TRACE("ReadAvailable(): %li", _AvailableData()); 940 941 return _AvailableData(); 942 } 943 944 945 status_t 946 TCPEndpoint::SetSendBufferSize(size_t length) 947 { 948 MutexLocker _(fLock); 949 fSendQueue.SetMaxBytes(length); 950 return B_OK; 951 } 952 953 954 status_t 955 TCPEndpoint::SetReceiveBufferSize(size_t length) 956 { 957 MutexLocker _(fLock); 958 fReceiveQueue.SetMaxBytes(length); 959 return B_OK; 960 } 961 962 963 status_t 964 TCPEndpoint::GetOption(int option, void* _value, int* _length) 965 { 966 if (*_length != sizeof(int)) 967 return B_BAD_VALUE; 968 969 int* value = (int*)_value; 970 971 switch (option) { 972 case TCP_NODELAY: 973 if ((fOptions & TCP_NODELAY) != 0) 974 *value = 1; 975 else 976 *value = 0; 977 return B_OK; 978 979 case TCP_MAXSEG: 980 *value = fReceiveMaxSegmentSize; 981 return B_OK; 982 983 default: 984 return B_BAD_VALUE; 985 } 986 } 987 988 989 status_t 990 TCPEndpoint::SetOption(int option, const void* _value, int length) 991 { 992 if (option != TCP_NODELAY) 993 return B_BAD_VALUE; 994 995 if (length != sizeof(int)) 996 return B_BAD_VALUE; 997 998 const int* value = (const int*)_value; 999 1000 MutexLocker _(fLock); 1001 if (*value) 1002 fOptions |= TCP_NODELAY; 1003 else 1004 fOptions &= ~TCP_NODELAY; 1005 1006 return B_OK; 1007 } 1008 1009 1010 // #pragma mark - misc 1011 1012 1013 bool 1014 TCPEndpoint::IsBound() const 1015 { 1016 return !LocalAddress().IsEmpty(true); 1017 } 1018 1019 1020 bool 1021 TCPEndpoint::IsLocal() const 1022 { 1023 return (fFlags & FLAG_LOCAL) != 0; 1024 } 1025 1026 1027 status_t 1028 TCPEndpoint::DelayedAcknowledge() 1029 { 1030 if (gStackModule->cancel_timer(&fDelayedAcknowledgeTimer)) { 1031 // timer was active, send an ACK now (with the exception above, 1032 // we send every other ACK) 1033 return SendAcknowledge(true); 1034 } 1035 1036 gStackModule->set_timer(&fDelayedAcknowledgeTimer, 1037 TCP_DELAYED_ACKNOWLEDGE_TIMEOUT); 1038 return B_OK; 1039 } 1040 1041 1042 status_t 1043 TCPEndpoint::SendAcknowledge(bool force) 1044 { 1045 return _SendQueued(force, 0); 1046 } 1047 1048 1049 void 1050 TCPEndpoint::_StartPersistTimer() 1051 { 1052 gStackModule->set_timer(&fPersistTimer, 1000000LL); 1053 } 1054 1055 1056 void 1057 TCPEndpoint::_EnterTimeWait() 1058 { 1059 TRACE("_EnterTimeWait()\n"); 1060 1061 _CancelConnectionTimers(); 1062 1063 if (fState == TIME_WAIT && IsLocal()) { 1064 // we do not use TIME_WAIT state for local connections 1065 fFlags |= FLAG_DELETE_ON_CLOSE; 1066 return; 1067 } 1068 1069 _UpdateTimeWait(); 1070 } 1071 1072 1073 void 1074 TCPEndpoint::_UpdateTimeWait() 1075 { 1076 gStackModule->set_timer(&fTimeWaitTimer, TCP_MAX_SEGMENT_LIFETIME << 1); 1077 } 1078 1079 1080 void 1081 TCPEndpoint::_CancelConnectionTimers() 1082 { 1083 gStackModule->cancel_timer(&fRetransmitTimer); 1084 gStackModule->cancel_timer(&fPersistTimer); 1085 gStackModule->cancel_timer(&fDelayedAcknowledgeTimer); 1086 } 1087 1088 1089 /*! Sends the FIN flag to the peer when the connection is still open. 1090 Moves the endpoint to the next state depending on where it was. 1091 */ 1092 status_t 1093 TCPEndpoint::_Disconnect(bool closing) 1094 { 1095 tcp_state previousState = fState; 1096 1097 if (fState == SYNCHRONIZE_RECEIVED || fState == ESTABLISHED) 1098 fState = FINISH_SENT; 1099 else if (fState == FINISH_RECEIVED) 1100 fState = WAIT_FOR_FINISH_ACKNOWLEDGE; 1101 else 1102 return B_OK; 1103 1104 T(State(this)); 1105 1106 status_t status = _SendQueued(); 1107 if (status != B_OK) { 1108 fState = previousState; 1109 T(State(this)); 1110 return status; 1111 } 1112 1113 return B_OK; 1114 } 1115 1116 1117 void 1118 TCPEndpoint::_MarkEstablished() 1119 { 1120 fState = ESTABLISHED; 1121 T(State(this)); 1122 1123 if (socket->parent != NULL) { 1124 gSocketModule->set_connected(socket); 1125 release_sem_etc(fAcceptSemaphore, 1, B_DO_NOT_RESCHEDULE); 1126 } 1127 1128 fSendList.Signal(); 1129 } 1130 1131 1132 status_t 1133 TCPEndpoint::_WaitForEstablished(MutexLocker &locker, bigtime_t timeout) 1134 { 1135 // TODO: Checking for CLOSED seems correct, but breaks several neon tests. 1136 // When investigating this, also have a look at _Close() and _HandleReset(). 1137 while (fState < ESTABLISHED/* && fState != CLOSED*/) { 1138 if (socket->error != B_OK) 1139 return socket->error; 1140 1141 status_t status = fSendList.Wait(locker, timeout); 1142 if (status < B_OK) 1143 return status; 1144 } 1145 1146 return B_OK; 1147 } 1148 1149 1150 // #pragma mark - receive 1151 1152 1153 void 1154 TCPEndpoint::_Close() 1155 { 1156 _CancelConnectionTimers(); 1157 fState = CLOSED; 1158 T(State(this)); 1159 1160 fFlags |= FLAG_DELETE_ON_CLOSE; 1161 1162 fSendList.Signal(); 1163 _NotifyReader(); 1164 } 1165 1166 1167 void 1168 TCPEndpoint::_HandleReset(status_t error) 1169 { 1170 socket->error = error; 1171 _Close(); 1172 1173 gSocketModule->notify(socket, B_SELECT_WRITE, error); 1174 gSocketModule->notify(socket, B_SELECT_ERROR, error); 1175 } 1176 1177 1178 void 1179 TCPEndpoint::_DuplicateAcknowledge(tcp_segment_header &segment) 1180 { 1181 if (++fDuplicateAcknowledgeCount < 3) 1182 return; 1183 1184 if (fDuplicateAcknowledgeCount == 3) { 1185 _ResetSlowStart(); 1186 fCongestionWindow = fSlowStartThreshold + 3 1187 * fSendMaxSegmentSize; 1188 fSendNext = segment.acknowledge; 1189 } else if (fDuplicateAcknowledgeCount > 3) 1190 fCongestionWindow += fSendMaxSegmentSize; 1191 1192 _SendQueued(); 1193 } 1194 1195 1196 void 1197 TCPEndpoint::_UpdateTimestamps(tcp_segment_header& segment, 1198 size_t segmentLength) 1199 { 1200 if (fFlags & FLAG_OPTION_TIMESTAMP) { 1201 tcp_sequence sequence(segment.sequence); 1202 1203 if (fLastAcknowledgeSent >= sequence 1204 && fLastAcknowledgeSent < (sequence + segmentLength)) 1205 fReceivedTimestamp = segment.timestamp_value; 1206 } 1207 } 1208 1209 1210 ssize_t 1211 TCPEndpoint::_AvailableData() const 1212 { 1213 // TODO: Refer to the FLAG_NO_RECEIVE comment above regarding 1214 // the application of FLAG_NO_RECEIVE in listen()ing 1215 // sockets. 1216 if (fState == LISTEN) 1217 return gSocketModule->count_connected(socket); 1218 if (fState == SYNCHRONIZE_SENT) 1219 return 0; 1220 1221 ssize_t availableData = fReceiveQueue.Available(); 1222 1223 if (availableData == 0 && !_ShouldReceive()) 1224 return ENOTCONN; 1225 1226 return availableData; 1227 } 1228 1229 1230 void 1231 TCPEndpoint::_NotifyReader() 1232 { 1233 fReceiveList.Signal(); 1234 gSocketModule->notify(socket, B_SELECT_READ, _AvailableData()); 1235 } 1236 1237 1238 bool 1239 TCPEndpoint::_AddData(tcp_segment_header& segment, net_buffer* buffer) 1240 { 1241 fReceiveQueue.Add(buffer, segment.sequence); 1242 fReceiveNext = fReceiveQueue.NextSequence(); 1243 1244 TRACE(" _AddData(): adding data, receive next = %lu. Now have %lu bytes.", 1245 (uint32)fReceiveNext, fReceiveQueue.Available()); 1246 1247 if (segment.flags & TCP_FLAG_PUSH) 1248 fReceiveQueue.SetPushPointer(); 1249 1250 return fReceiveQueue.Available() > 0; 1251 } 1252 1253 1254 void 1255 TCPEndpoint::_PrepareReceivePath(tcp_segment_header& segment) 1256 { 1257 fInitialReceiveSequence = segment.sequence; 1258 1259 // count the received SYN 1260 segment.sequence++; 1261 1262 fReceiveNext = segment.sequence; 1263 fReceiveQueue.SetInitialSequence(segment.sequence); 1264 1265 if ((fOptions & TCP_NOOPT) == 0) { 1266 if (segment.max_segment_size > 0) 1267 fSendMaxSegmentSize = segment.max_segment_size; 1268 1269 if (segment.options & TCP_HAS_WINDOW_SCALE) { 1270 fFlags |= FLAG_OPTION_WINDOW_SCALE; 1271 fSendWindowShift = segment.window_shift; 1272 } else { 1273 fFlags &= ~FLAG_OPTION_WINDOW_SCALE; 1274 fReceiveWindowShift = 0; 1275 } 1276 1277 if (segment.options & TCP_HAS_TIMESTAMPS) { 1278 fFlags |= FLAG_OPTION_TIMESTAMP; 1279 fReceivedTimestamp = segment.timestamp_value; 1280 } else 1281 fFlags &= ~FLAG_OPTION_TIMESTAMP; 1282 } 1283 1284 fCongestionWindow = 2 * fSendMaxSegmentSize; 1285 fSlowStartThreshold = (uint32)segment.advertised_window << fSendWindowShift; 1286 } 1287 1288 1289 bool 1290 TCPEndpoint::_ShouldReceive() const 1291 { 1292 if ((fFlags & FLAG_NO_RECEIVE) != 0) 1293 return false; 1294 1295 return fState == ESTABLISHED || fState == FINISH_SENT 1296 || fState == FINISH_ACKNOWLEDGED; 1297 } 1298 1299 1300 int32 1301 TCPEndpoint::_Spawn(TCPEndpoint* parent, tcp_segment_header& segment, 1302 net_buffer* buffer) 1303 { 1304 MutexLocker _(fLock); 1305 1306 // TODO error checking 1307 ProtocolSocket::Open(); 1308 1309 fState = SYNCHRONIZE_RECEIVED; 1310 T(State(this)); 1311 fManager = parent->fManager; 1312 1313 LocalAddress().SetTo(buffer->destination); 1314 PeerAddress().SetTo(buffer->source); 1315 1316 TRACE("Spawn()"); 1317 1318 // TODO: proper error handling! 1319 if (fManager->BindChild(this) < B_OK) 1320 return DROP; 1321 1322 if (_PrepareSendPath(*PeerAddress()) < B_OK) 1323 return DROP; 1324 1325 fOptions = parent->fOptions; 1326 fAcceptSemaphore = parent->fAcceptSemaphore; 1327 1328 _PrepareReceivePath(segment); 1329 1330 // send SYN+ACK 1331 if (_SendQueued() < B_OK) 1332 return DROP; 1333 1334 segment.flags &= ~TCP_FLAG_SYNCHRONIZE; 1335 // we handled this flag now, it must not be set for further processing 1336 1337 return _Receive(segment, buffer); 1338 } 1339 1340 1341 int32 1342 TCPEndpoint::_ListenReceive(tcp_segment_header& segment, net_buffer* buffer) 1343 { 1344 TRACE("ListenReceive()"); 1345 1346 // Essentially, we accept only TCP_FLAG_SYNCHRONIZE in this state, 1347 // but the error behaviour differs 1348 if (segment.flags & TCP_FLAG_RESET) 1349 return DROP; 1350 if (segment.flags & TCP_FLAG_ACKNOWLEDGE) 1351 return DROP | RESET; 1352 if ((segment.flags & TCP_FLAG_SYNCHRONIZE) == 0) 1353 return DROP; 1354 1355 // TODO: drop broadcast/multicast 1356 1357 // spawn new endpoint for accept() 1358 net_socket* newSocket; 1359 if (gSocketModule->spawn_pending_socket(socket, &newSocket) < B_OK) 1360 return DROP; 1361 1362 return ((TCPEndpoint *)newSocket->first_protocol)->_Spawn(this, 1363 segment, buffer); 1364 } 1365 1366 1367 int32 1368 TCPEndpoint::_SynchronizeSentReceive(tcp_segment_header &segment, 1369 net_buffer *buffer) 1370 { 1371 TRACE("_SynchronizeSentReceive()"); 1372 1373 if ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0 1374 && (fInitialSendSequence >= segment.acknowledge 1375 || fSendMax < segment.acknowledge)) 1376 return DROP | RESET; 1377 1378 if (segment.flags & TCP_FLAG_RESET) { 1379 _HandleReset(ECONNREFUSED); 1380 return DROP; 1381 } 1382 1383 if ((segment.flags & TCP_FLAG_SYNCHRONIZE) == 0) 1384 return DROP; 1385 1386 fSendUnacknowledged = segment.acknowledge; 1387 _PrepareReceivePath(segment); 1388 1389 if (segment.flags & TCP_FLAG_ACKNOWLEDGE) { 1390 _MarkEstablished(); 1391 } else { 1392 // simultaneous open 1393 fState = SYNCHRONIZE_RECEIVED; 1394 T(State(this)); 1395 } 1396 1397 segment.flags &= ~TCP_FLAG_SYNCHRONIZE; 1398 // we handled this flag now, it must not be set for further processing 1399 1400 return _Receive(segment, buffer) | IMMEDIATE_ACKNOWLEDGE; 1401 } 1402 1403 1404 int32 1405 TCPEndpoint::_Receive(tcp_segment_header& segment, net_buffer* buffer) 1406 { 1407 uint32 advertisedWindow = (uint32)segment.advertised_window 1408 << fSendWindowShift; 1409 size_t segmentLength = buffer->size; 1410 1411 // First, handle the most common case for uni-directional data transfer 1412 // (known as header prediction - the segment must not change the window, 1413 // and must be the expected sequence, and contain no control flags) 1414 1415 if (fState == ESTABLISHED 1416 && segment.AcknowledgeOnly() 1417 && fReceiveNext == segment.sequence 1418 && advertisedWindow > 0 && advertisedWindow == fSendWindow 1419 && fSendNext == fSendMax) { 1420 _UpdateTimestamps(segment, segmentLength); 1421 1422 if (segmentLength == 0) { 1423 // this is a pure acknowledge segment - we're on the sending end 1424 if (fSendUnacknowledged < segment.acknowledge 1425 && fSendMax >= segment.acknowledge) { 1426 _Acknowledged(segment); 1427 return DROP; 1428 } 1429 } else if (segment.acknowledge == fSendUnacknowledged 1430 && fReceiveQueue.IsContiguous() 1431 && fReceiveQueue.Free() >= segmentLength 1432 && (fFlags & FLAG_NO_RECEIVE) == 0) { 1433 if (_AddData(segment, buffer)) 1434 _NotifyReader(); 1435 1436 return KEEP | ((segment.flags & TCP_FLAG_PUSH) != 0 1437 ? IMMEDIATE_ACKNOWLEDGE : ACKNOWLEDGE); 1438 } 1439 } 1440 1441 // The fast path was not applicable, so we continue with the standard 1442 // processing of the incoming segment 1443 1444 ASSERT(fState != SYNCHRONIZE_SENT && fState != LISTEN); 1445 1446 if (fState != CLOSED && fState != TIME_WAIT) { 1447 // Check sequence number 1448 if (!segment_in_sequence(segment, segmentLength, fReceiveNext, 1449 fReceiveWindow)) { 1450 TRACE(" Receive(): segment out of window, next: %lu wnd: %lu", 1451 (uint32)fReceiveNext, fReceiveWindow); 1452 if (segment.flags & TCP_FLAG_RESET) { 1453 // TODO: this doesn't look right - review! 1454 return DROP; 1455 } 1456 return DROP | IMMEDIATE_ACKNOWLEDGE; 1457 } 1458 } 1459 1460 if (segment.flags & TCP_FLAG_RESET) { 1461 // Is this a valid reset? 1462 // We generally ignore resets in time wait state (see RFC 1337) 1463 if (fLastAcknowledgeSent <= segment.sequence 1464 && tcp_sequence(segment.sequence) < (fLastAcknowledgeSent 1465 + fReceiveWindow) 1466 && fState != TIME_WAIT) { 1467 status_t error; 1468 if (fState == SYNCHRONIZE_RECEIVED) 1469 error = ECONNREFUSED; 1470 else if (fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE) 1471 error = ENOTCONN; 1472 else 1473 error = ECONNRESET; 1474 1475 _HandleReset(error); 1476 } 1477 1478 return DROP; 1479 } 1480 1481 if ((segment.flags & TCP_FLAG_SYNCHRONIZE) != 0 1482 || (fState == SYNCHRONIZE_RECEIVED 1483 && (fInitialReceiveSequence > segment.sequence 1484 || (segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0 1485 && (fSendUnacknowledged > segment.acknowledge 1486 || fSendMax < segment.acknowledge)))) { 1487 // reset the connection - either the initial SYN was faulty, or we 1488 // received a SYN within the data stream 1489 return DROP | RESET; 1490 } 1491 1492 fReceiveWindow = max_c(fReceiveQueue.Free(), fReceiveWindow); 1493 // the window must not shrink 1494 1495 // trim buffer to be within the receive window 1496 int32 drop = fReceiveNext - segment.sequence; 1497 if (drop > 0) { 1498 if ((uint32)drop > buffer->size 1499 || ((uint32)drop == buffer->size 1500 && (segment.flags & TCP_FLAG_FINISH) == 0)) { 1501 // don't accidently remove a FIN we shouldn't remove 1502 segment.flags &= ~TCP_FLAG_FINISH; 1503 drop = buffer->size; 1504 } 1505 1506 // remove duplicate data at the start 1507 TRACE("* remove %ld bytes from the start", drop); 1508 gBufferModule->remove_header(buffer, drop); 1509 segment.sequence += drop; 1510 } 1511 1512 int32 action = KEEP; 1513 1514 drop = segment.sequence + buffer->size - (fReceiveNext + fReceiveWindow); 1515 if (drop > 0) { 1516 // remove data exceeding our window 1517 if ((uint32)drop >= buffer->size) { 1518 // if we can accept data, or the segment is not what we'd expect, 1519 // drop the segment (an immediate acknowledge is always triggered) 1520 if (fReceiveWindow != 0 || segment.sequence != fReceiveNext) 1521 return DROP | IMMEDIATE_ACKNOWLEDGE; 1522 1523 action |= IMMEDIATE_ACKNOWLEDGE; 1524 } 1525 1526 if ((segment.flags & TCP_FLAG_FINISH) != 0) { 1527 // we need to remove the finish, too, as part of the data 1528 drop--; 1529 } 1530 1531 segment.flags &= ~(TCP_FLAG_FINISH | TCP_FLAG_PUSH); 1532 TRACE("* remove %ld bytes from the end", drop); 1533 gBufferModule->remove_trailer(buffer, drop); 1534 } 1535 1536 #ifdef TRACE_TCP 1537 if (advertisedWindow > fSendWindow) { 1538 TRACE(" Receive(): Window update %lu -> %lu", fSendWindow, 1539 advertisedWindow); 1540 } 1541 #endif 1542 1543 fSendWindow = advertisedWindow; 1544 if (advertisedWindow > fSendMaxWindow) 1545 fSendMaxWindow = advertisedWindow; 1546 1547 // Then look at the acknowledgement for any updates 1548 1549 if ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0) { 1550 // process acknowledged data 1551 if (fState == SYNCHRONIZE_RECEIVED) 1552 _MarkEstablished(); 1553 1554 if (fSendMax < segment.acknowledge) 1555 return DROP | IMMEDIATE_ACKNOWLEDGE; 1556 1557 if (segment.acknowledge < fSendUnacknowledged) { 1558 if (buffer->size == 0 && advertisedWindow == fSendWindow 1559 && (segment.flags & TCP_FLAG_FINISH) == 0) { 1560 TRACE("Receive(): duplicate ack!"); 1561 1562 _DuplicateAcknowledge(segment); 1563 } 1564 1565 return DROP; 1566 } else { 1567 // this segment acknowledges in flight data 1568 1569 if (fDuplicateAcknowledgeCount >= 3) { 1570 // deflate the window. 1571 fCongestionWindow = fSlowStartThreshold; 1572 } 1573 1574 fDuplicateAcknowledgeCount = 0; 1575 1576 if (fSendMax == segment.acknowledge) 1577 TRACE("Receive(): all inflight data ack'd!"); 1578 1579 if (segment.acknowledge > fSendQueue.LastSequence() 1580 && fState > ESTABLISHED) { 1581 TRACE("Receive(): FIN has been acknowledged!"); 1582 1583 switch (fState) { 1584 case FINISH_SENT: 1585 fState = FINISH_ACKNOWLEDGED; 1586 T(State(this)); 1587 break; 1588 case CLOSING: 1589 fState = TIME_WAIT; 1590 T(State(this)); 1591 _EnterTimeWait(); 1592 return DROP; 1593 case WAIT_FOR_FINISH_ACKNOWLEDGE: 1594 _Close(); 1595 break; 1596 1597 default: 1598 break; 1599 } 1600 } 1601 1602 if (fState != CLOSED) 1603 _Acknowledged(segment); 1604 } 1605 } 1606 1607 if (segment.flags & TCP_FLAG_URGENT) { 1608 if (fState == ESTABLISHED || fState == FINISH_SENT 1609 || fState == FINISH_ACKNOWLEDGED) { 1610 // TODO: Handle urgent data: 1611 // - RCV.UP <- max(RCV.UP, SEG.UP) 1612 // - signal the user that urgent data is available (SIGURG) 1613 } 1614 } 1615 1616 bool notify = false; 1617 1618 if (buffer->size > 0 && _ShouldReceive()) 1619 notify = _AddData(segment, buffer); 1620 else { 1621 if ((fFlags & FLAG_NO_RECEIVE) != 0) 1622 fReceiveNext += buffer->size; 1623 1624 action = (action & ~KEEP) | DROP; 1625 } 1626 1627 if (segment.flags & TCP_FLAG_FINISH) { 1628 segmentLength++; 1629 if (fState != CLOSED && fState != LISTEN && fState != SYNCHRONIZE_SENT) { 1630 TRACE("Receive(): peer is finishing connection!"); 1631 fReceiveNext++; 1632 notify = true; 1633 1634 // FIN implies PUSH 1635 fReceiveQueue.SetPushPointer(); 1636 1637 // we'll reply immediately to the FIN if we are not 1638 // transitioning to TIME WAIT so we immediatly ACK it. 1639 action |= IMMEDIATE_ACKNOWLEDGE; 1640 1641 // other side is closing connection; change states 1642 switch (fState) { 1643 case ESTABLISHED: 1644 case SYNCHRONIZE_RECEIVED: 1645 fState = FINISH_RECEIVED; 1646 T(State(this)); 1647 break; 1648 case FINISH_SENT: 1649 // simultaneous close 1650 fState = CLOSING; 1651 T(State(this)); 1652 break; 1653 case FINISH_ACKNOWLEDGED: 1654 fState = TIME_WAIT; 1655 T(State(this)); 1656 _EnterTimeWait(); 1657 break; 1658 case TIME_WAIT: 1659 _UpdateTimeWait(); 1660 break; 1661 1662 default: 1663 break; 1664 } 1665 } 1666 } 1667 1668 if (notify) 1669 _NotifyReader(); 1670 1671 if (buffer->size > 0 || (segment.flags & TCP_FLAG_SYNCHRONIZE) != 0) 1672 action |= ACKNOWLEDGE; 1673 1674 _UpdateTimestamps(segment, segmentLength); 1675 1676 TRACE("Receive() Action %ld", action); 1677 1678 return action; 1679 } 1680 1681 1682 int32 1683 TCPEndpoint::SegmentReceived(tcp_segment_header& segment, net_buffer* buffer) 1684 { 1685 MutexLocker locker(fLock); 1686 1687 TRACE("SegmentReceived(): buffer %p (%lu bytes) address %s to %s\n" 1688 "\tflags 0x%x, seq %lu, ack %lu, wnd %lu", 1689 buffer, buffer->size, PrintAddress(buffer->source), 1690 PrintAddress(buffer->destination), segment.flags, segment.sequence, 1691 segment.acknowledge, 1692 (uint32)segment.advertised_window << fSendWindowShift); 1693 T(Receive(this, segment, 1694 (uint32)segment.advertised_window << fSendWindowShift, buffer)); 1695 int32 segmentAction = DROP; 1696 1697 switch (fState) { 1698 case LISTEN: 1699 segmentAction = _ListenReceive(segment, buffer); 1700 break; 1701 1702 case SYNCHRONIZE_SENT: 1703 segmentAction = _SynchronizeSentReceive(segment, buffer); 1704 break; 1705 1706 case SYNCHRONIZE_RECEIVED: 1707 case ESTABLISHED: 1708 case FINISH_RECEIVED: 1709 case WAIT_FOR_FINISH_ACKNOWLEDGE: 1710 case FINISH_SENT: 1711 case FINISH_ACKNOWLEDGED: 1712 case CLOSING: 1713 case TIME_WAIT: 1714 case CLOSED: 1715 segmentAction = _Receive(segment, buffer); 1716 break; 1717 } 1718 1719 // process acknowledge action as asked for by the *Receive() method 1720 if (segmentAction & IMMEDIATE_ACKNOWLEDGE) 1721 SendAcknowledge(true); 1722 else if (segmentAction & ACKNOWLEDGE) 1723 DelayedAcknowledge(); 1724 1725 if ((fFlags & (FLAG_CLOSED | FLAG_DELETE_ON_CLOSE)) 1726 == (FLAG_CLOSED | FLAG_DELETE_ON_CLOSE)) { 1727 locker.Unlock(); 1728 gSocketModule->delete_socket(socket); 1729 // this will also delete us 1730 } 1731 1732 return segmentAction; 1733 } 1734 1735 1736 // #pragma mark - send 1737 1738 1739 inline uint8 1740 TCPEndpoint::_CurrentFlags() 1741 { 1742 // we don't set FLAG_FINISH here, instead we do it 1743 // conditionally below depending if we are sending 1744 // the last bytes of the send queue. 1745 1746 switch (fState) { 1747 case CLOSED: 1748 return TCP_FLAG_RESET | TCP_FLAG_ACKNOWLEDGE; 1749 1750 case SYNCHRONIZE_SENT: 1751 return TCP_FLAG_SYNCHRONIZE; 1752 case SYNCHRONIZE_RECEIVED: 1753 return TCP_FLAG_SYNCHRONIZE | TCP_FLAG_ACKNOWLEDGE; 1754 1755 case ESTABLISHED: 1756 case FINISH_RECEIVED: 1757 case FINISH_ACKNOWLEDGED: 1758 case TIME_WAIT: 1759 case WAIT_FOR_FINISH_ACKNOWLEDGE: 1760 case FINISH_SENT: 1761 case CLOSING: 1762 return TCP_FLAG_ACKNOWLEDGE; 1763 1764 default: 1765 return 0; 1766 } 1767 } 1768 1769 1770 inline bool 1771 TCPEndpoint::_ShouldSendSegment(tcp_segment_header& segment, uint32 length, 1772 uint32 segmentMaxSize, uint32 flightSize) 1773 { 1774 if (length > 0) { 1775 // Avoid the silly window syndrome - we only send a segment in case: 1776 // - we have a full segment to send, or 1777 // - we're at the end of our buffer queue, or 1778 // - the buffer is at least larger than half of the maximum send window, 1779 // or 1780 // - we're retransmitting data 1781 if (length == segmentMaxSize 1782 || (fOptions & TCP_NODELAY) != 0 1783 || tcp_sequence(fSendNext + length) == fSendQueue.LastSequence() 1784 || (fSendMaxWindow > 0 && length >= fSendMaxWindow / 2)) 1785 return true; 1786 } 1787 1788 // check if we need to send a window update to the peer 1789 if (segment.advertised_window > 0) { 1790 // correct the window to take into account what already has been advertised 1791 uint32 window = (segment.advertised_window << fReceiveWindowShift) 1792 - (fReceiveMaxAdvertised - fReceiveNext); 1793 1794 // if we can advertise a window larger than twice the maximum segment 1795 // size, or half the maximum buffer size we send a window update 1796 if (window >= (fReceiveMaxSegmentSize << 1) 1797 || window >= (socket->receive.buffer_size >> 1)) 1798 return true; 1799 } 1800 1801 if ((segment.flags & (TCP_FLAG_SYNCHRONIZE | TCP_FLAG_FINISH 1802 | TCP_FLAG_RESET)) != 0) 1803 return true; 1804 1805 // We do have urgent data pending 1806 if (fSendUrgentOffset > fSendNext) 1807 return true; 1808 1809 // there is no reason to send a segment just now 1810 return false; 1811 } 1812 1813 1814 status_t 1815 TCPEndpoint::_SendQueued(bool force) 1816 { 1817 return _SendQueued(force, fSendWindow); 1818 } 1819 1820 1821 /*! Sends one or more TCP segments with the data waiting in the queue, or some 1822 specific flags that need to be sent. 1823 */ 1824 status_t 1825 TCPEndpoint::_SendQueued(bool force, uint32 sendWindow) 1826 { 1827 if (fRoute == NULL) 1828 return B_ERROR; 1829 1830 // in passive state? 1831 if (fState == LISTEN) 1832 return B_ERROR; 1833 1834 tcp_segment_header segment(_CurrentFlags()); 1835 1836 if ((fOptions & TCP_NOOPT) == 0) { 1837 if ((fFlags & FLAG_OPTION_TIMESTAMP) != 0) { 1838 segment.options |= TCP_HAS_TIMESTAMPS; 1839 segment.timestamp_reply = fReceivedTimestamp; 1840 segment.timestamp_value = tcp_now(); 1841 } 1842 1843 if ((segment.flags & TCP_FLAG_SYNCHRONIZE) != 0 1844 && fSendNext == fInitialSendSequence) { 1845 // add connection establishment options 1846 segment.max_segment_size = fReceiveMaxSegmentSize; 1847 if (fFlags & FLAG_OPTION_WINDOW_SCALE) { 1848 segment.options |= TCP_HAS_WINDOW_SCALE; 1849 segment.window_shift = fReceiveWindowShift; 1850 } 1851 } 1852 } 1853 1854 size_t availableBytes = fReceiveQueue.Free(); 1855 if (fFlags & FLAG_OPTION_WINDOW_SCALE) 1856 segment.advertised_window = availableBytes >> fReceiveWindowShift; 1857 else 1858 segment.advertised_window = min_c(TCP_MAX_WINDOW, availableBytes); 1859 1860 segment.acknowledge = fReceiveNext; 1861 1862 // Process urgent data 1863 if (fSendUrgentOffset > fSendNext) { 1864 segment.flags |= TCP_FLAG_URGENT; 1865 segment.urgent_offset = fSendUrgentOffset - fSendNext; 1866 } else { 1867 fSendUrgentOffset = fSendUnacknowledged; 1868 // Keep urgent offset updated, so that it doesn't reach into our 1869 // send window on overlap 1870 segment.urgent_offset = 0; 1871 } 1872 1873 if (fCongestionWindow > 0 && fCongestionWindow < sendWindow) 1874 sendWindow = fCongestionWindow; 1875 1876 // fSendUnacknowledged 1877 // | fSendNext fSendMax 1878 // | | | 1879 // v v v 1880 // ----------------------------------- 1881 // | effective window | 1882 // ----------------------------------- 1883 1884 // Flight size represents the window of data which is currently in the 1885 // ether. We should never send data such as the flight size becomes larger 1886 // than the effective window. Note however that the effective window may be 1887 // reduced (by congestion for instance), so at some point in time flight 1888 // size may be larger than the currently calculated window. 1889 1890 uint32 flightSize = fSendMax - fSendUnacknowledged; 1891 uint32 consumedWindow = fSendNext - fSendUnacknowledged; 1892 1893 if (consumedWindow > sendWindow) { 1894 sendWindow = 0; 1895 // TODO enter persist state? try to get a window update. 1896 } else 1897 sendWindow -= consumedWindow; 1898 1899 if (force && sendWindow == 0 && fSendNext <= fSendQueue.LastSequence()) { 1900 // send one byte of data to ask for a window update 1901 // (triggered by the persist timer) 1902 sendWindow = 1; 1903 } 1904 1905 uint32 length = min_c(fSendQueue.Available(fSendNext), sendWindow); 1906 tcp_sequence previousSendNext = fSendNext; 1907 1908 do { 1909 uint32 segmentMaxSize = fSendMaxSegmentSize 1910 - tcp_options_length(segment); 1911 uint32 segmentLength = min_c(length, segmentMaxSize); 1912 1913 if (fSendNext + segmentLength == fSendQueue.LastSequence()) { 1914 if (state_needs_finish(fState)) 1915 segment.flags |= TCP_FLAG_FINISH; 1916 if (length > 0) 1917 segment.flags |= TCP_FLAG_PUSH; 1918 } 1919 1920 // Determine if we should really send this segment 1921 if (!force && !_ShouldSendSegment(segment, segmentLength, 1922 segmentMaxSize, flightSize)) { 1923 if (fSendQueue.Available() 1924 && !gStackModule->is_timer_active(&fPersistTimer) 1925 && !gStackModule->is_timer_active(&fRetransmitTimer)) 1926 _StartPersistTimer(); 1927 break; 1928 } 1929 1930 net_buffer *buffer = gBufferModule->create(256); 1931 if (buffer == NULL) 1932 return B_NO_MEMORY; 1933 1934 status_t status = B_OK; 1935 if (segmentLength > 0) 1936 status = fSendQueue.Get(buffer, fSendNext, segmentLength); 1937 if (status < B_OK) { 1938 gBufferModule->free(buffer); 1939 return status; 1940 } 1941 1942 LocalAddress().CopyTo(buffer->source); 1943 PeerAddress().CopyTo(buffer->destination); 1944 1945 uint32 size = buffer->size; 1946 segment.sequence = fSendNext; 1947 1948 TRACE("SendQueued(): buffer %p (%lu bytes) address %s to %s\n" 1949 "\tflags 0x%x, seq %lu, ack %lu, rwnd %hu, cwnd %lu, ssthresh %lu\n" 1950 "\tlen %lu first %lu last %lu", 1951 buffer, buffer->size, PrintAddress(buffer->source), 1952 PrintAddress(buffer->destination), segment.flags, segment.sequence, 1953 segment.acknowledge, segment.advertised_window, 1954 fCongestionWindow, fSlowStartThreshold, segmentLength, 1955 (uint32)fSendQueue.FirstSequence(), 1956 (uint32)fSendQueue.LastSequence()); 1957 T(Send(this, segment, buffer, fSendQueue.FirstSequence(), 1958 fSendQueue.LastSequence())); 1959 1960 PROBE(buffer, sendWindow); 1961 sendWindow -= buffer->size; 1962 1963 status = add_tcp_header(AddressModule(), segment, buffer); 1964 if (status != B_OK) { 1965 gBufferModule->free(buffer); 1966 return status; 1967 } 1968 1969 // Update send status - we need to do this before we send the data 1970 // for local connections as the answer is directly handled 1971 1972 if (segment.flags & TCP_FLAG_SYNCHRONIZE) { 1973 segment.options &= ~TCP_HAS_WINDOW_SCALE; 1974 segment.max_segment_size = 0; 1975 size++; 1976 } 1977 1978 if (segment.flags & TCP_FLAG_FINISH) 1979 size++; 1980 1981 uint32 sendMax = fSendMax; 1982 fSendNext += size; 1983 if (fSendMax < fSendNext) 1984 fSendMax = fSendNext; 1985 1986 fReceiveMaxAdvertised = fReceiveNext 1987 + ((uint32)segment.advertised_window << fReceiveWindowShift); 1988 1989 status = next->module->send_routed_data(next, fRoute, buffer); 1990 if (status < B_OK) { 1991 gBufferModule->free(buffer); 1992 1993 fSendNext = segment.sequence; 1994 fSendMax = sendMax; 1995 // restore send status 1996 return status; 1997 } 1998 1999 if (segment.flags & TCP_FLAG_ACKNOWLEDGE) 2000 fLastAcknowledgeSent = segment.acknowledge; 2001 2002 length -= segmentLength; 2003 segment.flags &= ~(TCP_FLAG_SYNCHRONIZE | TCP_FLAG_RESET 2004 | TCP_FLAG_FINISH); 2005 } while (length > 0); 2006 2007 // if we sent data from the beggining of the send queue, 2008 // start the retransmition timer 2009 if (previousSendNext == fSendUnacknowledged 2010 && fSendNext > previousSendNext) { 2011 TRACE(" SendQueue(): set retransmit timer with rto %llu", 2012 fRetransmitTimeout); 2013 2014 gStackModule->set_timer(&fRetransmitTimer, fRetransmitTimeout); 2015 } 2016 2017 return B_OK; 2018 } 2019 2020 2021 int 2022 TCPEndpoint::_MaxSegmentSize(const sockaddr* address) const 2023 { 2024 return next->module->get_mtu(next, address) - sizeof(tcp_header); 2025 } 2026 2027 2028 status_t 2029 TCPEndpoint::_PrepareSendPath(const sockaddr* peer) 2030 { 2031 if (fRoute == NULL) { 2032 fRoute = gDatalinkModule->get_route(Domain(), peer); 2033 if (fRoute == NULL) 2034 return ENETUNREACH; 2035 2036 if ((fRoute->flags & RTF_LOCAL) != 0) 2037 fFlags |= FLAG_LOCAL; 2038 } 2039 2040 // make sure connection does not already exist 2041 status_t status = fManager->SetConnection(this, *LocalAddress(), peer, 2042 fRoute->interface->address); 2043 if (status < B_OK) 2044 return status; 2045 2046 fInitialSendSequence = system_time() >> 4; 2047 fSendNext = fInitialSendSequence; 2048 fSendUnacknowledged = fInitialSendSequence; 2049 fSendMax = fInitialSendSequence; 2050 fSendUrgentOffset = fInitialSendSequence; 2051 2052 // we are counting the SYN here 2053 fSendQueue.SetInitialSequence(fSendNext + 1); 2054 2055 fReceiveMaxSegmentSize = _MaxSegmentSize(peer); 2056 2057 // Compute the window shift we advertise to our peer - if it doesn't support 2058 // this option, this will be reset to 0 (when its SYN is received) 2059 fReceiveWindowShift = 0; 2060 while (fReceiveWindowShift < TCP_MAX_WINDOW_SHIFT 2061 && (0xffffUL << fReceiveWindowShift) < socket->receive.buffer_size) { 2062 fReceiveWindowShift++; 2063 } 2064 2065 return B_OK; 2066 } 2067 2068 2069 void 2070 TCPEndpoint::_Acknowledged(tcp_segment_header& segment) 2071 { 2072 size_t previouslyUsed = fSendQueue.Used(); 2073 2074 fSendQueue.RemoveUntil(segment.acknowledge); 2075 fSendUnacknowledged = segment.acknowledge; 2076 2077 if (fSendNext < fSendUnacknowledged) 2078 fSendNext = fSendUnacknowledged; 2079 2080 if (fSendUnacknowledged == fSendMax) 2081 gStackModule->cancel_timer(&fRetransmitTimer); 2082 2083 if (fSendQueue.Used() < previouslyUsed) { 2084 // this ACK acknowledged data 2085 2086 if (segment.options & TCP_HAS_TIMESTAMPS) 2087 _UpdateRoundTripTime(tcp_diff_timestamp(segment.timestamp_reply)); 2088 else { 2089 // TODO: Fallback to RFC 793 type estimation 2090 } 2091 2092 if (is_writable(fState)) { 2093 // notify threads waiting on the socket to become writable again 2094 fSendList.Signal(); 2095 gSocketModule->notify(socket, B_SELECT_WRITE, fSendQueue.Used()); 2096 } 2097 2098 if (fCongestionWindow < fSlowStartThreshold) 2099 fCongestionWindow += fSendMaxSegmentSize; 2100 } 2101 2102 if (fCongestionWindow >= fSlowStartThreshold) { 2103 uint32 increment = fSendMaxSegmentSize * fSendMaxSegmentSize; 2104 2105 if (increment < fCongestionWindow) 2106 increment = 1; 2107 else 2108 increment /= fCongestionWindow; 2109 2110 fCongestionWindow += increment; 2111 } 2112 2113 // if there is data left to be send, send it now 2114 if (fSendQueue.Used() > 0) 2115 _SendQueued(); 2116 } 2117 2118 2119 void 2120 TCPEndpoint::_Retransmit() 2121 { 2122 TRACE("Retransmit()"); 2123 _ResetSlowStart(); 2124 fSendNext = fSendUnacknowledged; 2125 _SendQueued(); 2126 } 2127 2128 2129 void 2130 TCPEndpoint::_UpdateRoundTripTime(int32 roundTripTime) 2131 { 2132 int32 rtt = roundTripTime; 2133 2134 // "smooth" round trip time as per Van Jacobson 2135 rtt -= fRoundTripTime / 8; 2136 fRoundTripTime += rtt; 2137 if (rtt < 0) 2138 rtt = -rtt; 2139 rtt -= fRoundTripDeviation / 4; 2140 fRoundTripDeviation += rtt; 2141 2142 fRetransmitTimeout = ((fRoundTripTime / 4 + fRoundTripDeviation) / 2) 2143 * kTimestampFactor; 2144 2145 TRACE(" RTO is now %llu (after rtt %ldms)", fRetransmitTimeout, 2146 roundTripTime); 2147 } 2148 2149 2150 void 2151 TCPEndpoint::_ResetSlowStart() 2152 { 2153 fSlowStartThreshold = max_c((fSendMax - fSendUnacknowledged) / 2, 2154 2 * fSendMaxSegmentSize); 2155 fCongestionWindow = fSendMaxSegmentSize; 2156 } 2157 2158 2159 // #pragma mark - timer 2160 2161 2162 /*static*/ void 2163 TCPEndpoint::_RetransmitTimer(net_timer* timer, void* _endpoint) 2164 { 2165 TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint; 2166 T(Timer(endpoint, "retransmit")); 2167 2168 MutexLocker locker(endpoint->fLock); 2169 if (!locker.IsLocked()) 2170 return; 2171 2172 endpoint->_Retransmit(); 2173 } 2174 2175 2176 /*static*/ void 2177 TCPEndpoint::_PersistTimer(net_timer* timer, void* _endpoint) 2178 { 2179 TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint; 2180 T(Timer(endpoint, "persist")); 2181 2182 MutexLocker locker(endpoint->fLock); 2183 if (!locker.IsLocked()) 2184 return; 2185 2186 // the timer might not have been canceled early enough 2187 if (endpoint->State() == CLOSED) 2188 return; 2189 2190 endpoint->_SendQueued(true); 2191 } 2192 2193 2194 /*static*/ void 2195 TCPEndpoint::_DelayedAcknowledgeTimer(net_timer* timer, void* _endpoint) 2196 { 2197 TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint; 2198 T(Timer(endpoint, "delayed ack")); 2199 2200 MutexLocker locker(endpoint->fLock); 2201 if (!locker.IsLocked()) 2202 return; 2203 2204 // the timer might not have been canceled early enough 2205 if (endpoint->State() == CLOSED) 2206 return; 2207 2208 endpoint->SendAcknowledge(true); 2209 } 2210 2211 2212 /*static*/ void 2213 TCPEndpoint::_TimeWaitTimer(net_timer* timer, void* _endpoint) 2214 { 2215 TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint; 2216 T(Timer(endpoint, "time-wait")); 2217 2218 MutexLocker locker(endpoint->fLock); 2219 if (!locker.IsLocked()) 2220 return; 2221 2222 if ((endpoint->fFlags & FLAG_CLOSED) == 0) { 2223 endpoint->fFlags |= FLAG_DELETE_ON_CLOSE; 2224 return; 2225 } 2226 2227 locker.Unlock(); 2228 2229 gSocketModule->delete_socket(endpoint->socket); 2230 } 2231 2232 2233 // #pragma mark - 2234 2235 2236 void 2237 TCPEndpoint::Dump() const 2238 { 2239 kprintf("TCP endpoint %p\n", this); 2240 kprintf(" state: %s\n", name_for_state(fState)); 2241 kprintf(" flags: 0x%lx\n", fFlags); 2242 #ifdef KDEBUG 2243 kprintf(" lock: { %p, holder: %ld }\n", &fLock, fLock.holder); 2244 #endif 2245 kprintf(" accept sem: %ld\n", fAcceptSemaphore); 2246 kprintf(" options: 0x%lx\n", (uint32)fOptions); 2247 kprintf(" send\n"); 2248 kprintf(" window shift: %lu\n", (uint32)fSendWindowShift); 2249 kprintf(" unacknowledged: %lu\n", (uint32)fSendUnacknowledged); 2250 kprintf(" next: %lu\n", (uint32)fSendNext); 2251 kprintf(" max: %lu\n", (uint32)fSendMax); 2252 kprintf(" urgent offset: %lu\n", (uint32)fSendUrgentOffset); 2253 kprintf(" window: %lu\n", fSendWindow); 2254 kprintf(" max window: %lu\n", fSendMaxWindow); 2255 kprintf(" max segment size: %lu\n", fSendMaxSegmentSize); 2256 kprintf(" queue: %lu / %lu\n", fSendQueue.Used(), fSendQueue.Size()); 2257 kprintf(" last acknowledge sent: %lu\n", (uint32)fLastAcknowledgeSent); 2258 kprintf(" initial sequence: %lu\n", (uint32)fInitialSendSequence); 2259 kprintf(" receive\n"); 2260 kprintf(" window shift: %lu\n", (uint32)fReceiveWindowShift); 2261 kprintf(" next: %lu\n", (uint32)fReceiveNext); 2262 kprintf(" max advertised: %lu\n", (uint32)fReceiveMaxAdvertised); 2263 kprintf(" window: %lu\n", (uint32)fReceiveWindow); 2264 kprintf(" max segment size: %lu\n", (uint32)fReceiveMaxSegmentSize); 2265 kprintf(" queue: %lu / %lu\n", fReceiveQueue.Available(), 2266 fReceiveQueue.Size()); 2267 kprintf(" initial sequence: %lu\n", (uint32)fInitialReceiveSequence); 2268 kprintf(" duplicate acknowledge count: %lu\n", 2269 fDuplicateAcknowledgeCount); 2270 kprintf(" round trip time: %ld (deviation %ld)\n", fRoundTripTime, 2271 fRoundTripDeviation); 2272 kprintf(" retransmit timeout: %llu\n", (uint64)fRetransmitTimeout); 2273 kprintf(" congestion window: %lu\n", fCongestionWindow); 2274 kprintf(" slow start threshold: %lu\n", fSlowStartThreshold); 2275 } 2276 2277