1 /* 2 * Copyright 2006-2009, Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Andrew Galante, haiku.galante@gmail.com 7 * Axel Dörfler, axeld@pinc-software.de 8 * Hugo Santos, hugosantos@gmail.com 9 */ 10 11 12 #include "TCPEndpoint.h" 13 14 #include <netinet/in.h> 15 #include <netinet/ip.h> 16 #include <netinet/tcp.h> 17 #include <new> 18 #include <signal.h> 19 #include <stdlib.h> 20 #include <string.h> 21 22 #include <KernelExport.h> 23 #include <Select.h> 24 25 #include <net_buffer.h> 26 #include <net_datalink.h> 27 #include <net_stat.h> 28 #include <NetBufferUtilities.h> 29 #include <NetUtilities.h> 30 31 #include <lock.h> 32 #include <tracing.h> 33 #include <util/AutoLock.h> 34 #include <util/khash.h> 35 #include <util/list.h> 36 37 #include "EndpointManager.h" 38 39 40 // References: 41 // - RFC 793 - Transmission Control Protocol 42 // - RFC 813 - Window and Acknowledgement Strategy in TCP 43 // - RFC 1337 - TIME_WAIT Assassination Hazards in TCP 44 // 45 // Things this implementation currently doesn't implement: 46 // - TCP Slow Start, Congestion Avoidance, Fast Retransmit, and Fast Recovery, 47 // RFC 2001, RFC 2581, RFC 3042 48 // - NewReno Modification to TCP's Fast Recovery, RFC 2582 49 // - Explicit Congestion Notification (ECN), RFC 3168 50 // - SYN-Cache 51 // - TCP Extensions for High Performance, RFC 1323 52 // - SACK, Selective Acknowledgment - RFC 2018, RFC 2883, RFC 3517 53 // - Forward RTO-Recovery, RFC 4138 54 // - Time-Wait hash instead of keeping sockets alive 55 56 #define PrintAddress(address) \ 57 AddressString(Domain(), address, true).Data() 58 59 //#define TRACE_TCP 60 //#define PROBE_TCP 61 62 #ifdef TRACE_TCP 63 // the space before ', ##args' is important in order for this to work with cpp 2.95 64 # define TRACE(format, args...) dprintf("%ld: TCP [%llu] %p (%12s) " \ 65 format "\n", find_thread(NULL), system_time(), this, \ 66 name_for_state(fState) , ##args) 67 #else 68 # define TRACE(args...) do { } while (0) 69 #endif 70 71 #ifdef PROBE_TCP 72 # define PROBE(buffer, window) \ 73 dprintf("TCP PROBE %llu %s %s %ld snxt %lu suna %lu cw %lu sst %lu win %lu swin %lu smax-suna %lu savail %lu sqused %lu rto %llu\n", \ 74 system_time(), PrintAddress(buffer->source), \ 75 PrintAddress(buffer->destination), buffer->size, (uint32)fSendNext, \ 76 (uint32)fSendUnacknowledged, fCongestionWindow, fSlowStartThreshold, \ 77 window, fSendWindow, (uint32)(fSendMax - fSendUnacknowledged), \ 78 fSendQueue.Available(fSendNext), fSendQueue.Used(), fRetransmitTimeout) 79 #else 80 # define PROBE(buffer, window) do { } while (0) 81 #endif 82 83 #if TCP_TRACING 84 namespace TCPTracing { 85 86 class Receive : public AbstractTraceEntry { 87 public: 88 Receive(TCPEndpoint* endpoint, tcp_segment_header& segment, uint32 window, 89 net_buffer* buffer) 90 : 91 fEndpoint(endpoint), 92 fBuffer(buffer), 93 fBufferSize(buffer->size), 94 fSequence(segment.sequence), 95 fAcknowledge(segment.acknowledge), 96 fWindow(window), 97 fState(endpoint->State()), 98 fFlags(segment.flags) 99 { 100 Initialized(); 101 } 102 103 virtual void AddDump(TraceOutput& out) 104 { 105 out.Print("tcp:%p (%12s) receive buffer %p (%lu bytes), flags %x, " 106 "seq %lu, ack %lu, wnd %lu", fEndpoint, name_for_state(fState), 107 fBuffer, fBufferSize, fFlags, fSequence, fAcknowledge, fWindow); 108 } 109 110 protected: 111 TCPEndpoint* fEndpoint; 112 net_buffer* fBuffer; 113 uint32 fBufferSize; 114 uint32 fSequence; 115 uint32 fAcknowledge; 116 uint32 fWindow; 117 tcp_state fState; 118 uint8 fFlags; 119 }; 120 121 class Send : public AbstractTraceEntry { 122 public: 123 Send(TCPEndpoint* endpoint, tcp_segment_header& segment, net_buffer* buffer, 124 tcp_sequence firstSequence, tcp_sequence lastSequence) 125 : 126 fEndpoint(endpoint), 127 fBuffer(buffer), 128 fBufferSize(buffer->size), 129 fSequence(segment.sequence), 130 fAcknowledge(segment.acknowledge), 131 fFirstSequence(firstSequence.Number()), 132 fLastSequence(lastSequence.Number()), 133 fState(endpoint->State()), 134 fFlags(segment.flags) 135 { 136 Initialized(); 137 } 138 139 virtual void AddDump(TraceOutput& out) 140 { 141 out.Print("tcp:%p (%12s) send buffer %p (%lu bytes), flags %x, " 142 "seq %lu, ack %lu, first %lu, last %lu", 143 fEndpoint, name_for_state(fState), fBuffer, fBufferSize, fFlags, 144 fSequence, fAcknowledge, fFirstSequence, fLastSequence); 145 } 146 147 protected: 148 TCPEndpoint* fEndpoint; 149 net_buffer* fBuffer; 150 uint32 fBufferSize; 151 uint32 fSequence; 152 uint32 fAcknowledge; 153 uint32 fFirstSequence; 154 uint32 fLastSequence; 155 tcp_state fState; 156 uint8 fFlags; 157 }; 158 159 class State : public AbstractTraceEntry { 160 public: 161 State(TCPEndpoint* endpoint) 162 : 163 fEndpoint(endpoint), 164 fState(endpoint->State()) 165 { 166 Initialized(); 167 } 168 169 virtual void AddDump(TraceOutput& out) 170 { 171 out.Print("tcp:%p (%12s) state change", fEndpoint, 172 name_for_state(fState)); 173 } 174 175 protected: 176 TCPEndpoint* fEndpoint; 177 tcp_state fState; 178 }; 179 180 class Spawn : public AbstractTraceEntry { 181 public: 182 Spawn(TCPEndpoint* listeningEndpoint, TCPEndpoint* spawnedEndpoint) 183 : 184 fListeningEndpoint(listeningEndpoint), 185 fSpawnedEndpoint(spawnedEndpoint) 186 { 187 Initialized(); 188 } 189 190 virtual void AddDump(TraceOutput& out) 191 { 192 out.Print("tcp:%p spawns %p", fListeningEndpoint, fSpawnedEndpoint); 193 } 194 195 protected: 196 TCPEndpoint* fListeningEndpoint; 197 TCPEndpoint* fSpawnedEndpoint; 198 }; 199 200 class Error : public AbstractTraceEntry { 201 public: 202 Error(TCPEndpoint* endpoint, const char* error, int32 line) 203 : 204 fEndpoint(endpoint), 205 fLine(line), 206 fError(error), 207 fState(endpoint->State()) 208 { 209 Initialized(); 210 } 211 212 virtual void AddDump(TraceOutput& out) 213 { 214 out.Print("tcp:%p (%12s) error at line %ld: %s", fEndpoint, 215 name_for_state(fState), fLine, fError); 216 } 217 218 protected: 219 TCPEndpoint* fEndpoint; 220 int32 fLine; 221 const char* fError; 222 tcp_state fState; 223 }; 224 225 class Timer : public AbstractTraceEntry { 226 public: 227 Timer(TCPEndpoint* endpoint, const char* which) 228 : 229 fEndpoint(endpoint), 230 fWhich(which), 231 fState(endpoint->State()) 232 { 233 Initialized(); 234 } 235 236 virtual void AddDump(TraceOutput& out) 237 { 238 out.Print("tcp:%p (%12s) %s timer", fEndpoint, 239 name_for_state(fState), fWhich); 240 } 241 242 protected: 243 TCPEndpoint* fEndpoint; 244 const char* fWhich; 245 tcp_state fState; 246 }; 247 248 } // namespace TCPTracing 249 250 # define T(x) new(std::nothrow) TCPTracing::x 251 #else 252 # define T(x) 253 #endif // TCP_TRACING 254 255 // Initial estimate for packet round trip time (RTT) 256 #define TCP_INITIAL_RTT 2000000 257 258 // constants for the fFlags field 259 enum { 260 FLAG_OPTION_WINDOW_SCALE = 0x01, 261 FLAG_OPTION_TIMESTAMP = 0x02, 262 // TODO: Should FLAG_NO_RECEIVE apply as well to received connections? 263 // That is, what is expected from accept() after a shutdown() 264 // is performed on a listen()ing socket. 265 FLAG_NO_RECEIVE = 0x04, 266 FLAG_CLOSED = 0x08, 267 FLAG_DELETE_ON_CLOSE = 0x10, 268 FLAG_LOCAL = 0x20 269 }; 270 271 272 static const int kTimestampFactor = 1024; 273 274 275 static inline bigtime_t 276 absolute_timeout(bigtime_t timeout) 277 { 278 if (timeout == 0 || timeout == B_INFINITE_TIMEOUT) 279 return timeout; 280 281 return timeout + system_time(); 282 } 283 284 285 static inline status_t 286 posix_error(status_t error) 287 { 288 if (error == B_TIMED_OUT) 289 return B_WOULD_BLOCK; 290 291 return error; 292 } 293 294 295 static inline bool 296 in_window(const tcp_sequence& sequence, const tcp_sequence& receiveNext, 297 uint32 receiveWindow) 298 { 299 return sequence >= receiveNext && sequence < (receiveNext + receiveWindow); 300 } 301 302 303 static inline bool 304 segment_in_sequence(const tcp_segment_header& segment, int size, 305 const tcp_sequence& receiveNext, uint32 receiveWindow) 306 { 307 tcp_sequence sequence(segment.sequence); 308 if (size == 0) { 309 if (receiveWindow == 0) 310 return sequence == receiveNext; 311 return in_window(sequence, receiveNext, receiveWindow); 312 } else { 313 if (receiveWindow == 0) 314 return false; 315 return in_window(sequence, receiveNext, receiveWindow) 316 || in_window(sequence + size - 1, receiveNext, receiveWindow); 317 } 318 } 319 320 321 static inline bool 322 is_writable(tcp_state state) 323 { 324 return state == SYNCHRONIZE_SENT || state == SYNCHRONIZE_RECEIVED 325 || state == ESTABLISHED || state == FINISH_RECEIVED; 326 } 327 328 329 static inline uint32 tcp_now() 330 { 331 return system_time() / kTimestampFactor; 332 } 333 334 335 static inline uint32 tcp_diff_timestamp(uint32 base) 336 { 337 uint32 now = tcp_now(); 338 339 if (now > base) 340 return now - base; 341 342 return now + UINT_MAX - base; 343 } 344 345 346 static inline bool 347 state_needs_finish(int32 state) 348 { 349 return state == WAIT_FOR_FINISH_ACKNOWLEDGE 350 || state == FINISH_SENT || state == CLOSING; 351 } 352 353 354 // #pragma mark - 355 356 357 WaitList::WaitList(const char* name) 358 { 359 fCondition = 0; 360 fSem = create_sem(0, name); 361 } 362 363 364 WaitList::~WaitList() 365 { 366 delete_sem(fSem); 367 } 368 369 370 status_t 371 WaitList::InitCheck() const 372 { 373 return fSem; 374 } 375 376 377 status_t 378 WaitList::Wait(MutexLocker& locker, bigtime_t timeout) 379 { 380 locker.Unlock(); 381 382 status_t status = B_OK; 383 384 while (!atomic_test_and_set(&fCondition, 0, 1)) { 385 status = acquire_sem_etc(fSem, 1, B_ABSOLUTE_TIMEOUT | B_CAN_INTERRUPT, 386 timeout); 387 if (status != B_OK) 388 break; 389 } 390 391 locker.Lock(); 392 return status; 393 } 394 395 396 void 397 WaitList::Signal() 398 { 399 atomic_or(&fCondition, 1); 400 #ifdef __HAIKU__ 401 release_sem_etc(fSem, 1, B_DO_NOT_RESCHEDULE | B_RELEASE_ALL); 402 #else 403 int32 count; 404 if (get_sem_count(fSem, &count) == B_OK && count < 0) 405 release_sem_etc(fSem, -count, B_DO_NOT_RESCHEDULE); 406 #endif 407 } 408 409 410 // #pragma mark - 411 412 413 TCPEndpoint::TCPEndpoint(net_socket* socket) 414 : ProtocolSocket(socket), 415 fManager(NULL), 416 fReceiveList("tcp receive"), 417 fSendList("tcp send"), 418 fOptions(0), 419 fSendWindowShift(0), 420 fReceiveWindowShift(0), 421 fSendUnacknowledged(0), 422 fSendNext(0), 423 fSendMax(0), 424 fSendUrgentOffset(0), 425 fSendWindow(0), 426 fSendMaxWindow(0), 427 fSendMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE), 428 fSendQueue(socket->send.buffer_size), 429 fInitialSendSequence(0), 430 fDuplicateAcknowledgeCount(0), 431 fRoute(NULL), 432 fReceiveNext(0), 433 fReceiveMaxAdvertised(0), 434 fReceiveWindow(socket->receive.buffer_size), 435 fReceiveMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE), 436 fReceiveQueue(socket->receive.buffer_size), 437 fRoundTripTime(TCP_INITIAL_RTT / kTimestampFactor), 438 fRoundTripDeviation(TCP_INITIAL_RTT / kTimestampFactor), 439 fRetransmitTimeout(TCP_INITIAL_RTT), 440 fReceivedTimestamp(0), 441 fCongestionWindow(0), 442 fSlowStartThreshold(0), 443 fState(CLOSED), 444 fFlags(FLAG_OPTION_WINDOW_SCALE | FLAG_OPTION_TIMESTAMP) 445 { 446 // TODO: to be replaced with a real read/write locking strategy! 447 mutex_init(&fLock, "tcp lock"); 448 449 gStackModule->init_timer(&fPersistTimer, TCPEndpoint::_PersistTimer, this); 450 gStackModule->init_timer(&fRetransmitTimer, TCPEndpoint::_RetransmitTimer, 451 this); 452 gStackModule->init_timer(&fDelayedAcknowledgeTimer, 453 TCPEndpoint::_DelayedAcknowledgeTimer, this); 454 gStackModule->init_timer(&fTimeWaitTimer, TCPEndpoint::_TimeWaitTimer, 455 this); 456 } 457 458 459 TCPEndpoint::~TCPEndpoint() 460 { 461 mutex_lock(&fLock); 462 463 _CancelConnectionTimers(); 464 gStackModule->cancel_timer(&fTimeWaitTimer); 465 466 if (fManager != NULL) { 467 fManager->Unbind(this); 468 put_endpoint_manager(fManager); 469 } 470 471 mutex_destroy(&fLock); 472 473 // we need to wait for all timers to return 474 gStackModule->wait_for_timer(&fRetransmitTimer); 475 gStackModule->wait_for_timer(&fPersistTimer); 476 gStackModule->wait_for_timer(&fDelayedAcknowledgeTimer); 477 gStackModule->wait_for_timer(&fTimeWaitTimer); 478 479 gDatalinkModule->put_route(Domain(), fRoute); 480 } 481 482 483 status_t 484 TCPEndpoint::InitCheck() const 485 { 486 if (fReceiveList.InitCheck() < B_OK) 487 return fReceiveList.InitCheck(); 488 489 if (fSendList.InitCheck() < B_OK) 490 return fSendList.InitCheck(); 491 492 return B_OK; 493 } 494 495 496 // #pragma mark - protocol API 497 498 499 status_t 500 TCPEndpoint::Open() 501 { 502 TRACE("Open()"); 503 504 status_t status = ProtocolSocket::Open(); 505 if (status < B_OK) 506 return status; 507 508 fManager = get_endpoint_manager(Domain()); 509 if (fManager == NULL) 510 return EAFNOSUPPORT; 511 512 return B_OK; 513 } 514 515 516 status_t 517 TCPEndpoint::Close() 518 { 519 TRACE("Close()"); 520 521 MutexLocker locker(fLock); 522 523 if (fState == LISTEN) 524 delete_sem(fAcceptSemaphore); 525 526 if (fState == SYNCHRONIZE_SENT || fState == LISTEN) { 527 // TODO: what about linger in case of SYNCHRONIZE_SENT? 528 fState = CLOSED; 529 T(State(this)); 530 return B_OK; 531 } 532 533 status_t status = _Disconnect(true); 534 if (status != B_OK) 535 return status; 536 537 if (socket->options & SO_LINGER) { 538 TRACE("Close(): Lingering for %i secs", socket->linger); 539 540 bigtime_t maximum = absolute_timeout(socket->linger * 1000000LL); 541 542 while (fSendQueue.Used() > 0) { 543 status = fSendList.Wait(locker, maximum); 544 if (status == B_TIMED_OUT || status == B_WOULD_BLOCK) 545 break; 546 else if (status < B_OK) 547 return status; 548 } 549 550 TRACE("Close(): after waiting, the SendQ was left with %lu bytes.", 551 fSendQueue.Used()); 552 } 553 return B_OK; 554 } 555 556 557 void 558 TCPEndpoint::Free() 559 { 560 TRACE("Free()"); 561 562 MutexLocker _(fLock); 563 564 if (fState <= SYNCHRONIZE_SENT) 565 return; 566 567 // we are only interested in the timer, not in changing state 568 _EnterTimeWait(); 569 570 fFlags |= FLAG_CLOSED; 571 if ((fFlags & FLAG_DELETE_ON_CLOSE) == 0) { 572 // we'll be freed later when the 2MSL timer expires 573 gSocketModule->acquire_socket(socket); 574 } 575 } 576 577 578 /*! Creates and sends a synchronize packet to /a address, and then waits 579 until the connection has been established or refused. 580 */ 581 status_t 582 TCPEndpoint::Connect(const sockaddr* address) 583 { 584 TRACE("Connect() on address %s", PrintAddress(address)); 585 586 MutexLocker locker(fLock); 587 588 if (gStackModule->is_restarted_syscall()) { 589 bigtime_t timeout = gStackModule->restore_syscall_restart_timeout(); 590 status_t status = _WaitForEstablished(locker, timeout); 591 TRACE(" Connect(): Connection complete: %s (timeout was %llu)", 592 strerror(status), timeout); 593 return posix_error(status); 594 } 595 596 // Can only call connect() from CLOSED or LISTEN states 597 // otherwise endpoint is considered already connected 598 if (fState == LISTEN) { 599 // this socket is about to connect; remove pending connections in the backlog 600 gSocketModule->set_max_backlog(socket, 0); 601 } else if (fState == ESTABLISHED) { 602 return EISCONN; 603 } else if (fState != CLOSED) 604 return EINPROGRESS; 605 606 // TODO: this is IPv4 specific, and doesn't belong here! 607 // consider destination address INADDR_ANY as INADDR_LOOPBACK 608 sockaddr_in _address; 609 if (((sockaddr_in*)address)->sin_addr.s_addr == INADDR_ANY) { 610 memcpy(&_address, address, sizeof(sockaddr_in)); 611 _address.sin_len = sizeof(sockaddr_in); 612 _address.sin_addr.s_addr = INADDR_LOOPBACK; 613 address = (sockaddr*)&_address; 614 } 615 616 status_t status = _PrepareSendPath(address); 617 if (status < B_OK) 618 return status; 619 620 TRACE(" Connect(): starting 3-way handshake..."); 621 622 fState = SYNCHRONIZE_SENT; 623 T(State(this)); 624 625 // send SYN 626 status = _SendQueued(); 627 if (status != B_OK) { 628 _Close(); 629 return status; 630 } 631 632 // If we are running over Loopback, after _SendQueued() returns we 633 // may be in ESTABLISHED already. 634 if (fState == ESTABLISHED) { 635 TRACE(" Connect() completed after _SendQueued()"); 636 return B_OK; 637 } 638 639 // wait until 3-way handshake is complete (if needed) 640 bigtime_t timeout = min_c(socket->send.timeout, TCP_CONNECTION_TIMEOUT); 641 if (timeout == 0) { 642 // we're a non-blocking socket 643 TRACE(" Connect() delayed, return EINPROGRESS"); 644 return EINPROGRESS; 645 } 646 647 bigtime_t absoluteTimeout = absolute_timeout(timeout); 648 gStackModule->store_syscall_restart_timeout(absoluteTimeout); 649 650 status = _WaitForEstablished(locker, absoluteTimeout); 651 TRACE(" Connect(): Connection complete: %s (timeout was %llu)", 652 strerror(status), timeout); 653 return posix_error(status); 654 } 655 656 657 status_t 658 TCPEndpoint::Accept(struct net_socket** _acceptedSocket) 659 { 660 TRACE("Accept()"); 661 662 MutexLocker locker(fLock); 663 664 status_t status; 665 bigtime_t timeout = absolute_timeout(socket->receive.timeout); 666 if (gStackModule->is_restarted_syscall()) 667 timeout = gStackModule->restore_syscall_restart_timeout(); 668 else 669 gStackModule->store_syscall_restart_timeout(timeout); 670 671 do { 672 locker.Unlock(); 673 674 status = acquire_sem_etc(fAcceptSemaphore, 1, B_ABSOLUTE_TIMEOUT 675 | B_CAN_INTERRUPT, timeout); 676 if (status != B_OK) 677 return status; 678 679 locker.Lock(); 680 status = gSocketModule->dequeue_connected(socket, _acceptedSocket); 681 #ifdef TRACE_TCP 682 if (status == B_OK) 683 TRACE(" Accept() returning %p", (*_acceptedSocket)->first_protocol); 684 #endif 685 } while (status != B_OK); 686 687 return status; 688 } 689 690 691 status_t 692 TCPEndpoint::Bind(const sockaddr *address) 693 { 694 if (address == NULL) 695 return B_BAD_VALUE; 696 697 MutexLocker lock(fLock); 698 699 TRACE("Bind() on address %s", PrintAddress(address)); 700 701 if (fState != CLOSED) 702 return EISCONN; 703 704 return fManager->Bind(this, address); 705 } 706 707 708 status_t 709 TCPEndpoint::Unbind(struct sockaddr *address) 710 { 711 TRACE("Unbind()"); 712 713 MutexLocker _(fLock); 714 return fManager->Unbind(this); 715 } 716 717 718 status_t 719 TCPEndpoint::Listen(int count) 720 { 721 TRACE("Listen()"); 722 723 MutexLocker _(fLock); 724 725 if (fState != CLOSED) 726 return B_BAD_VALUE; 727 728 fAcceptSemaphore = create_sem(0, "tcp accept"); 729 if (fAcceptSemaphore < B_OK) 730 return ENOBUFS; 731 732 status_t status = fManager->SetPassive(this); 733 if (status != B_OK) { 734 delete_sem(fAcceptSemaphore); 735 fAcceptSemaphore = -1; 736 return status; 737 } 738 739 gSocketModule->set_max_backlog(socket, count); 740 741 fState = LISTEN; 742 T(State(this)); 743 return B_OK; 744 } 745 746 747 status_t 748 TCPEndpoint::Shutdown(int direction) 749 { 750 TRACE("Shutdown(%i)", direction); 751 752 MutexLocker lock(fLock); 753 754 if (direction == SHUT_RD || direction == SHUT_RDWR) 755 fFlags |= FLAG_NO_RECEIVE; 756 757 if (direction == SHUT_WR || direction == SHUT_RDWR) { 758 // TODO: That's not correct. After read/write shutting down the socket 759 // one should still be able to read previously arrived data. 760 _Disconnect(false); 761 } 762 763 return B_OK; 764 } 765 766 767 /*! Puts data contained in \a buffer into send buffer */ 768 status_t 769 TCPEndpoint::SendData(net_buffer *buffer) 770 { 771 MutexLocker lock(fLock); 772 773 TRACE("SendData(buffer %p, size %lu, flags %lx) [total %lu bytes, has %lu]", 774 buffer, buffer->size, buffer->flags, fSendQueue.Size(), 775 fSendQueue.Free()); 776 777 if (fState == CLOSED) 778 return ENOTCONN; 779 if (fState == LISTEN) 780 return EDESTADDRREQ; 781 if (!is_writable(fState)) { 782 // we only send signals when called from userland 783 if (gStackModule->is_syscall()) 784 send_signal(find_thread(NULL), SIGPIPE); 785 return EPIPE; 786 } 787 788 uint32 flags = buffer->flags; 789 size_t left = buffer->size; 790 791 bigtime_t timeout = absolute_timeout(socket->send.timeout); 792 if (gStackModule->is_restarted_syscall()) 793 timeout = gStackModule->restore_syscall_restart_timeout(); 794 else 795 gStackModule->store_syscall_restart_timeout(timeout); 796 797 while (left > 0) { 798 while (fSendQueue.Free() < socket->send.low_water_mark) { 799 // wait until enough space is available 800 status_t status = fSendList.Wait(lock, timeout); 801 if (status < B_OK) { 802 TRACE(" SendData() returning %s (%d)", 803 strerror(posix_error(status)), (int)posix_error(status)); 804 return posix_error(status); 805 } 806 807 if (!is_writable(fState)) { 808 // we only send signals when called from userland 809 if (gStackModule->is_syscall()) 810 send_signal(find_thread(NULL), SIGPIPE); 811 return EPIPE; 812 } 813 } 814 815 size_t size = fSendQueue.Free(); 816 if (size < left) { 817 // we need to split the original buffer 818 net_buffer* clone = gBufferModule->clone(buffer, false); 819 // TODO: add offset/size parameter to net_buffer::clone() or 820 // even a move_data() function, as this is a bit inefficient 821 if (clone == NULL) 822 return ENOBUFS; 823 824 status_t status = gBufferModule->trim(clone, size); 825 if (status != B_OK) { 826 gBufferModule->free(clone); 827 return status; 828 } 829 830 gBufferModule->remove_header(buffer, size); 831 left -= size; 832 fSendQueue.Add(clone); 833 } else { 834 left -= buffer->size; 835 fSendQueue.Add(buffer); 836 } 837 } 838 839 TRACE(" SendData(): %lu bytes used.", fSendQueue.Used()); 840 841 bool force = false; 842 if ((flags & MSG_OOB) != 0) { 843 fSendUrgentOffset = fSendQueue.LastSequence(); 844 // RFC 961 specifies that the urgent offset points to the last 845 // byte of urgent data. However, this is commonly implemented as 846 // here, ie. it points to the first byte after the urgent data. 847 force = true; 848 } 849 if ((flags & MSG_EOF) != 0) 850 _Disconnect(false); 851 852 if (fState == ESTABLISHED || fState == FINISH_RECEIVED) 853 _SendQueued(force); 854 855 return B_OK; 856 } 857 858 859 ssize_t 860 TCPEndpoint::SendAvailable() 861 { 862 MutexLocker locker(fLock); 863 864 ssize_t available; 865 866 if (is_writable(fState)) 867 available = fSendQueue.Free(); 868 else 869 available = EPIPE; 870 871 TRACE("SendAvailable(): %li", available); 872 return available; 873 } 874 875 876 status_t 877 TCPEndpoint::FillStat(net_stat *stat) 878 { 879 MutexLocker _(fLock); 880 881 strlcpy(stat->state, name_for_state(fState), sizeof(stat->state)); 882 stat->receive_queue_size = fReceiveQueue.Available(); 883 stat->send_queue_size = fSendQueue.Used(); 884 885 return B_OK; 886 } 887 888 889 status_t 890 TCPEndpoint::ReadData(size_t numBytes, uint32 flags, net_buffer** _buffer) 891 { 892 TRACE("ReadData(%lu bytes, flags 0x%x)", numBytes, (unsigned int)flags); 893 894 MutexLocker locker(fLock); 895 896 *_buffer = NULL; 897 898 if (fState == CLOSED) 899 return ENOTCONN; 900 901 bigtime_t timeout = absolute_timeout(socket->receive.timeout); 902 if (gStackModule->is_restarted_syscall()) 903 timeout = gStackModule->restore_syscall_restart_timeout(); 904 else 905 gStackModule->store_syscall_restart_timeout(timeout); 906 907 if (fState == SYNCHRONIZE_SENT || fState == SYNCHRONIZE_RECEIVED) { 908 if (flags & MSG_DONTWAIT) 909 return B_WOULD_BLOCK; 910 911 status_t status = _WaitForEstablished(locker, timeout); 912 if (status < B_OK) 913 return posix_error(status); 914 } 915 916 size_t dataNeeded = socket->receive.low_water_mark; 917 918 // When MSG_WAITALL is set then the function should block 919 // until the full amount of data can be returned. 920 if (flags & MSG_WAITALL) 921 dataNeeded = numBytes; 922 923 // TODO: add support for urgent data (MSG_OOB) 924 925 while (true) { 926 if (fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE 927 || fState == TIME_WAIT) { 928 // ``Connection closing''. 929 return B_OK; 930 } 931 932 if (fReceiveQueue.Available() > 0) { 933 if (fReceiveQueue.Available() >= dataNeeded 934 || (fReceiveQueue.PushedData() > 0 935 && fReceiveQueue.PushedData() >= fReceiveQueue.Available())) 936 break; 937 } else if (fState == FINISH_RECEIVED) { 938 // ``If no text is awaiting delivery, the RECEIVE will 939 // get a Connection closing''. 940 return B_OK; 941 } 942 943 if ((flags & MSG_DONTWAIT) != 0 || socket->receive.timeout == 0) 944 return B_WOULD_BLOCK; 945 946 if ((fFlags & FLAG_NO_RECEIVE) != 0) 947 return B_OK; 948 949 status_t status = fReceiveList.Wait(locker, timeout); 950 if (status < B_OK) { 951 // The Open Group base specification mentions that EINTR should be 952 // returned if the recv() is interrupted before _any data_ is 953 // available. So we actually check if there is data, and if so, 954 // push it to the user. 955 if ((status == B_TIMED_OUT || status == B_INTERRUPTED) 956 && fReceiveQueue.Available() > 0) 957 break; 958 959 return posix_error(status); 960 } 961 } 962 963 TRACE(" ReadData(): %lu are available.", fReceiveQueue.Available()); 964 965 if (numBytes < fReceiveQueue.Available()) 966 fReceiveList.Signal(); 967 968 bool clone = (flags & MSG_PEEK) != 0; 969 970 ssize_t receivedBytes = fReceiveQueue.Get(numBytes, !clone, _buffer); 971 972 TRACE(" ReadData(): %lu bytes kept.", fReceiveQueue.Available()); 973 974 // if we are opening the window, check if we should send an ACK 975 if (!clone) 976 SendAcknowledge(false); 977 978 return receivedBytes; 979 } 980 981 982 ssize_t 983 TCPEndpoint::ReadAvailable() 984 { 985 MutexLocker locker(fLock); 986 987 TRACE("ReadAvailable(): %li", _AvailableData()); 988 989 return _AvailableData(); 990 } 991 992 993 status_t 994 TCPEndpoint::SetSendBufferSize(size_t length) 995 { 996 MutexLocker _(fLock); 997 fSendQueue.SetMaxBytes(length); 998 return B_OK; 999 } 1000 1001 1002 status_t 1003 TCPEndpoint::SetReceiveBufferSize(size_t length) 1004 { 1005 MutexLocker _(fLock); 1006 fReceiveQueue.SetMaxBytes(length); 1007 return B_OK; 1008 } 1009 1010 1011 status_t 1012 TCPEndpoint::GetOption(int option, void* _value, int* _length) 1013 { 1014 if (*_length != sizeof(int)) 1015 return B_BAD_VALUE; 1016 1017 int* value = (int*)_value; 1018 1019 switch (option) { 1020 case TCP_NODELAY: 1021 if ((fOptions & TCP_NODELAY) != 0) 1022 *value = 1; 1023 else 1024 *value = 0; 1025 return B_OK; 1026 1027 case TCP_MAXSEG: 1028 *value = fReceiveMaxSegmentSize; 1029 return B_OK; 1030 1031 default: 1032 return B_BAD_VALUE; 1033 } 1034 } 1035 1036 1037 status_t 1038 TCPEndpoint::SetOption(int option, const void* _value, int length) 1039 { 1040 if (option != TCP_NODELAY) 1041 return B_BAD_VALUE; 1042 1043 if (length != sizeof(int)) 1044 return B_BAD_VALUE; 1045 1046 const int* value = (const int*)_value; 1047 1048 MutexLocker _(fLock); 1049 if (*value) 1050 fOptions |= TCP_NODELAY; 1051 else 1052 fOptions &= ~TCP_NODELAY; 1053 1054 return B_OK; 1055 } 1056 1057 1058 // #pragma mark - misc 1059 1060 1061 bool 1062 TCPEndpoint::IsBound() const 1063 { 1064 return !LocalAddress().IsEmpty(true); 1065 } 1066 1067 1068 bool 1069 TCPEndpoint::IsLocal() const 1070 { 1071 return (fFlags & FLAG_LOCAL) != 0; 1072 } 1073 1074 1075 status_t 1076 TCPEndpoint::DelayedAcknowledge() 1077 { 1078 if (gStackModule->cancel_timer(&fDelayedAcknowledgeTimer)) { 1079 // timer was active, send an ACK now (with the exception above, 1080 // we send every other ACK) 1081 return SendAcknowledge(true); 1082 } 1083 1084 gStackModule->set_timer(&fDelayedAcknowledgeTimer, 1085 TCP_DELAYED_ACKNOWLEDGE_TIMEOUT); 1086 return B_OK; 1087 } 1088 1089 1090 status_t 1091 TCPEndpoint::SendAcknowledge(bool force) 1092 { 1093 return _SendQueued(force, 0); 1094 } 1095 1096 1097 void 1098 TCPEndpoint::_StartPersistTimer() 1099 { 1100 gStackModule->set_timer(&fPersistTimer, 1000000LL); 1101 } 1102 1103 1104 void 1105 TCPEndpoint::_EnterTimeWait() 1106 { 1107 TRACE("_EnterTimeWait()\n"); 1108 1109 _CancelConnectionTimers(); 1110 1111 if (fState == TIME_WAIT && IsLocal()) { 1112 // we do not use TIME_WAIT state for local connections 1113 fFlags |= FLAG_DELETE_ON_CLOSE; 1114 return; 1115 } 1116 1117 _UpdateTimeWait(); 1118 } 1119 1120 1121 void 1122 TCPEndpoint::_UpdateTimeWait() 1123 { 1124 gStackModule->set_timer(&fTimeWaitTimer, TCP_MAX_SEGMENT_LIFETIME << 1); 1125 } 1126 1127 1128 void 1129 TCPEndpoint::_CancelConnectionTimers() 1130 { 1131 gStackModule->cancel_timer(&fRetransmitTimer); 1132 gStackModule->cancel_timer(&fPersistTimer); 1133 gStackModule->cancel_timer(&fDelayedAcknowledgeTimer); 1134 } 1135 1136 1137 /*! Sends the FIN flag to the peer when the connection is still open. 1138 Moves the endpoint to the next state depending on where it was. 1139 */ 1140 status_t 1141 TCPEndpoint::_Disconnect(bool closing) 1142 { 1143 tcp_state previousState = fState; 1144 1145 if (fState == SYNCHRONIZE_RECEIVED || fState == ESTABLISHED) 1146 fState = FINISH_SENT; 1147 else if (fState == FINISH_RECEIVED) 1148 fState = WAIT_FOR_FINISH_ACKNOWLEDGE; 1149 else 1150 return B_OK; 1151 1152 T(State(this)); 1153 1154 status_t status = _SendQueued(); 1155 if (status != B_OK) { 1156 fState = previousState; 1157 T(State(this)); 1158 return status; 1159 } 1160 1161 return B_OK; 1162 } 1163 1164 1165 void 1166 TCPEndpoint::_MarkEstablished() 1167 { 1168 fState = ESTABLISHED; 1169 T(State(this)); 1170 1171 if (gSocketModule->has_parent(socket)) { 1172 gSocketModule->set_connected(socket); 1173 release_sem_etc(fAcceptSemaphore, 1, B_DO_NOT_RESCHEDULE); 1174 } 1175 1176 fSendList.Signal(); 1177 } 1178 1179 1180 status_t 1181 TCPEndpoint::_WaitForEstablished(MutexLocker &locker, bigtime_t timeout) 1182 { 1183 // TODO: Checking for CLOSED seems correct, but breaks several neon tests. 1184 // When investigating this, also have a look at _Close() and _HandleReset(). 1185 while (fState < ESTABLISHED/* && fState != CLOSED*/) { 1186 if (socket->error != B_OK) 1187 return socket->error; 1188 1189 status_t status = fSendList.Wait(locker, timeout); 1190 if (status < B_OK) 1191 return status; 1192 } 1193 1194 return B_OK; 1195 } 1196 1197 1198 // #pragma mark - receive 1199 1200 1201 void 1202 TCPEndpoint::_Close() 1203 { 1204 _CancelConnectionTimers(); 1205 fState = CLOSED; 1206 T(State(this)); 1207 1208 fFlags |= FLAG_DELETE_ON_CLOSE; 1209 1210 fSendList.Signal(); 1211 _NotifyReader(); 1212 1213 if (gSocketModule->has_parent(socket)) { 1214 // We still have a parent - obviously, we haven't been accepted yet, 1215 // so no one could ever close us. 1216 _CancelConnectionTimers(); 1217 gSocketModule->set_aborted(socket); 1218 } 1219 } 1220 1221 1222 void 1223 TCPEndpoint::_HandleReset(status_t error) 1224 { 1225 socket->error = error; 1226 _Close(); 1227 1228 gSocketModule->notify(socket, B_SELECT_WRITE, error); 1229 gSocketModule->notify(socket, B_SELECT_ERROR, error); 1230 } 1231 1232 1233 void 1234 TCPEndpoint::_DuplicateAcknowledge(tcp_segment_header &segment) 1235 { 1236 if (++fDuplicateAcknowledgeCount < 3) 1237 return; 1238 1239 if (fDuplicateAcknowledgeCount == 3) { 1240 _ResetSlowStart(); 1241 fCongestionWindow = fSlowStartThreshold + 3 1242 * fSendMaxSegmentSize; 1243 fSendNext = segment.acknowledge; 1244 } else if (fDuplicateAcknowledgeCount > 3) 1245 fCongestionWindow += fSendMaxSegmentSize; 1246 1247 _SendQueued(); 1248 } 1249 1250 1251 void 1252 TCPEndpoint::_UpdateTimestamps(tcp_segment_header& segment, 1253 size_t segmentLength) 1254 { 1255 if (fFlags & FLAG_OPTION_TIMESTAMP) { 1256 tcp_sequence sequence(segment.sequence); 1257 1258 if (fLastAcknowledgeSent >= sequence 1259 && fLastAcknowledgeSent < (sequence + segmentLength)) 1260 fReceivedTimestamp = segment.timestamp_value; 1261 } 1262 } 1263 1264 1265 ssize_t 1266 TCPEndpoint::_AvailableData() const 1267 { 1268 // TODO: Refer to the FLAG_NO_RECEIVE comment above regarding 1269 // the application of FLAG_NO_RECEIVE in listen()ing 1270 // sockets. 1271 if (fState == LISTEN) 1272 return gSocketModule->count_connected(socket); 1273 if (fState == SYNCHRONIZE_SENT) 1274 return 0; 1275 1276 ssize_t availableData = fReceiveQueue.Available(); 1277 1278 if (availableData == 0 && !_ShouldReceive()) 1279 return ENOTCONN; 1280 1281 return availableData; 1282 } 1283 1284 1285 void 1286 TCPEndpoint::_NotifyReader() 1287 { 1288 fReceiveList.Signal(); 1289 gSocketModule->notify(socket, B_SELECT_READ, _AvailableData()); 1290 } 1291 1292 1293 bool 1294 TCPEndpoint::_AddData(tcp_segment_header& segment, net_buffer* buffer) 1295 { 1296 fReceiveQueue.Add(buffer, segment.sequence); 1297 fReceiveNext = fReceiveQueue.NextSequence(); 1298 1299 TRACE(" _AddData(): adding data, receive next = %lu. Now have %lu bytes.", 1300 (uint32)fReceiveNext, fReceiveQueue.Available()); 1301 1302 if (segment.flags & TCP_FLAG_PUSH) 1303 fReceiveQueue.SetPushPointer(); 1304 1305 return fReceiveQueue.Available() > 0; 1306 } 1307 1308 1309 void 1310 TCPEndpoint::_PrepareReceivePath(tcp_segment_header& segment) 1311 { 1312 fInitialReceiveSequence = segment.sequence; 1313 1314 // count the received SYN 1315 segment.sequence++; 1316 1317 fReceiveNext = segment.sequence; 1318 fReceiveQueue.SetInitialSequence(segment.sequence); 1319 1320 if ((fOptions & TCP_NOOPT) == 0) { 1321 if (segment.max_segment_size > 0) 1322 fSendMaxSegmentSize = segment.max_segment_size; 1323 1324 if (segment.options & TCP_HAS_WINDOW_SCALE) { 1325 fFlags |= FLAG_OPTION_WINDOW_SCALE; 1326 fSendWindowShift = segment.window_shift; 1327 } else { 1328 fFlags &= ~FLAG_OPTION_WINDOW_SCALE; 1329 fReceiveWindowShift = 0; 1330 } 1331 1332 if (segment.options & TCP_HAS_TIMESTAMPS) { 1333 fFlags |= FLAG_OPTION_TIMESTAMP; 1334 fReceivedTimestamp = segment.timestamp_value; 1335 } else 1336 fFlags &= ~FLAG_OPTION_TIMESTAMP; 1337 } 1338 1339 fCongestionWindow = 2 * fSendMaxSegmentSize; 1340 fSlowStartThreshold = (uint32)segment.advertised_window << fSendWindowShift; 1341 } 1342 1343 1344 bool 1345 TCPEndpoint::_ShouldReceive() const 1346 { 1347 if ((fFlags & FLAG_NO_RECEIVE) != 0) 1348 return false; 1349 1350 return fState == ESTABLISHED || fState == FINISH_SENT 1351 || fState == FINISH_ACKNOWLEDGED; 1352 } 1353 1354 1355 int32 1356 TCPEndpoint::_Spawn(TCPEndpoint* parent, tcp_segment_header& segment, 1357 net_buffer* buffer) 1358 { 1359 MutexLocker _(fLock); 1360 1361 // TODO error checking 1362 ProtocolSocket::Open(); 1363 1364 fState = SYNCHRONIZE_RECEIVED; 1365 T(Spawn(parent, this)); 1366 1367 fManager = parent->fManager; 1368 1369 LocalAddress().SetTo(buffer->destination); 1370 PeerAddress().SetTo(buffer->source); 1371 1372 TRACE("Spawn()"); 1373 1374 // TODO: proper error handling! 1375 if (fManager->BindChild(this) != B_OK) { 1376 T(Error(this, "binding failed", __LINE__)); 1377 return DROP; 1378 } 1379 if (_PrepareSendPath(*PeerAddress()) != B_OK) { 1380 T(Error(this, "prepare send faild", __LINE__)); 1381 return DROP; 1382 } 1383 1384 fOptions = parent->fOptions; 1385 fAcceptSemaphore = parent->fAcceptSemaphore; 1386 1387 _PrepareReceivePath(segment); 1388 1389 // send SYN+ACK 1390 if (_SendQueued() != B_OK) { 1391 T(Error(this, "sending failed", __LINE__)); 1392 return DROP; 1393 } 1394 1395 segment.flags &= ~TCP_FLAG_SYNCHRONIZE; 1396 // we handled this flag now, it must not be set for further processing 1397 1398 return _Receive(segment, buffer); 1399 } 1400 1401 1402 int32 1403 TCPEndpoint::_ListenReceive(tcp_segment_header& segment, net_buffer* buffer) 1404 { 1405 TRACE("ListenReceive()"); 1406 1407 // Essentially, we accept only TCP_FLAG_SYNCHRONIZE in this state, 1408 // but the error behaviour differs 1409 if (segment.flags & TCP_FLAG_RESET) 1410 return DROP; 1411 if (segment.flags & TCP_FLAG_ACKNOWLEDGE) 1412 return DROP | RESET; 1413 if ((segment.flags & TCP_FLAG_SYNCHRONIZE) == 0) 1414 return DROP; 1415 1416 // TODO: drop broadcast/multicast 1417 1418 // spawn new endpoint for accept() 1419 net_socket* newSocket; 1420 if (gSocketModule->spawn_pending_socket(socket, &newSocket) < B_OK) { 1421 T(Error(this, "spawning failed", __LINE__)); 1422 return DROP; 1423 } 1424 1425 return ((TCPEndpoint *)newSocket->first_protocol)->_Spawn(this, 1426 segment, buffer); 1427 } 1428 1429 1430 int32 1431 TCPEndpoint::_SynchronizeSentReceive(tcp_segment_header &segment, 1432 net_buffer *buffer) 1433 { 1434 TRACE("_SynchronizeSentReceive()"); 1435 1436 if ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0 1437 && (fInitialSendSequence >= segment.acknowledge 1438 || fSendMax < segment.acknowledge)) 1439 return DROP | RESET; 1440 1441 if (segment.flags & TCP_FLAG_RESET) { 1442 _HandleReset(ECONNREFUSED); 1443 return DROP; 1444 } 1445 1446 if ((segment.flags & TCP_FLAG_SYNCHRONIZE) == 0) 1447 return DROP; 1448 1449 fSendUnacknowledged = segment.acknowledge; 1450 _PrepareReceivePath(segment); 1451 1452 if (segment.flags & TCP_FLAG_ACKNOWLEDGE) { 1453 _MarkEstablished(); 1454 } else { 1455 // simultaneous open 1456 fState = SYNCHRONIZE_RECEIVED; 1457 T(State(this)); 1458 } 1459 1460 segment.flags &= ~TCP_FLAG_SYNCHRONIZE; 1461 // we handled this flag now, it must not be set for further processing 1462 1463 return _Receive(segment, buffer) | IMMEDIATE_ACKNOWLEDGE; 1464 } 1465 1466 1467 int32 1468 TCPEndpoint::_Receive(tcp_segment_header& segment, net_buffer* buffer) 1469 { 1470 uint32 advertisedWindow = (uint32)segment.advertised_window 1471 << fSendWindowShift; 1472 size_t segmentLength = buffer->size; 1473 1474 // First, handle the most common case for uni-directional data transfer 1475 // (known as header prediction - the segment must not change the window, 1476 // and must be the expected sequence, and contain no control flags) 1477 1478 if (fState == ESTABLISHED 1479 && segment.AcknowledgeOnly() 1480 && fReceiveNext == segment.sequence 1481 && advertisedWindow > 0 && advertisedWindow == fSendWindow 1482 && fSendNext == fSendMax) { 1483 _UpdateTimestamps(segment, segmentLength); 1484 1485 if (segmentLength == 0) { 1486 // this is a pure acknowledge segment - we're on the sending end 1487 if (fSendUnacknowledged < segment.acknowledge 1488 && fSendMax >= segment.acknowledge) { 1489 _Acknowledged(segment); 1490 return DROP; 1491 } 1492 } else if (segment.acknowledge == fSendUnacknowledged 1493 && fReceiveQueue.IsContiguous() 1494 && fReceiveQueue.Free() >= segmentLength 1495 && (fFlags & FLAG_NO_RECEIVE) == 0) { 1496 if (_AddData(segment, buffer)) 1497 _NotifyReader(); 1498 1499 return KEEP | ((segment.flags & TCP_FLAG_PUSH) != 0 1500 ? IMMEDIATE_ACKNOWLEDGE : ACKNOWLEDGE); 1501 } 1502 } 1503 1504 // The fast path was not applicable, so we continue with the standard 1505 // processing of the incoming segment 1506 1507 ASSERT(fState != SYNCHRONIZE_SENT && fState != LISTEN); 1508 1509 if (fState != CLOSED && fState != TIME_WAIT) { 1510 // Check sequence number 1511 if (!segment_in_sequence(segment, segmentLength, fReceiveNext, 1512 fReceiveWindow)) { 1513 TRACE(" Receive(): segment out of window, next: %lu wnd: %lu", 1514 (uint32)fReceiveNext, fReceiveWindow); 1515 if (segment.flags & TCP_FLAG_RESET) { 1516 // TODO: this doesn't look right - review! 1517 return DROP; 1518 } 1519 return DROP | IMMEDIATE_ACKNOWLEDGE; 1520 } 1521 } 1522 1523 if (segment.flags & TCP_FLAG_RESET) { 1524 // Is this a valid reset? 1525 // We generally ignore resets in time wait state (see RFC 1337) 1526 if (fLastAcknowledgeSent <= segment.sequence 1527 && tcp_sequence(segment.sequence) < (fLastAcknowledgeSent 1528 + fReceiveWindow) 1529 && fState != TIME_WAIT) { 1530 status_t error; 1531 if (fState == SYNCHRONIZE_RECEIVED) 1532 error = ECONNREFUSED; 1533 else if (fState == CLOSING || fState == WAIT_FOR_FINISH_ACKNOWLEDGE) 1534 error = ENOTCONN; 1535 else 1536 error = ECONNRESET; 1537 1538 _HandleReset(error); 1539 } 1540 1541 return DROP; 1542 } 1543 1544 if ((segment.flags & TCP_FLAG_SYNCHRONIZE) != 0 1545 || (fState == SYNCHRONIZE_RECEIVED 1546 && (fInitialReceiveSequence > segment.sequence 1547 || (segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0 1548 && (fSendUnacknowledged > segment.acknowledge 1549 || fSendMax < segment.acknowledge)))) { 1550 // reset the connection - either the initial SYN was faulty, or we 1551 // received a SYN within the data stream 1552 return DROP | RESET; 1553 } 1554 1555 fReceiveWindow = max_c(fReceiveQueue.Free(), fReceiveWindow); 1556 // the window must not shrink 1557 1558 // trim buffer to be within the receive window 1559 int32 drop = (int32)(fReceiveNext - segment.sequence).Number(); 1560 if (drop > 0) { 1561 if ((uint32)drop > buffer->size 1562 || ((uint32)drop == buffer->size 1563 && (segment.flags & TCP_FLAG_FINISH) == 0)) { 1564 // don't accidently remove a FIN we shouldn't remove 1565 segment.flags &= ~TCP_FLAG_FINISH; 1566 drop = buffer->size; 1567 } 1568 1569 // remove duplicate data at the start 1570 TRACE("* remove %ld bytes from the start", drop); 1571 gBufferModule->remove_header(buffer, drop); 1572 segment.sequence += drop; 1573 } 1574 1575 int32 action = KEEP; 1576 1577 drop = (int32)(segment.sequence + buffer->size 1578 - (fReceiveNext + fReceiveWindow)).Number(); 1579 if (drop > 0) { 1580 // remove data exceeding our window 1581 if ((uint32)drop >= buffer->size) { 1582 // if we can accept data, or the segment is not what we'd expect, 1583 // drop the segment (an immediate acknowledge is always triggered) 1584 if (fReceiveWindow != 0 || segment.sequence != fReceiveNext) 1585 return DROP | IMMEDIATE_ACKNOWLEDGE; 1586 1587 action |= IMMEDIATE_ACKNOWLEDGE; 1588 } 1589 1590 if ((segment.flags & TCP_FLAG_FINISH) != 0) { 1591 // we need to remove the finish, too, as part of the data 1592 drop--; 1593 } 1594 1595 segment.flags &= ~(TCP_FLAG_FINISH | TCP_FLAG_PUSH); 1596 TRACE("* remove %ld bytes from the end", drop); 1597 gBufferModule->remove_trailer(buffer, drop); 1598 } 1599 1600 #ifdef TRACE_TCP 1601 if (advertisedWindow > fSendWindow) { 1602 TRACE(" Receive(): Window update %lu -> %lu", fSendWindow, 1603 advertisedWindow); 1604 } 1605 #endif 1606 1607 fSendWindow = advertisedWindow; 1608 if (advertisedWindow > fSendMaxWindow) 1609 fSendMaxWindow = advertisedWindow; 1610 1611 // Then look at the acknowledgement for any updates 1612 1613 if ((segment.flags & TCP_FLAG_ACKNOWLEDGE) != 0) { 1614 // process acknowledged data 1615 if (fState == SYNCHRONIZE_RECEIVED) 1616 _MarkEstablished(); 1617 1618 if (fSendMax < segment.acknowledge) 1619 return DROP | IMMEDIATE_ACKNOWLEDGE; 1620 1621 if (segment.acknowledge < fSendUnacknowledged) { 1622 if (buffer->size == 0 && advertisedWindow == fSendWindow 1623 && (segment.flags & TCP_FLAG_FINISH) == 0) { 1624 TRACE("Receive(): duplicate ack!"); 1625 1626 _DuplicateAcknowledge(segment); 1627 } 1628 1629 return DROP; 1630 } else { 1631 // this segment acknowledges in flight data 1632 1633 if (fDuplicateAcknowledgeCount >= 3) { 1634 // deflate the window. 1635 fCongestionWindow = fSlowStartThreshold; 1636 } 1637 1638 fDuplicateAcknowledgeCount = 0; 1639 1640 if (fSendMax == segment.acknowledge) 1641 TRACE("Receive(): all inflight data ack'd!"); 1642 1643 if (segment.acknowledge > fSendQueue.LastSequence() 1644 && fState > ESTABLISHED) { 1645 TRACE("Receive(): FIN has been acknowledged!"); 1646 1647 switch (fState) { 1648 case FINISH_SENT: 1649 fState = FINISH_ACKNOWLEDGED; 1650 T(State(this)); 1651 break; 1652 case CLOSING: 1653 fState = TIME_WAIT; 1654 T(State(this)); 1655 _EnterTimeWait(); 1656 return DROP; 1657 case WAIT_FOR_FINISH_ACKNOWLEDGE: 1658 _Close(); 1659 break; 1660 1661 default: 1662 break; 1663 } 1664 } 1665 1666 if (fState != CLOSED) 1667 _Acknowledged(segment); 1668 } 1669 } 1670 1671 if (segment.flags & TCP_FLAG_URGENT) { 1672 if (fState == ESTABLISHED || fState == FINISH_SENT 1673 || fState == FINISH_ACKNOWLEDGED) { 1674 // TODO: Handle urgent data: 1675 // - RCV.UP <- max(RCV.UP, SEG.UP) 1676 // - signal the user that urgent data is available (SIGURG) 1677 } 1678 } 1679 1680 bool notify = false; 1681 1682 if (buffer->size > 0 && _ShouldReceive()) 1683 notify = _AddData(segment, buffer); 1684 else { 1685 if ((fFlags & FLAG_NO_RECEIVE) != 0) 1686 fReceiveNext += buffer->size; 1687 1688 action = (action & ~KEEP) | DROP; 1689 } 1690 1691 if (segment.flags & TCP_FLAG_FINISH) { 1692 segmentLength++; 1693 if (fState != CLOSED && fState != LISTEN && fState != SYNCHRONIZE_SENT) { 1694 TRACE("Receive(): peer is finishing connection!"); 1695 fReceiveNext++; 1696 notify = true; 1697 1698 // FIN implies PUSH 1699 fReceiveQueue.SetPushPointer(); 1700 1701 // we'll reply immediately to the FIN if we are not 1702 // transitioning to TIME WAIT so we immediatly ACK it. 1703 action |= IMMEDIATE_ACKNOWLEDGE; 1704 1705 // other side is closing connection; change states 1706 switch (fState) { 1707 case ESTABLISHED: 1708 case SYNCHRONIZE_RECEIVED: 1709 fState = FINISH_RECEIVED; 1710 T(State(this)); 1711 break; 1712 case FINISH_SENT: 1713 // simultaneous close 1714 fState = CLOSING; 1715 T(State(this)); 1716 break; 1717 case FINISH_ACKNOWLEDGED: 1718 fState = TIME_WAIT; 1719 T(State(this)); 1720 _EnterTimeWait(); 1721 break; 1722 case TIME_WAIT: 1723 _UpdateTimeWait(); 1724 break; 1725 1726 default: 1727 break; 1728 } 1729 } 1730 } 1731 1732 if (notify) 1733 _NotifyReader(); 1734 1735 if (buffer->size > 0 || (segment.flags & TCP_FLAG_SYNCHRONIZE) != 0) 1736 action |= ACKNOWLEDGE; 1737 1738 _UpdateTimestamps(segment, segmentLength); 1739 1740 TRACE("Receive() Action %ld", action); 1741 1742 return action; 1743 } 1744 1745 1746 int32 1747 TCPEndpoint::SegmentReceived(tcp_segment_header& segment, net_buffer* buffer) 1748 { 1749 MutexLocker locker(fLock); 1750 1751 TRACE("SegmentReceived(): buffer %p (%lu bytes) address %s to %s\n" 1752 "\tflags 0x%x, seq %lu, ack %lu, wnd %lu", 1753 buffer, buffer->size, PrintAddress(buffer->source), 1754 PrintAddress(buffer->destination), segment.flags, segment.sequence, 1755 segment.acknowledge, 1756 (uint32)segment.advertised_window << fSendWindowShift); 1757 T(Receive(this, segment, 1758 (uint32)segment.advertised_window << fSendWindowShift, buffer)); 1759 int32 segmentAction = DROP; 1760 1761 switch (fState) { 1762 case LISTEN: 1763 segmentAction = _ListenReceive(segment, buffer); 1764 break; 1765 1766 case SYNCHRONIZE_SENT: 1767 segmentAction = _SynchronizeSentReceive(segment, buffer); 1768 break; 1769 1770 case SYNCHRONIZE_RECEIVED: 1771 case ESTABLISHED: 1772 case FINISH_RECEIVED: 1773 case WAIT_FOR_FINISH_ACKNOWLEDGE: 1774 case FINISH_SENT: 1775 case FINISH_ACKNOWLEDGED: 1776 case CLOSING: 1777 case TIME_WAIT: 1778 case CLOSED: 1779 segmentAction = _Receive(segment, buffer); 1780 break; 1781 } 1782 1783 // process acknowledge action as asked for by the *Receive() method 1784 if (segmentAction & IMMEDIATE_ACKNOWLEDGE) 1785 SendAcknowledge(true); 1786 else if (segmentAction & ACKNOWLEDGE) 1787 DelayedAcknowledge(); 1788 1789 if ((fFlags & (FLAG_CLOSED | FLAG_DELETE_ON_CLOSE)) 1790 == (FLAG_CLOSED | FLAG_DELETE_ON_CLOSE)) { 1791 locker.Unlock(); 1792 gSocketModule->release_socket(socket); 1793 } 1794 1795 return segmentAction; 1796 } 1797 1798 1799 // #pragma mark - send 1800 1801 1802 inline uint8 1803 TCPEndpoint::_CurrentFlags() 1804 { 1805 // we don't set FLAG_FINISH here, instead we do it 1806 // conditionally below depending if we are sending 1807 // the last bytes of the send queue. 1808 1809 switch (fState) { 1810 case CLOSED: 1811 return TCP_FLAG_RESET | TCP_FLAG_ACKNOWLEDGE; 1812 1813 case SYNCHRONIZE_SENT: 1814 return TCP_FLAG_SYNCHRONIZE; 1815 case SYNCHRONIZE_RECEIVED: 1816 return TCP_FLAG_SYNCHRONIZE | TCP_FLAG_ACKNOWLEDGE; 1817 1818 case ESTABLISHED: 1819 case FINISH_RECEIVED: 1820 case FINISH_ACKNOWLEDGED: 1821 case TIME_WAIT: 1822 case WAIT_FOR_FINISH_ACKNOWLEDGE: 1823 case FINISH_SENT: 1824 case CLOSING: 1825 return TCP_FLAG_ACKNOWLEDGE; 1826 1827 default: 1828 return 0; 1829 } 1830 } 1831 1832 1833 inline bool 1834 TCPEndpoint::_ShouldSendSegment(tcp_segment_header& segment, uint32 length, 1835 uint32 segmentMaxSize, uint32 flightSize) 1836 { 1837 if (length > 0) { 1838 // Avoid the silly window syndrome - we only send a segment in case: 1839 // - we have a full segment to send, or 1840 // - we're at the end of our buffer queue, or 1841 // - the buffer is at least larger than half of the maximum send window, 1842 // or 1843 // - we're retransmitting data 1844 if (length == segmentMaxSize 1845 || (fOptions & TCP_NODELAY) != 0 1846 || tcp_sequence(fSendNext + length) == fSendQueue.LastSequence() 1847 || (fSendMaxWindow > 0 && length >= fSendMaxWindow / 2)) 1848 return true; 1849 } 1850 1851 // check if we need to send a window update to the peer 1852 if (segment.advertised_window > 0) { 1853 // correct the window to take into account what already has been advertised 1854 uint32 window = (segment.advertised_window << fReceiveWindowShift) 1855 - (fReceiveMaxAdvertised - fReceiveNext).Number(); 1856 1857 // if we can advertise a window larger than twice the maximum segment 1858 // size, or half the maximum buffer size we send a window update 1859 if (window >= (fReceiveMaxSegmentSize << 1) 1860 || window >= (socket->receive.buffer_size >> 1)) 1861 return true; 1862 } 1863 1864 if ((segment.flags & (TCP_FLAG_SYNCHRONIZE | TCP_FLAG_FINISH 1865 | TCP_FLAG_RESET)) != 0) 1866 return true; 1867 1868 // We do have urgent data pending 1869 if (fSendUrgentOffset > fSendNext) 1870 return true; 1871 1872 // there is no reason to send a segment just now 1873 return false; 1874 } 1875 1876 1877 status_t 1878 TCPEndpoint::_SendQueued(bool force) 1879 { 1880 return _SendQueued(force, fSendWindow); 1881 } 1882 1883 1884 /*! Sends one or more TCP segments with the data waiting in the queue, or some 1885 specific flags that need to be sent. 1886 */ 1887 status_t 1888 TCPEndpoint::_SendQueued(bool force, uint32 sendWindow) 1889 { 1890 if (fRoute == NULL) 1891 return B_ERROR; 1892 1893 // in passive state? 1894 if (fState == LISTEN) 1895 return B_ERROR; 1896 1897 tcp_segment_header segment(_CurrentFlags()); 1898 1899 if ((fOptions & TCP_NOOPT) == 0) { 1900 if ((fFlags & FLAG_OPTION_TIMESTAMP) != 0) { 1901 segment.options |= TCP_HAS_TIMESTAMPS; 1902 segment.timestamp_reply = fReceivedTimestamp; 1903 segment.timestamp_value = tcp_now(); 1904 } 1905 1906 if ((segment.flags & TCP_FLAG_SYNCHRONIZE) != 0 1907 && fSendNext == fInitialSendSequence) { 1908 // add connection establishment options 1909 segment.max_segment_size = fReceiveMaxSegmentSize; 1910 if (fFlags & FLAG_OPTION_WINDOW_SCALE) { 1911 segment.options |= TCP_HAS_WINDOW_SCALE; 1912 segment.window_shift = fReceiveWindowShift; 1913 } 1914 } 1915 } 1916 1917 size_t availableBytes = fReceiveQueue.Free(); 1918 if (fFlags & FLAG_OPTION_WINDOW_SCALE) 1919 segment.advertised_window = availableBytes >> fReceiveWindowShift; 1920 else 1921 segment.advertised_window = min_c(TCP_MAX_WINDOW, availableBytes); 1922 1923 segment.acknowledge = fReceiveNext.Number(); 1924 1925 // Process urgent data 1926 if (fSendUrgentOffset > fSendNext) { 1927 segment.flags |= TCP_FLAG_URGENT; 1928 segment.urgent_offset = (fSendUrgentOffset - fSendNext).Number(); 1929 } else { 1930 fSendUrgentOffset = fSendUnacknowledged.Number(); 1931 // Keep urgent offset updated, so that it doesn't reach into our 1932 // send window on overlap 1933 segment.urgent_offset = 0; 1934 } 1935 1936 if (fCongestionWindow > 0 && fCongestionWindow < sendWindow) 1937 sendWindow = fCongestionWindow; 1938 1939 // fSendUnacknowledged 1940 // | fSendNext fSendMax 1941 // | | | 1942 // v v v 1943 // ----------------------------------- 1944 // | effective window | 1945 // ----------------------------------- 1946 1947 // Flight size represents the window of data which is currently in the 1948 // ether. We should never send data such as the flight size becomes larger 1949 // than the effective window. Note however that the effective window may be 1950 // reduced (by congestion for instance), so at some point in time flight 1951 // size may be larger than the currently calculated window. 1952 1953 uint32 flightSize = (fSendMax - fSendUnacknowledged).Number(); 1954 uint32 consumedWindow = (fSendNext - fSendUnacknowledged).Number(); 1955 1956 if (consumedWindow > sendWindow) { 1957 sendWindow = 0; 1958 // TODO enter persist state? try to get a window update. 1959 } else 1960 sendWindow -= consumedWindow; 1961 1962 if (force && sendWindow == 0 && fSendNext <= fSendQueue.LastSequence()) { 1963 // send one byte of data to ask for a window update 1964 // (triggered by the persist timer) 1965 sendWindow = 1; 1966 } 1967 1968 uint32 length = min_c(fSendQueue.Available(fSendNext), sendWindow); 1969 tcp_sequence previousSendNext = fSendNext; 1970 1971 do { 1972 uint32 segmentMaxSize = fSendMaxSegmentSize 1973 - tcp_options_length(segment); 1974 uint32 segmentLength = min_c(length, segmentMaxSize); 1975 1976 if (fSendNext + segmentLength == fSendQueue.LastSequence()) { 1977 if (state_needs_finish(fState)) 1978 segment.flags |= TCP_FLAG_FINISH; 1979 if (length > 0) 1980 segment.flags |= TCP_FLAG_PUSH; 1981 } 1982 1983 // Determine if we should really send this segment 1984 if (!force && !_ShouldSendSegment(segment, segmentLength, 1985 segmentMaxSize, flightSize)) { 1986 if (fSendQueue.Available() 1987 && !gStackModule->is_timer_active(&fPersistTimer) 1988 && !gStackModule->is_timer_active(&fRetransmitTimer)) 1989 _StartPersistTimer(); 1990 break; 1991 } 1992 1993 net_buffer *buffer = gBufferModule->create(256); 1994 if (buffer == NULL) 1995 return B_NO_MEMORY; 1996 1997 status_t status = B_OK; 1998 if (segmentLength > 0) 1999 status = fSendQueue.Get(buffer, fSendNext, segmentLength); 2000 if (status < B_OK) { 2001 gBufferModule->free(buffer); 2002 return status; 2003 } 2004 2005 LocalAddress().CopyTo(buffer->source); 2006 PeerAddress().CopyTo(buffer->destination); 2007 2008 uint32 size = buffer->size; 2009 segment.sequence = fSendNext.Number(); 2010 2011 TRACE("SendQueued(): buffer %p (%lu bytes) address %s to %s\n" 2012 "\tflags 0x%x, seq %lu, ack %lu, rwnd %hu, cwnd %lu, ssthresh %lu\n" 2013 "\tlen %lu first %lu last %lu", 2014 buffer, buffer->size, PrintAddress(buffer->source), 2015 PrintAddress(buffer->destination), segment.flags, segment.sequence, 2016 segment.acknowledge, segment.advertised_window, 2017 fCongestionWindow, fSlowStartThreshold, segmentLength, 2018 (uint32)fSendQueue.FirstSequence(), 2019 (uint32)fSendQueue.LastSequence()); 2020 T(Send(this, segment, buffer, fSendQueue.FirstSequence(), 2021 fSendQueue.LastSequence())); 2022 2023 PROBE(buffer, sendWindow); 2024 sendWindow -= buffer->size; 2025 2026 status = add_tcp_header(AddressModule(), segment, buffer); 2027 if (status != B_OK) { 2028 gBufferModule->free(buffer); 2029 return status; 2030 } 2031 2032 // Update send status - we need to do this before we send the data 2033 // for local connections as the answer is directly handled 2034 2035 if (segment.flags & TCP_FLAG_SYNCHRONIZE) { 2036 segment.options &= ~TCP_HAS_WINDOW_SCALE; 2037 segment.max_segment_size = 0; 2038 size++; 2039 } 2040 2041 if (segment.flags & TCP_FLAG_FINISH) 2042 size++; 2043 2044 uint32 sendMax = fSendMax.Number(); 2045 fSendNext += size; 2046 if (fSendMax < fSendNext) 2047 fSendMax = fSendNext; 2048 2049 fReceiveMaxAdvertised = fReceiveNext 2050 + ((uint32)segment.advertised_window << fReceiveWindowShift); 2051 2052 status = next->module->send_routed_data(next, fRoute, buffer); 2053 if (status < B_OK) { 2054 gBufferModule->free(buffer); 2055 2056 fSendNext = segment.sequence; 2057 fSendMax = sendMax; 2058 // restore send status 2059 return status; 2060 } 2061 2062 if (segment.flags & TCP_FLAG_ACKNOWLEDGE) 2063 fLastAcknowledgeSent = segment.acknowledge; 2064 2065 length -= segmentLength; 2066 segment.flags &= ~(TCP_FLAG_SYNCHRONIZE | TCP_FLAG_RESET 2067 | TCP_FLAG_FINISH); 2068 } while (length > 0); 2069 2070 // if we sent data from the beggining of the send queue, 2071 // start the retransmition timer 2072 if (previousSendNext == fSendUnacknowledged 2073 && fSendNext > previousSendNext) { 2074 TRACE(" SendQueue(): set retransmit timer with rto %llu", 2075 fRetransmitTimeout); 2076 2077 gStackModule->set_timer(&fRetransmitTimer, fRetransmitTimeout); 2078 } 2079 2080 return B_OK; 2081 } 2082 2083 2084 int 2085 TCPEndpoint::_MaxSegmentSize(const sockaddr* address) const 2086 { 2087 return next->module->get_mtu(next, address) - sizeof(tcp_header); 2088 } 2089 2090 2091 status_t 2092 TCPEndpoint::_PrepareSendPath(const sockaddr* peer) 2093 { 2094 if (fRoute == NULL) { 2095 fRoute = gDatalinkModule->get_route(Domain(), peer); 2096 if (fRoute == NULL) 2097 return ENETUNREACH; 2098 2099 if ((fRoute->flags & RTF_LOCAL) != 0) 2100 fFlags |= FLAG_LOCAL; 2101 } 2102 2103 // make sure connection does not already exist 2104 status_t status = fManager->SetConnection(this, *LocalAddress(), peer, 2105 fRoute->interface->address); 2106 if (status < B_OK) 2107 return status; 2108 2109 fInitialSendSequence = system_time() >> 4; 2110 fSendNext = fInitialSendSequence; 2111 fSendUnacknowledged = fInitialSendSequence; 2112 fSendMax = fInitialSendSequence; 2113 fSendUrgentOffset = fInitialSendSequence; 2114 2115 // we are counting the SYN here 2116 fSendQueue.SetInitialSequence(fSendNext + 1); 2117 2118 fReceiveMaxSegmentSize = _MaxSegmentSize(peer); 2119 2120 // Compute the window shift we advertise to our peer - if it doesn't support 2121 // this option, this will be reset to 0 (when its SYN is received) 2122 fReceiveWindowShift = 0; 2123 while (fReceiveWindowShift < TCP_MAX_WINDOW_SHIFT 2124 && (0xffffUL << fReceiveWindowShift) < socket->receive.buffer_size) { 2125 fReceiveWindowShift++; 2126 } 2127 2128 return B_OK; 2129 } 2130 2131 2132 void 2133 TCPEndpoint::_Acknowledged(tcp_segment_header& segment) 2134 { 2135 size_t previouslyUsed = fSendQueue.Used(); 2136 2137 fSendQueue.RemoveUntil(segment.acknowledge); 2138 fSendUnacknowledged = segment.acknowledge; 2139 2140 if (fSendNext < fSendUnacknowledged) 2141 fSendNext = fSendUnacknowledged; 2142 2143 if (fSendUnacknowledged == fSendMax) 2144 gStackModule->cancel_timer(&fRetransmitTimer); 2145 2146 if (fSendQueue.Used() < previouslyUsed) { 2147 // this ACK acknowledged data 2148 2149 if (segment.options & TCP_HAS_TIMESTAMPS) 2150 _UpdateRoundTripTime(tcp_diff_timestamp(segment.timestamp_reply)); 2151 else { 2152 // TODO: Fallback to RFC 793 type estimation 2153 } 2154 2155 if (is_writable(fState)) { 2156 // notify threads waiting on the socket to become writable again 2157 fSendList.Signal(); 2158 gSocketModule->notify(socket, B_SELECT_WRITE, fSendQueue.Used()); 2159 } 2160 2161 if (fCongestionWindow < fSlowStartThreshold) 2162 fCongestionWindow += fSendMaxSegmentSize; 2163 } 2164 2165 if (fCongestionWindow >= fSlowStartThreshold) { 2166 uint32 increment = fSendMaxSegmentSize * fSendMaxSegmentSize; 2167 2168 if (increment < fCongestionWindow) 2169 increment = 1; 2170 else 2171 increment /= fCongestionWindow; 2172 2173 fCongestionWindow += increment; 2174 } 2175 2176 // if there is data left to be send, send it now 2177 if (fSendQueue.Used() > 0) 2178 _SendQueued(); 2179 } 2180 2181 2182 void 2183 TCPEndpoint::_Retransmit() 2184 { 2185 TRACE("Retransmit()"); 2186 _ResetSlowStart(); 2187 fSendNext = fSendUnacknowledged; 2188 _SendQueued(); 2189 } 2190 2191 2192 void 2193 TCPEndpoint::_UpdateRoundTripTime(int32 roundTripTime) 2194 { 2195 int32 rtt = roundTripTime; 2196 2197 // "smooth" round trip time as per Van Jacobson 2198 rtt -= fRoundTripTime / 8; 2199 fRoundTripTime += rtt; 2200 if (rtt < 0) 2201 rtt = -rtt; 2202 rtt -= fRoundTripDeviation / 4; 2203 fRoundTripDeviation += rtt; 2204 2205 fRetransmitTimeout = ((fRoundTripTime / 4 + fRoundTripDeviation) / 2) 2206 * kTimestampFactor; 2207 2208 TRACE(" RTO is now %llu (after rtt %ldms)", fRetransmitTimeout, 2209 roundTripTime); 2210 } 2211 2212 2213 void 2214 TCPEndpoint::_ResetSlowStart() 2215 { 2216 fSlowStartThreshold = max_c((fSendMax - fSendUnacknowledged).Number() / 2, 2217 2 * fSendMaxSegmentSize); 2218 fCongestionWindow = fSendMaxSegmentSize; 2219 } 2220 2221 2222 // #pragma mark - timer 2223 2224 2225 /*static*/ void 2226 TCPEndpoint::_RetransmitTimer(net_timer* timer, void* _endpoint) 2227 { 2228 TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint; 2229 T(Timer(endpoint, "retransmit")); 2230 2231 MutexLocker locker(endpoint->fLock); 2232 if (!locker.IsLocked()) 2233 return; 2234 2235 endpoint->_Retransmit(); 2236 } 2237 2238 2239 /*static*/ void 2240 TCPEndpoint::_PersistTimer(net_timer* timer, void* _endpoint) 2241 { 2242 TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint; 2243 T(Timer(endpoint, "persist")); 2244 2245 MutexLocker locker(endpoint->fLock); 2246 if (!locker.IsLocked()) 2247 return; 2248 2249 // the timer might not have been canceled early enough 2250 if (endpoint->State() == CLOSED) 2251 return; 2252 2253 endpoint->_SendQueued(true); 2254 } 2255 2256 2257 /*static*/ void 2258 TCPEndpoint::_DelayedAcknowledgeTimer(net_timer* timer, void* _endpoint) 2259 { 2260 TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint; 2261 T(Timer(endpoint, "delayed ack")); 2262 2263 MutexLocker locker(endpoint->fLock); 2264 if (!locker.IsLocked()) 2265 return; 2266 2267 // the timer might not have been canceled early enough 2268 if (endpoint->State() == CLOSED) 2269 return; 2270 2271 endpoint->SendAcknowledge(true); 2272 } 2273 2274 2275 /*static*/ void 2276 TCPEndpoint::_TimeWaitTimer(net_timer* timer, void* _endpoint) 2277 { 2278 TCPEndpoint* endpoint = (TCPEndpoint*)_endpoint; 2279 T(Timer(endpoint, "time-wait")); 2280 2281 MutexLocker locker(endpoint->fLock); 2282 if (!locker.IsLocked()) 2283 return; 2284 2285 if ((endpoint->fFlags & FLAG_CLOSED) == 0) { 2286 endpoint->fFlags |= FLAG_DELETE_ON_CLOSE; 2287 return; 2288 } 2289 2290 locker.Unlock(); 2291 2292 gSocketModule->release_socket(endpoint->socket); 2293 } 2294 2295 2296 // #pragma mark - 2297 2298 2299 void 2300 TCPEndpoint::Dump() const 2301 { 2302 kprintf("TCP endpoint %p\n", this); 2303 kprintf(" state: %s\n", name_for_state(fState)); 2304 kprintf(" flags: 0x%lx\n", fFlags); 2305 #if KDEBUG 2306 kprintf(" lock: { %p, holder: %ld }\n", &fLock, fLock.holder); 2307 #endif 2308 kprintf(" accept sem: %ld\n", fAcceptSemaphore); 2309 kprintf(" options: 0x%lx\n", (uint32)fOptions); 2310 kprintf(" send\n"); 2311 kprintf(" window shift: %u\n", fSendWindowShift); 2312 kprintf(" unacknowledged: %lu\n", fSendUnacknowledged.Number()); 2313 kprintf(" next: %lu\n", fSendNext.Number()); 2314 kprintf(" max: %lu\n", fSendMax.Number()); 2315 kprintf(" urgent offset: %lu\n", fSendUrgentOffset.Number()); 2316 kprintf(" window: %lu\n", fSendWindow); 2317 kprintf(" max window: %lu\n", fSendMaxWindow); 2318 kprintf(" max segment size: %lu\n", fSendMaxSegmentSize); 2319 kprintf(" queue: %lu / %lu\n", fSendQueue.Used(), fSendQueue.Size()); 2320 #if DEBUG_BUFFER_QUEUE 2321 fSendQueue.Dump(); 2322 #endif 2323 kprintf(" last acknowledge sent: %lu\n", fLastAcknowledgeSent.Number()); 2324 kprintf(" initial sequence: %lu\n", fInitialSendSequence.Number()); 2325 kprintf(" receive\n"); 2326 kprintf(" window shift: %u\n", fReceiveWindowShift); 2327 kprintf(" next: %lu\n", fReceiveNext.Number()); 2328 kprintf(" max advertised: %lu\n", fReceiveMaxAdvertised.Number()); 2329 kprintf(" window: %lu\n", fReceiveWindow); 2330 kprintf(" max segment size: %lu\n", fReceiveMaxSegmentSize); 2331 kprintf(" queue: %lu / %lu\n", fReceiveQueue.Available(), 2332 fReceiveQueue.Size()); 2333 #if DEBUG_BUFFER_QUEUE 2334 fReceiveQueue.Dump(); 2335 #endif 2336 kprintf(" initial sequence: %lu\n", fInitialReceiveSequence.Number()); 2337 kprintf(" duplicate acknowledge count: %lu\n", 2338 fDuplicateAcknowledgeCount); 2339 kprintf(" round trip time: %ld (deviation %ld)\n", fRoundTripTime, 2340 fRoundTripDeviation); 2341 kprintf(" retransmit timeout: %lld\n", fRetransmitTimeout); 2342 kprintf(" congestion window: %lu\n", fCongestionWindow); 2343 kprintf(" slow start threshold: %lu\n", fSlowStartThreshold); 2344 } 2345 2346