xref: /haiku/src/add-ons/kernel/network/protocols/tcp/tcp.cpp (revision 909af08f4328301fbdef1ffb41f566c3b5bec0c7)
1 /*
2  * Copyright 2006-2009, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  *		Andrew Galante, haiku.galante@gmail.com
8  *		Hugo Santos, hugosantos@gmail.com
9  */
10 
11 
12 #include "EndpointManager.h"
13 #include "TCPEndpoint.h"
14 #include "tcp.h"
15 
16 #include <net_protocol.h>
17 #include <net_stat.h>
18 
19 #include <KernelExport.h>
20 #include <util/list.h>
21 
22 #include <netinet/in.h>
23 #include <netinet/ip.h>
24 #include <new>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 #include <lock.h>
29 #include <util/AutoLock.h>
30 
31 #include <NetBufferUtilities.h>
32 #include <NetUtilities.h>
33 
34 
// Uncomment the following define to route TRACE() output to dprintf().
//#define TRACE_TCP
#ifdef TRACE_TCP
	// TRACE() takes a parenthesized argument list, i.e. TRACE(("fmt", args)).
#	define TRACE(x) dprintf x
#else
	// Tracing is disabled: TRACE() invocations expand to nothing.
#	define TRACE(x)
#endif
41 
42 
// Accessor type for the checksum member of a tcp_header stored in a net_buffer.
typedef NetBufferField<uint16, offsetof(tcp_header, checksum)> TCPChecksumField;


// Module pointers; their addresses are listed in module_dependencies at the
// end of this file.
net_buffer_module_info *gBufferModule;
net_datalink_module_info *gDatalinkModule;
net_socket_module_info *gSocketModule;
net_stack_module_info *gStackModule;


// One endpoint manager slot per address family, indexed by the family value.
// Readers and writers are expected to hold sEndpointManagersLock.
static EndpointManager* sEndpointManagers[AF_MAX];
static rw_lock sEndpointManagersLock;


// The TCP header length is at most 60 bytes (0xf * 4).
static const int kMaxOptionSize = 60 - sizeof(tcp_header);
58 
59 
60 /*!	Returns an endpoint manager for the specified domain, if any.
61 	You need to hold the sEndpointManagersLock when calling this function.
62 */
63 static inline EndpointManager*
64 endpoint_manager_for_locked(int family)
65 {
66 	if (family >= AF_MAX || family < 0)
67 		return NULL;
68 
69 	return sEndpointManagers[family];
70 }
71 
72 
73 /*!	Returns an endpoint manager for the specified domain, if any */
74 static inline EndpointManager*
75 endpoint_manager_for(net_domain* domain)
76 {
77 	ReadLocker _(sEndpointManagersLock);
78 
79 	return endpoint_manager_for_locked(domain->family);
80 }
81 
82 
83 static inline void
84 bump_option(tcp_option *&option, size_t &length)
85 {
86 	if (option->kind <= TCP_OPTION_NOP) {
87 		length++;
88 		option = (tcp_option *)((uint8 *)option + 1);
89 	} else {
90 		length += option->length;
91 		option = (tcp_option *)((uint8 *)option + option->length);
92 	}
93 }
94 
95 
96 static inline size_t
97 add_options(tcp_segment_header &segment, uint8 *buffer, size_t bufferSize)
98 {
99 	// Some network devices can be very sensitive to the ordering of TCP options
100 	// https://github.com/torvalds/linux/blob/9e9fb7655ed585da8f468e29221f0ba194a5f613/net/ipv4/tcp_output.c#L598
101 
102 	tcp_option *option = (tcp_option *)buffer;
103 	size_t length = 0;
104 
105 	if (segment.max_segment_size > 0 && length + 8 <= bufferSize) {
106 		option->kind = TCP_OPTION_MAX_SEGMENT_SIZE;
107 		option->length = 4;
108 		option->max_segment_size = htons(segment.max_segment_size);
109 		bump_option(option, length);
110 	}
111 
112 	if ((segment.options & TCP_HAS_TIMESTAMPS) != 0
113 		&& length + 12 <= bufferSize) {
114 		if ((segment.options & TCP_SACK_PERMITTED) != 0) {
115 			// combine with timestamp
116 			option->kind = TCP_OPTION_SACK_PERMITTED;
117 			option->length = 2;
118 			bump_option(option, length);
119 		} else {
120 			// two NOPs so the timestamps get aligned to a 4 byte boundary
121 			option->kind = TCP_OPTION_NOP;
122 			bump_option(option, length);
123 			option->kind = TCP_OPTION_NOP;
124 			bump_option(option, length);
125 		}
126 		option->kind = TCP_OPTION_TIMESTAMP;
127 		option->length = 10;
128 		option->timestamp.value = htonl(segment.timestamp_value);
129 		option->timestamp.reply = htonl(segment.timestamp_reply);
130 		bump_option(option, length);
131 	} else if ((segment.options & TCP_SACK_PERMITTED) != 0
132 		&& length + 4 <= bufferSize) {
133 		// two NOPs so that the subsequent data is aligned on a 4 byte boundary
134 		option->kind = TCP_OPTION_NOP;
135 		bump_option(option, length);
136 		option->kind = TCP_OPTION_NOP;
137 		bump_option(option, length);
138 		option->kind = TCP_OPTION_SACK_PERMITTED;
139 		option->length = 2;
140 		bump_option(option, length);
141 	}
142 
143 	if ((segment.options & TCP_HAS_WINDOW_SCALE) != 0
144 		&& length + 4 <= bufferSize) {
145 		// one NOP so that the subsequent data is aligned on a 4 byte boundary
146 		option->kind = TCP_OPTION_NOP;
147 		bump_option(option, length);
148 
149 		option->kind = TCP_OPTION_WINDOW_SHIFT;
150 		option->length = 3;
151 		option->window_shift = segment.window_shift;
152 		bump_option(option, length);
153 	}
154 
155 	if (segment.sackCount > 0) {
156 		int sackCount = ((int)(bufferSize - length) - 4) / sizeof(tcp_sack);
157 		if (sackCount > segment.sackCount)
158 			sackCount = segment.sackCount;
159 
160 		if (sackCount > 0) {
161 			option->kind = TCP_OPTION_NOP;
162 			bump_option(option, length);
163 			option->kind = TCP_OPTION_NOP;
164 			bump_option(option, length);
165 			option->kind = TCP_OPTION_SACK;
166 			option->length = 2 + sackCount * sizeof(tcp_sack);
167 			memcpy(option->sack, segment.sacks, sackCount * sizeof(tcp_sack));
168 			bump_option(option, length);
169 		}
170 	}
171 
172 	if ((length & 3) == 0) {
173 		// options completely fill out the option space
174 		return length;
175 	}
176 
177 	option->kind = TCP_OPTION_END;
178 	return (length + 3) & ~3;
179 		// bump to a multiple of 4 length
180 }
181 
182 
183 static void
184 process_options(tcp_segment_header &segment, net_buffer *buffer, size_t size)
185 {
186 	if (size == 0)
187 		return;
188 
189 	tcp_option *option;
190 
191 	uint8 optionsBuffer[kMaxOptionSize];
192 	if (gBufferModule->direct_access(buffer, sizeof(tcp_header), size,
193 			(void **)&option) != B_OK) {
194 		if ((size_t)size > sizeof(optionsBuffer)) {
195 			dprintf("Ignoring TCP options larger than expected.\n");
196 			return;
197 		}
198 
199 		gBufferModule->read(buffer, sizeof(tcp_header), optionsBuffer, size);
200 		option = (tcp_option *)optionsBuffer;
201 	}
202 
203 	while (size > 0) {
204 		int32 length = -1;
205 
206 		switch (option->kind) {
207 			case TCP_OPTION_END:
208 			case TCP_OPTION_NOP:
209 				length = 1;
210 				break;
211 			case TCP_OPTION_MAX_SEGMENT_SIZE:
212 				if (option->length == 4 && size >= 4)
213 					segment.max_segment_size = ntohs(option->max_segment_size);
214 				break;
215 			case TCP_OPTION_WINDOW_SHIFT:
216 				if (option->length == 3 && size >= 3) {
217 					segment.options |= TCP_HAS_WINDOW_SCALE;
218 					segment.window_shift = option->window_shift;
219 				}
220 				break;
221 			case TCP_OPTION_TIMESTAMP:
222 				if (option->length == 10 && size >= 10) {
223 					segment.options |= TCP_HAS_TIMESTAMPS;
224 					segment.timestamp_value = ntohl(option->timestamp.value);
225 					segment.timestamp_reply =
226 						ntohl(option->timestamp.reply);
227 				}
228 				break;
229 			case TCP_OPTION_SACK_PERMITTED:
230 				if (option->length == 2 && size >= 2)
231 					segment.options |= TCP_SACK_PERMITTED;
232 				break;
233 			case TCP_OPTION_SACK:
234 				if (size >= option->length) {
235 					segment.options |= TCP_HAS_SACK;
236 					segment.sackCount = min_c((option->length - 2)
237 						/ sizeof(tcp_sack), MAX_SACK_BLKS);
238 					for(int i = 0; i < segment.sackCount; ++i) {
239 						segment.sacks[i].left_edge = ntohl(
240 							option->sack[i].left_edge);
241 						segment.sacks[i].right_edge = ntohl(
242 							option->sack[i].right_edge);
243 					}
244 				}
245 				break;
246 		}
247 
248 		if (length < 0) {
249 			length = option->length;
250 			if (length == 0 || length > (ssize_t)size)
251 				break;
252 		}
253 
254 		option = (tcp_option *)((uint8 *)option + length);
255 		size -= length;
256 	}
257 }
258 
259 
// Debugging helper that prints the fields of a TCP header via dprintf().
// It is compiled out; its only call site in tcp_receive_data() is commented
// out as well.
#if 0
static void
dump_tcp_header(tcp_header &header)
{
	dprintf("  source port: %u\n", ntohs(header.source_port));
	dprintf("  dest port: %u\n", ntohs(header.destination_port));
	dprintf("  sequence: %lu\n", header.Sequence());
	dprintf("  ack: %lu\n", header.Acknowledge());
	dprintf("  flags: %s%s%s%s%s%s\n", (header.flags & TCP_FLAG_FINISH) ? "FIN " : "",
		(header.flags & TCP_FLAG_SYNCHRONIZE) ? "SYN " : "",
		(header.flags & TCP_FLAG_RESET) ? "RST " : "",
		(header.flags & TCP_FLAG_PUSH) ? "PUSH " : "",
		(header.flags & TCP_FLAG_ACKNOWLEDGE) ? "ACK " : "",
		(header.flags & TCP_FLAG_URGENT) ? "URG " : "");
	dprintf("  window: %u\n", header.AdvertisedWindow());
	dprintf("  urgent offset: %u\n", header.UrgentOffset());
}
#endif
278 
279 
280 static int
281 dump_endpoints(int argc, char** argv)
282 {
283 	for (int i = 0; i < AF_MAX; i++) {
284 		EndpointManager* manager = sEndpointManagers[i];
285 		if (manager != NULL)
286 			manager->Dump();
287 	}
288 
289 	return 0;
290 }
291 
292 
293 static int
294 dump_endpoint(int argc, char** argv)
295 {
296 	if (argc < 2) {
297 		kprintf("usage: tcp_endpoint [address]\n");
298 		return 0;
299 	}
300 
301 	TCPEndpoint* endpoint = (TCPEndpoint*)parse_expression(argv[1]);
302 	endpoint->Dump();
303 
304 	return 0;
305 }
306 
307 
308 //	#pragma mark - internal API
309 
310 
311 /*!	Creates a new endpoint manager for the specified domain, or returns
312 	an existing one for this domain.
313 */
314 EndpointManager*
315 get_endpoint_manager(net_domain* domain)
316 {
317 	// See if there is one already
318 	EndpointManager* endpointManager = endpoint_manager_for(domain);
319 	if (endpointManager != NULL)
320 		return endpointManager;
321 
322 	WriteLocker _(sEndpointManagersLock);
323 
324 	endpointManager = endpoint_manager_for_locked(domain->family);
325 	if (endpointManager != NULL)
326 		return endpointManager;
327 
328 	// There is no endpoint manager for this domain yet, so we need
329 	// to create one.
330 
331 	endpointManager = new(std::nothrow) EndpointManager(domain);
332 	if (endpointManager == NULL)
333 		return NULL;
334 
335 	if (endpointManager->Init() != B_OK) {
336 		delete endpointManager;
337 		return NULL;
338 	}
339 
340 	sEndpointManagers[domain->family] = endpointManager;
341 	return endpointManager;
342 }
343 
344 
/*!	Counterpart to get_endpoint_manager(). Currently does nothing: the
	managers are only deleted in tcp_uninit().
*/
void
put_endpoint_manager(EndpointManager* endpointManager)
{
	// TODO: we may want to use reference counting instead of only discarding
	// them on unload. But since there is likely only IPv4/v6 there is not much
	// point to it.
}
352 
353 
354 const char*
355 name_for_state(tcp_state state)
356 {
357 	switch (state) {
358 		case CLOSED:
359 			return "closed";
360 		case LISTEN:
361 			return "listen";
362 		case SYNCHRONIZE_SENT:
363 			return "syn-sent";
364 		case SYNCHRONIZE_RECEIVED:
365 			return "syn-received";
366 		case ESTABLISHED:
367 			return "established";
368 
369 		// peer closes the connection
370 		case FINISH_RECEIVED:
371 			return "close-wait";
372 		case WAIT_FOR_FINISH_ACKNOWLEDGE:
373 			return "last-ack";
374 
375 		// we close the connection
376 		case FINISH_SENT:
377 			return "fin-wait1";
378 		case FINISH_ACKNOWLEDGED:
379 			return "fin-wait2";
380 		case CLOSING:
381 			return "closing";
382 
383 		case TIME_WAIT:
384 			return "time-wait";
385 	}
386 
387 	return "-";
388 }
389 
390 
391 /*!	Constructs a TCP header on \a buffer with the specified values
392 	for \a flags, \a seq \a ack and \a advertisedWindow.
393 */
394 status_t
395 add_tcp_header(net_address_module_info* addressModule,
396 	tcp_segment_header& segment, net_buffer* buffer)
397 {
398 	buffer->protocol = IPPROTO_TCP;
399 
400 	uint8 optionsBuffer[kMaxOptionSize];
401 	uint32 optionsLength = add_options(segment, optionsBuffer,
402 		sizeof(optionsBuffer));
403 
404 	NetBufferPrepend<tcp_header> bufferHeader(buffer,
405 		sizeof(tcp_header) + optionsLength);
406 	if (bufferHeader.Status() != B_OK)
407 		return bufferHeader.Status();
408 
409 	tcp_header& header = bufferHeader.Data();
410 
411 	header.source_port = addressModule->get_port(buffer->source);
412 	header.destination_port = addressModule->get_port(buffer->destination);
413 	header.sequence = htonl(segment.sequence);
414 	header.acknowledge = (segment.flags & TCP_FLAG_ACKNOWLEDGE)
415 		? htonl(segment.acknowledge) : 0;
416 	header.reserved = 0;
417 	header.header_length = (sizeof(tcp_header) + optionsLength) >> 2;
418 	header.flags = segment.flags;
419 	header.advertised_window = htons(segment.advertised_window);
420 	header.checksum = 0;
421 	header.urgent_offset = htons(segment.urgent_offset);
422 
423 	// we must detach before calculating the checksum as we may
424 	// not have a contiguous buffer.
425 	bufferHeader.Sync();
426 
427 	if (optionsLength > 0) {
428 		gBufferModule->write(buffer, sizeof(tcp_header), optionsBuffer,
429 			optionsLength);
430 	}
431 
432 	TRACE(("add_tcp_header(): buffer %p, flags 0x%x, seq %" B_PRIu32 ", ack %" B_PRIu32 ", up %u, "
433 		"win %u\n", buffer, segment.flags, segment.sequence,
434 		segment.acknowledge, segment.urgent_offset, segment.advertised_window));
435 
436 	*TCPChecksumField(buffer) = Checksum::PseudoHeader(addressModule,
437 		gBufferModule, buffer, IPPROTO_TCP);
438 	buffer->buffer_flags |= NET_BUFFER_L4_CHECKSUM_VALID;
439 
440 	return B_OK;
441 }
442 
443 
444 size_t
445 tcp_options_length(tcp_segment_header& segment)
446 {
447 	size_t length = 0;
448 
449 	if (segment.max_segment_size > 0)
450 		length += 4;
451 
452 	if ((segment.options & TCP_HAS_TIMESTAMPS) != 0)
453 		length += 12;
454 	else if ((segment.options & TCP_SACK_PERMITTED) != 0)
455 		length += 4;
456 
457 	if ((segment.options & TCP_HAS_WINDOW_SCALE) != 0)
458 		length += 4;
459 
460 	if (segment.sackCount > 0) {
461 		int sackCount = min_c((int)((kMaxOptionSize - length - 4)
462 			/ sizeof(tcp_sack)), segment.sackCount);
463 		if (sackCount > 0)
464 			length += 4 + sackCount * sizeof(tcp_sack);
465 	}
466 
467 	if ((length & 3) == 0)
468 		return length;
469 
470 	return (length + 3) & ~3;
471 }
472 
473 
474 //	#pragma mark - protocol API
475 
476 
477 net_protocol*
478 tcp_init_protocol(net_socket* socket)
479 {
480 	TCPEndpoint* protocol = new (std::nothrow) TCPEndpoint(socket);
481 	if (protocol == NULL)
482 		return NULL;
483 
484 	if (protocol->InitCheck() != B_OK) {
485 		delete protocol;
486 		return NULL;
487 	}
488 
489 	TRACE(("Creating new TCPEndpoint: %p\n", protocol));
490 	socket->protocol = IPPROTO_TCP;
491 	return protocol;
492 }
493 
494 
495 status_t
496 tcp_uninit_protocol(net_protocol* protocol)
497 {
498 	TRACE(("Deleting TCPEndpoint: %p\n", protocol));
499 	delete (TCPEndpoint*)protocol;
500 	return B_OK;
501 }
502 
503 
504 status_t
505 tcp_open(net_protocol* protocol)
506 {
507 	return ((TCPEndpoint*)protocol)->Open();
508 }
509 
510 
511 status_t
512 tcp_close(net_protocol* protocol)
513 {
514 	return ((TCPEndpoint*)protocol)->Close();
515 }
516 
517 
518 status_t
519 tcp_free(net_protocol* protocol)
520 {
521 	((TCPEndpoint*)protocol)->Free();
522 	return B_OK;
523 }
524 
525 
526 status_t
527 tcp_connect(net_protocol* protocol, const struct sockaddr* address)
528 {
529 	return ((TCPEndpoint*)protocol)->Connect(address);
530 }
531 
532 
533 status_t
534 tcp_accept(net_protocol* protocol, struct net_socket** _acceptedSocket)
535 {
536 	return ((TCPEndpoint*)protocol)->Accept(_acceptedSocket);
537 }
538 
539 
540 status_t
541 tcp_control(net_protocol* _protocol, int level, int option, void* value,
542 	size_t* _length)
543 {
544 	TCPEndpoint* protocol = (TCPEndpoint*)_protocol;
545 
546 	if ((level & LEVEL_MASK) == IPPROTO_TCP) {
547 		if (option == NET_STAT_SOCKET)
548 			return protocol->FillStat((net_stat*)value);
549 	}
550 
551 	return protocol->next->module->control(protocol->next, level, option,
552 		value, _length);
553 }
554 
555 
556 status_t
557 tcp_getsockopt(net_protocol* _protocol, int level, int option, void* value,
558 	int* _length)
559 {
560 	TCPEndpoint* protocol = (TCPEndpoint*)_protocol;
561 
562 	if (level == IPPROTO_TCP)
563 		return protocol->GetOption(option, value, _length);
564 
565 	return protocol->next->module->getsockopt(protocol->next, level, option,
566 		value, _length);
567 }
568 
569 
570 status_t
571 tcp_setsockopt(net_protocol* _protocol, int level, int option,
572 	const void* _value, int length)
573 {
574 	TCPEndpoint* protocol = (TCPEndpoint*)_protocol;
575 
576 	if (level == SOL_SOCKET) {
577 		if (option == SO_SNDBUF || option == SO_RCVBUF) {
578 			if (length != sizeof(int))
579 				return B_BAD_VALUE;
580 
581 			status_t status;
582 			const int* value = (const int*)_value;
583 
584 			if (option == SO_SNDBUF)
585 				status = protocol->SetSendBufferSize(*value);
586 			else
587 				status = protocol->SetReceiveBufferSize(*value);
588 
589 			if (status < B_OK)
590 				return status;
591 		}
592 	} else if (level == IPPROTO_TCP)
593 		return protocol->SetOption(option, _value, length);
594 
595 	return protocol->next->module->setsockopt(protocol->next, level, option,
596 		_value, length);
597 }
598 
599 
600 status_t
601 tcp_bind(net_protocol* protocol, const struct sockaddr* address)
602 {
603 	return ((TCPEndpoint*)protocol)->Bind(address);
604 }
605 
606 
607 status_t
608 tcp_unbind(net_protocol* protocol, struct sockaddr* address)
609 {
610 	return ((TCPEndpoint*)protocol)->Unbind(address);
611 }
612 
613 
614 status_t
615 tcp_listen(net_protocol* protocol, int count)
616 {
617 	return ((TCPEndpoint*)protocol)->Listen(count);
618 }
619 
620 
621 status_t
622 tcp_shutdown(net_protocol* protocol, int direction)
623 {
624 	return ((TCPEndpoint*)protocol)->Shutdown(direction);
625 }
626 
627 
628 status_t
629 tcp_send_data(net_protocol* protocol, net_buffer* buffer)
630 {
631 	return ((TCPEndpoint*)protocol)->SendData(buffer);
632 }
633 
634 
/*!	Not supported by TCP: ignores all of its arguments and always returns
	\c B_ERROR.
*/
status_t
tcp_send_routed_data(net_protocol* protocol, struct net_route* route,
	net_buffer* buffer)
{
	// TCP never sends routed data
	return B_ERROR;
}
642 
643 
644 ssize_t
645 tcp_send_avail(net_protocol* protocol)
646 {
647 	return ((TCPEndpoint*)protocol)->SendAvailable();
648 }
649 
650 
651 status_t
652 tcp_read_data(net_protocol* protocol, size_t numBytes, uint32 flags,
653 	net_buffer** _buffer)
654 {
655 	return ((TCPEndpoint*)protocol)->ReadData(numBytes, flags, _buffer);
656 }
657 
658 
659 ssize_t
660 tcp_read_avail(net_protocol* protocol)
661 {
662 	return ((TCPEndpoint*)protocol)->ReadAvailable();
663 }
664 
665 
666 struct net_domain*
667 tcp_get_domain(net_protocol* protocol)
668 {
669 	return protocol->next->module->get_domain(protocol->next);
670 }
671 
672 
673 size_t
674 tcp_get_mtu(net_protocol* protocol, const struct sockaddr* address)
675 {
676 	return protocol->next->module->get_mtu(protocol->next, address);
677 }
678 
679 
680 status_t
681 tcp_receive_data(net_buffer* buffer)
682 {
683 	TRACE(("TCP: Received buffer %p\n", buffer));
684 
685 	if (buffer->interface_address == NULL
686 		|| buffer->interface_address->domain == NULL)
687 		return B_ERROR;
688 
689 	net_domain* domain = buffer->interface_address->domain;
690 	net_address_module_info* addressModule = domain->address_module;
691 
692 	NetBufferHeaderReader<tcp_header> bufferHeader(buffer);
693 	if (bufferHeader.Status() < B_OK)
694 		return bufferHeader.Status();
695 
696 	tcp_header& header = bufferHeader.Data();
697 
698 	uint16 headerLength = header.HeaderLength();
699 	if (headerLength < sizeof(tcp_header))
700 		return B_BAD_DATA;
701 
702 	if ((buffer->buffer_flags & NET_BUFFER_L4_CHECKSUM_VALID) == 0) {
703 		if (Checksum::PseudoHeader(addressModule, gBufferModule, buffer, IPPROTO_TCP) != 0)
704 			return B_BAD_DATA;
705 	}
706 
707 	addressModule->set_port(buffer->source, header.source_port);
708 	addressModule->set_port(buffer->destination, header.destination_port);
709 
710 	TRACE(("  Looking for: peer %s, local %s\n",
711 		AddressString(domain, buffer->source, true).Data(),
712 		AddressString(domain, buffer->destination, true).Data()));
713 	//dump_tcp_header(header);
714 	//gBufferModule->dump(buffer);
715 
716 	tcp_segment_header segment(header.flags);
717 	segment.sequence = header.Sequence();
718 	segment.acknowledge = header.Acknowledge();
719 	segment.advertised_window = header.AdvertisedWindow();
720 	segment.urgent_offset = header.UrgentOffset();
721 	process_options(segment, buffer, headerLength - sizeof(tcp_header));
722 
723 	bufferHeader.Remove(headerLength);
724 		// we no longer need to keep the header around
725 
726 	EndpointManager* endpointManager = endpoint_manager_for(domain);
727 	if (endpointManager == NULL) {
728 		TRACE(("  No endpoint manager!\n"));
729 		return B_ERROR;
730 	}
731 
732 	int32 segmentAction = DROP;
733 
734 	TCPEndpoint* endpoint = endpointManager->FindConnection(
735 		buffer->destination, buffer->source);
736 	if (endpoint != NULL) {
737 		segmentAction = endpoint->SegmentReceived(segment, buffer);
738 
739 		// There are some states in which the socket could have been deleted
740 		// while handling a segment. If this flag is set in segmentAction
741 		// then we know the socket has been freed and can skip releasing
742 		// the reference acquired in EndpointManager::FindConnection().
743 		if ((segmentAction & DELETED_ENDPOINT) == 0)
744 			gSocketModule->release_socket(endpoint->socket);
745 	} else if ((segment.flags & TCP_FLAG_RESET) == 0)
746 		segmentAction = DROP | RESET;
747 
748 	if ((segmentAction & RESET) != 0) {
749 		// send reset
750 		endpointManager->ReplyWithReset(segment, buffer);
751 	}
752 	if ((segmentAction & DROP) != 0)
753 		gBufferModule->free(buffer);
754 
755 	return B_OK;
756 }
757 
758 
/*!	Error notifications are not handled here: ignores its arguments and
	always returns \c B_ERROR.
*/
status_t
tcp_error_received(net_error error, net_buffer* data)
{
	return B_ERROR;
}
764 
765 
/*!	Error replies are not handled here: ignores its arguments and always
	returns \c B_ERROR.
*/
status_t
tcp_error_reply(net_protocol* protocol, net_buffer* cause, net_error error,
	net_error_data* errorData)
{
	return B_ERROR;
}
772 
773 
774 //	#pragma mark -
775 
776 
777 static status_t
778 tcp_init()
779 {
780 	rw_lock_init(&sEndpointManagersLock, "endpoint managers");
781 
782 	status_t status = gStackModule->register_domain_protocols(AF_INET,
783 		SOCK_STREAM, 0,
784 		"network/protocols/tcp/v1",
785 		"network/protocols/ipv4/v1",
786 		NULL);
787 	if (status < B_OK)
788 		return status;
789 	status = gStackModule->register_domain_protocols(AF_INET6,
790 		SOCK_STREAM, 0,
791 		"network/protocols/tcp/v1",
792 		"network/protocols/ipv6/v1",
793 		NULL);
794 	if (status < B_OK)
795 		return status;
796 
797 	status = gStackModule->register_domain_protocols(AF_INET, SOCK_STREAM,
798 		IPPROTO_TCP,
799 		"network/protocols/tcp/v1",
800 		"network/protocols/ipv4/v1",
801 		NULL);
802 	if (status < B_OK)
803 		return status;
804 	status = gStackModule->register_domain_protocols(AF_INET6, SOCK_STREAM,
805 		IPPROTO_TCP,
806 		"network/protocols/tcp/v1",
807 		"network/protocols/ipv6/v1",
808 		NULL);
809 	if (status < B_OK)
810 		return status;
811 
812 	status = gStackModule->register_domain_receiving_protocol(AF_INET,
813 		IPPROTO_TCP, "network/protocols/tcp/v1");
814 	if (status < B_OK)
815 		return status;
816 	status = gStackModule->register_domain_receiving_protocol(AF_INET6,
817 		IPPROTO_TCP, "network/protocols/tcp/v1");
818 	if (status < B_OK)
819 		return status;
820 
821 	add_debugger_command("tcp_endpoints", dump_endpoints,
822 		"lists all open TCP endpoints");
823 	add_debugger_command("tcp_endpoint", dump_endpoint,
824 		"dumps a TCP endpoint internal state");
825 
826 	return B_OK;
827 }
828 
829 
830 static status_t
831 tcp_uninit()
832 {
833 	remove_debugger_command("tcp_endpoint", dump_endpoint);
834 	remove_debugger_command("tcp_endpoints", dump_endpoints);
835 
836 	rw_lock_destroy(&sEndpointManagersLock);
837 
838 	for (int i = 0; i < AF_MAX; i++) {
839 		delete sEndpointManagers[i];
840 	}
841 
842 	return B_OK;
843 }
844 
845 
846 static status_t
847 tcp_std_ops(int32 op, ...)
848 {
849 	switch (op) {
850 		case B_MODULE_INIT:
851 			return tcp_init();
852 
853 		case B_MODULE_UNINIT:
854 			return tcp_uninit();
855 
856 		default:
857 			return B_ERROR;
858 	}
859 }
860 
861 
// The protocol module descriptor: the module name (the same string that is
// registered in tcp_init()), the tcp_std_ops() hook, and the tcp_*() hooks
// defined above. Hooks this module does not provide are NULL.
net_protocol_module_info sTCPModule = {
	{
		"network/protocols/tcp/v1",
		0,
		tcp_std_ops
	},
	0,

	tcp_init_protocol,
	tcp_uninit_protocol,
	tcp_open,
	tcp_close,
	tcp_free,
	tcp_connect,
	tcp_accept,
	tcp_control,
	tcp_getsockopt,
	tcp_setsockopt,
	tcp_bind,
	tcp_unbind,
	tcp_listen,
	tcp_shutdown,
	tcp_send_data,
	tcp_send_routed_data,
	tcp_send_avail,
	tcp_read_data,
	tcp_read_avail,
	tcp_get_domain,
	tcp_get_mtu,
	tcp_receive_data,
	NULL,		// deliver_data()
	tcp_error_received,
	tcp_error_reply,
	NULL,		// add_ancillary_data()
	NULL,		// process_ancillary_data()
	NULL,		// process_ancillary_data_no_container()
	NULL,		// send_data_no_buffer()
	NULL		// read_data_no_buffer()
};
901 
// The modules this add-on depends on, each paired with the address of the
// global pointer that refers to it. The list ends with an empty entry.
module_dependency module_dependencies[] = {
	{NET_STACK_MODULE_NAME, (module_info **)&gStackModule},
	{NET_BUFFER_MODULE_NAME, (module_info **)&gBufferModule},
	{NET_DATALINK_MODULE_NAME, (module_info **)&gDatalinkModule},
	{NET_SOCKET_MODULE_NAME, (module_info **)&gSocketModule},
	{}
};
909 
// NULL terminated list of the modules defined in this file.
module_info *modules[] = {
	(module_info *)&sTCPModule,
	NULL
};
914