4 * Transmission Control Protocol, incoming traffic
6 * The input processing functions of TCP.
8 * These functions are generally called in the order (ip_input() ->) tcp_input() ->
9 * tcp_process() -> tcp_receive() (-> application).
14 * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without modification,
18 * are permitted provided that the following conditions are met:
20 * 1. Redistributions of source code must retain the above copyright notice,
21 * this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright notice,
23 * this list of conditions and the following disclaimer in the documentation
24 * and/or other materials provided with the distribution.
25 * 3. The name of the author may not be used to endorse or promote products
26 * derived from this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
29 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
30 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
31 * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
33 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
36 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
39 * This file is part of the lwIP TCP/IP stack.
41 * Author: Adam Dunkels <adam@sics.se>
48 #include "lwip/ip_addr.h"
49 #include "lwip/netif.h"
51 #include "lwip/memp.h"
53 #include "lwip/inet.h"
56 #include "lwip/stats.h"
58 #include "arch/perf.h"
60 /* These variables are global to all functions involved in the input
61 processing of TCP segments. They are set by the tcp_input() function. */
63 static struct tcp_seg inseg;
64 static struct tcp_hdr *tcphdr;
65 static struct ip_hdr *iphdr;
66 static u32_t seqno, ackno;
70 static u8_t recv_flags;
71 static struct pbuf *recv_data;
73 struct tcp_pcb *tcp_input_pcb;
75 /* Forward declarations. */
76 static err_t tcp_process(struct tcp_pcb *pcb);
77 static void tcp_receive(struct tcp_pcb *pcb);
78 static void tcp_parseopt(struct tcp_pcb *pcb);
80 static err_t tcp_listen_input(struct tcp_pcb_listen *pcb);
81 static err_t tcp_timewait_input(struct tcp_pcb *pcb);
86 * The initial input processing of TCP. It verifies the TCP header, demultiplexes
87 * the segment between the PCBs and passes it on to tcp_process(), which implements
88 * the TCP finite state machine. This function is called by the IP layer (in
/*
 * tcp_input(): entry point for a received TCP segment.
 *
 * p   - pbuf holding the packet; its payload pointer is moved past the IP
 *       header and then past the TCP header with pbuf_header().
 * inp - netif handed to ip_addr_isbroadcast() for the broadcast test.
 *
 * Steps visible below:
 *  - segments shorter than struct tcp_hdr, and segments addressed to a
 *    broadcast or multicast destination, take the discard branches;
 *  - with CHECKSUM_CHECK_TCP enabled the pseudo-header checksum is verified;
 *  - ports, seqno, ackno and wnd are converted to host byte order in place
 *    and cached in the file-scope seqno/ackno variables; tcplen counts one
 *    extra unit when SYN or FIN is set;
 *  - the segment is matched against tcp_active_pcbs (a hit is moved to the
 *    list head), then tcp_tw_pcbs (-> tcp_timewait_input()), then
 *    tcp_listen_pcbs.listen_pcbs (-> tcp_listen_input());
 *  - for an active match, inseg is filled in and tcp_process() is run; unless
 *    it returned ERR_ABRT, recv_flags and pcb->acked / recv_data drive the
 *    ERR, SENT and RECV event macros;
 *  - with no match, a reset is sent via tcp_rst() unless the segment itself
 *    carries RST.
 *
 * NOTE(review): this listing has lost lines (return type, braces, several
 * statements and #if openers, some comment terminators). Treat the summary
 * above as describing only what is visible here and confirm against the
 * complete source.
 */
93 tcp_input(struct pbuf *p, struct netif *inp)
95 struct tcp_pcb *pcb, *prev;
96 struct tcp_pcb_listen *lpcb;
101 struct tcp_pcb *pcb_temp;
104 #endif /* SO_REUSE */
108 TCP_STATS_INC(tcp.recv);
111 tcphdr = (struct tcp_hdr *)((u8_t *)p->payload + IPH_HL(iphdr) * 4);
114 tcp_debug_print(tcphdr);
117 /* remove header from payload */
118 if (pbuf_header(p, -((s16_t)(IPH_HL(iphdr) * 4))) || (p->tot_len < sizeof(struct tcp_hdr))) {
119 /* drop short packets */
120 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: short packet (%u bytes) discarded\n", p->tot_len));
121 TCP_STATS_INC(tcp.lenerr);
122 TCP_STATS_INC(tcp.drop);
127 /* Don't even process incoming broadcasts/multicasts. */
128 if (ip_addr_isbroadcast(&(iphdr->dest), inp) ||
129 ip_addr_ismulticast(&(iphdr->dest))) {
134 #if CHECKSUM_CHECK_TCP
135 /* Verify TCP checksum. */
136 if (inet_chksum_pseudo(p, (struct ip_addr *)&(iphdr->src),
137 (struct ip_addr *)&(iphdr->dest),
138 IP_PROTO_TCP, p->tot_len) != 0) {
139 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packet discarded due to failing checksum 0x%04x\n",
140 inet_chksum_pseudo(p, (struct ip_addr *)&(iphdr->src), (struct ip_addr *)&(iphdr->dest),
141 IP_PROTO_TCP, p->tot_len)));
143 tcp_debug_print(tcphdr);
144 #endif /* TCP_DEBUG */
145 TCP_STATS_INC(tcp.chkerr);
146 TCP_STATS_INC(tcp.drop);
153 /* Move the payload pointer in the pbuf so that it points to the
154 TCP data instead of the TCP header. */
155 hdrlen = TCPH_HDRLEN(tcphdr);
156 pbuf_header(p, -(hdrlen * 4));
158 /* Convert fields in TCP header to host byte order. */
159 tcphdr->src = ntohs(tcphdr->src);
160 tcphdr->dest = ntohs(tcphdr->dest);
161 seqno = tcphdr->seqno = ntohl(tcphdr->seqno);
162 ackno = tcphdr->ackno = ntohl(tcphdr->ackno);
163 tcphdr->wnd = ntohs(tcphdr->wnd);
165 flags = TCPH_FLAGS(tcphdr) & TCP_FLAGS;
166 tcplen = p->tot_len + ((flags & TCP_FIN || flags & TCP_SYN)? 1: 0);
168 /* Demultiplex an incoming segment. First, we check if it is destined
169 for an active connection. */
173 pcb_temp = tcp_active_pcbs;
177 /* Iterate through the TCP pcb list for a fully matching pcb */
178 for(pcb = pcb_temp; pcb != NULL; pcb = pcb->next) {
180 for(pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) {
181 #endif /* SO_REUSE */
182 LWIP_ASSERT("tcp_input: active pcb->state != CLOSED", pcb->state != CLOSED);
183 LWIP_ASSERT("tcp_input: active pcb->state != TIME-WAIT", pcb->state != TIME_WAIT);
184 LWIP_ASSERT("tcp_input: active pcb->state != LISTEN", pcb->state != LISTEN);
185 if (pcb->remote_port == tcphdr->src &&
186 pcb->local_port == tcphdr->dest &&
187 ip_addr_cmp(&(pcb->remote_ip), &(iphdr->src)) &&
188 ip_addr_cmp(&(pcb->local_ip), &(iphdr->dest))) {
191 if(pcb->so_options & SOF_REUSEPORT) {
193 /* We processed one PCB already */
194 LWIP_DEBUGF(TCP_INPUT_DEBUG,("tcp_input: second or later PCB and SOF_REUSEPORT set.\n"));
196 /* First PCB with this address */
197 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: first PCB and SOF_REUSEPORT set.\n"));
204 /* We want to search on next socket after receiving */
205 pcb_temp = pcb->next;
207 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: reference counter on PBUF set to %i\n", p->ref));
210 /* We processed one PCB already */
211 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: second or later PCB but SOF_REUSEPORT not set !\n"));
214 #endif /* SO_REUSE */
216 /* Move this PCB to the front of the list so that subsequent
217 lookups will be faster (we exploit locality in TCP segment
219 LWIP_ASSERT("tcp_input: pcb->next != pcb (before cache)", pcb->next != pcb);
221 prev->next = pcb->next;
222 pcb->next = tcp_active_pcbs;
223 tcp_active_pcbs = pcb;
225 LWIP_ASSERT("tcp_input: pcb->next != pcb (after cache)", pcb->next != pcb);
232 /* If it did not go to an active connection, we check the connections
233 in the TIME-WAIT state. */
235 for(pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) {
236 LWIP_ASSERT("tcp_input: TIME-WAIT pcb->state == TIME-WAIT", pcb->state == TIME_WAIT);
237 if (pcb->remote_port == tcphdr->src &&
238 pcb->local_port == tcphdr->dest &&
239 ip_addr_cmp(&(pcb->remote_ip), &(iphdr->src)) &&
240 ip_addr_cmp(&(pcb->local_ip), &(iphdr->dest))) {
241 /* We don't really care enough to move this PCB to the front
242 of the list since we are not very likely to receive that
243 many segments for connections in TIME-WAIT. */
244 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packed for TIME_WAITing connection.\n"));
245 tcp_timewait_input(pcb);
251 /* Finally, if we still did not get a match, we check all PCBs that
252 are LISTENing for incoming connections. */
254 for(lpcb = tcp_listen_pcbs.listen_pcbs; lpcb != NULL; lpcb = lpcb->next) {
255 if ((ip_addr_isany(&(lpcb->local_ip)) ||
256 ip_addr_cmp(&(lpcb->local_ip), &(iphdr->dest))) &&
257 lpcb->local_port == tcphdr->dest) {
258 /* Move this PCB to the front of the list so that subsequent
259 lookups will be faster (we exploit locality in TCP segment
262 ((struct tcp_pcb_listen *)prev)->next = lpcb->next;
263 /* our successor is the remainder of the listening list */
264 lpcb->next = tcp_listen_pcbs.listen_pcbs;
265 /* put this listening pcb at the head of the listening list */
266 tcp_listen_pcbs.listen_pcbs = lpcb;
269 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packed for LISTENing connection.\n"));
270 tcp_listen_input(lpcb);
274 prev = (struct tcp_pcb *)lpcb;
279 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("+-+-+-+-+-+-+-+-+-+-+-+-+-+- tcp_input: flags "));
280 tcp_debug_print_flags(TCPH_FLAGS(tcphdr));
281 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n"));
282 #endif /* TCP_INPUT_DEBUG */
286 /* The incoming segment belongs to a connection. */
289 tcp_debug_print_state(pcb->state);
290 #endif /* TCP_DEBUG */
291 #endif /* TCP_INPUT_DEBUG */
293 /* Set up a tcp_seg structure. */
295 inseg.len = p->tot_len;
296 inseg.dataptr = p->payload;
298 inseg.tcphdr = tcphdr;
304 err = tcp_process(pcb);
305 tcp_input_pcb = NULL;
306 /* A return value of ERR_ABRT means that tcp_abort() was called
307 and that the pcb has been freed. If so, we don't do anything. */
308 if (err != ERR_ABRT) {
309 if (recv_flags & TF_RESET) {
310 /* TF_RESET means that the connection was reset by the other
311 end. We then call the error callback to inform the
312 application that the connection is dead before we
313 deallocate the PCB. */
314 TCP_EVENT_ERR(pcb->errf, pcb->callback_arg, ERR_RST);
315 tcp_pcb_remove(&tcp_active_pcbs, pcb);
316 memp_free(MEMP_TCP_PCB, pcb);
317 } else if (recv_flags & TF_CLOSED) {
318 /* The connection has been closed and we will deallocate the
320 tcp_pcb_remove(&tcp_active_pcbs, pcb);
321 memp_free(MEMP_TCP_PCB, pcb);
324 /* If the application has registered a "sent" function to be
325 called when new send buffer space is available, we call it
327 if (pcb->acked > 0) {
328 TCP_EVENT_SENT(pcb, pcb->acked, err);
331 if (recv_data != NULL) {
332 /* Notify application that data has been received. */
333 TCP_EVENT_RECV(pcb, recv_data, ERR_OK, err);
336 /* If a FIN segment was received, we call the callback
337 function with a NULL buffer to indicate EOF. */
338 if (recv_flags & TF_GOT_FIN) {
339 TCP_EVENT_RECV(pcb, NULL, ERR_OK, err);
341 /* If there were no errors, we try to send something out. */
349 /* We deallocate the incoming pbuf. If it was buffered by the
350 application, the application should have called pbuf_ref() to
351 increase the reference counter in the pbuf. If so, the buffer
352 isn't actually deallocated by the call to pbuf_free(), only the
353 reference count is decreased. */
357 tcp_debug_print_state(pcb->state);
358 #endif /* TCP_DEBUG */
359 #endif /* TCP_INPUT_DEBUG */
361 /* First socket should receive now */
363 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: searching next PCB.\n"));
366 /* We are searching connected sockets */
369 #endif /* SO_REUSE */
374 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: freeing PBUF with reference counter set to %i\n", p->ref));
378 #endif /* SO_REUSE */
379 /* If no matching PCB was found, send a TCP RST (reset) to the
381 LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_input: no PCB match found, resetting.\n"));
382 if (!(TCPH_FLAGS(tcphdr) & TCP_RST)) {
383 TCP_STATS_INC(tcp.proterr);
384 TCP_STATS_INC(tcp.drop);
385 tcp_rst(ackno, seqno + tcplen,
386 &(iphdr->dest), &(iphdr->src),
387 tcphdr->dest, tcphdr->src);
393 #endif /* SO_REUSE */
394 LWIP_ASSERT("tcp_input: tcp_pcbs_sane()", tcp_pcbs_sane());
395 PERF_STOP("tcp_input");
/*
 * tcp_listen_input(): handle a segment that matched a listening PCB.
 *
 * pcb - the listening PCB; its prio, local_port, callback_arg, accept
 *       callback (with LWIP_CALLBACK_API) and a masked subset of so_options
 *       are copied into the new connection PCB.
 *
 * Visible behaviour:
 *  - a segment with ACK set is answered with tcp_rst();
 *  - otherwise a SYN allocates a PCB with tcp_alloc(), fills in addresses,
 *    ports and initial sequence/window state, sets the state to SYN_RCVD,
 *    registers the PCB on tcp_active_pcbs, then enqueues a SYN|ACK carrying
 *    a 4-byte MSS option and returns the result of tcp_output().
 *
 * NOTE(review): lines are missing from this listing (return type, braces,
 * the allocation-failure return, the option-parsing call), so the other
 * return paths cannot be confirmed from here.
 */
398 /* tcp_listen_input():
400 * Called by tcp_input() when a segment arrives for a listening
405 tcp_listen_input(struct tcp_pcb_listen *pcb)
407 struct tcp_pcb *npcb;
410 /* In the LISTEN state, we check for incoming SYN segments,
411 create a new PCB, and respond with a SYN|ACK. */
412 if (flags & TCP_ACK) {
413 /* For incoming segments with the ACK flag set, respond with a
415 LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_listen_input: ACK in LISTEN, sending reset\n"));
416 tcp_rst(ackno + 1, seqno + tcplen,
417 &(iphdr->dest), &(iphdr->src),
418 tcphdr->dest, tcphdr->src);
419 } else if (flags & TCP_SYN) {
420 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection request %u -> %u.\n", tcphdr->src, tcphdr->dest));
421 npcb = tcp_alloc(pcb->prio);
422 /* If a new PCB could not be created (probably due to lack of memory),
423 we don't do anything, but rely on the sender to retransmit the
424 SYN at a time when we have more memory available. */
426 LWIP_DEBUGF(TCP_DEBUG, ("tcp_listen_input: could not allocate PCB\n"));
427 TCP_STATS_INC(tcp.memerr);
430 /* Set up the new PCB. */
431 ip_addr_set(&(npcb->local_ip), &(iphdr->dest));
432 npcb->local_port = pcb->local_port;
433 ip_addr_set(&(npcb->remote_ip), &(iphdr->src));
434 npcb->remote_port = tcphdr->src;
435 npcb->state = SYN_RCVD;
436 npcb->rcv_nxt = seqno + 1;
437 npcb->snd_wnd = tcphdr->wnd;
438 npcb->ssthresh = npcb->snd_wnd;
439 npcb->snd_wl1 = seqno - 1;/* initialise to seqno-1 to force window update */
440 npcb->callback_arg = pcb->callback_arg;
441 #if LWIP_CALLBACK_API
442 npcb->accept = pcb->accept;
443 #endif /* LWIP_CALLBACK_API */
444 /* inherit socket options */
445 npcb->so_options = pcb->so_options & (SOF_DEBUG|SOF_DONTROUTE|SOF_KEEPALIVE|SOF_OOBINLINE|SOF_LINGER);
446 /* Register the new PCB so that we can begin receiving segments
448 TCP_REG(&tcp_active_pcbs, npcb);
450 /* Parse any options in the SYN. */
453 /* Build an MSS option. */
454 optdata = htonl(((u32_t)2 << 24) |
456 (((u32_t)npcb->mss / 256) << 8) |
458 /* Send a SYN|ACK together with the MSS option. */
459 tcp_enqueue(npcb, NULL, 0, TCP_SYN | TCP_ACK, 0, (u8_t *)&optdata, 4);
460 return tcp_output(npcb);
/*
 * tcp_timewait_input(): handle a segment that matched a PCB on tcp_tw_pcbs.
 *
 * pcb - the TIME-WAIT PCB the segment was matched to.
 *
 * When the end of the segment (seqno + tcplen) lies beyond pcb->rcv_nxt,
 * rcv_nxt is advanced to it. The visible return value is the result of
 * tcp_output(pcb).
 *
 * NOTE(review): lines between the rcv_nxt update and the return are missing
 * from this listing; confirm the full behaviour against the complete source.
 */
465 /* tcp_timewait_input():
467 * Called by tcp_input() when a segment arrives for a connection in
472 tcp_timewait_input(struct tcp_pcb *pcb)
474 if (TCP_SEQ_GT(seqno + tcplen, pcb->rcv_nxt)) {
475 pcb->rcv_nxt = seqno + tcplen;
480 return tcp_output(pcb);
485 * Implements the TCP state machine. Called by tcp_input. In some
486 * states tcp_receive() is called to receive data. The tcp_seg
487 * argument will be freed by the caller (tcp_input()) unless the
488 * recv_data pointer in the pcb is set.
/*
 * tcp_process(): per-connection state handling for the segment described by
 * the file-scope variables (flags, seqno, ackno, tcphdr, inseg).
 *
 * pcb - the active PCB that tcp_input() matched the segment to.
 *
 * Visible behaviour:
 *  - RST: accepted in SYN_SENT when ackno == pcb->snd_nxt, otherwise when
 *    seqno lies in [rcv_nxt, rcv_nxt + rcv_wnd]; an accepted reset sets
 *    recv_flags to TF_RESET and clears TF_ACK_DELAY; an unacceptable one is
 *    only logged;
 *  - pcb->tmr is refreshed from tcp_ticks;
 *  - a switch on pcb->state then advances the connection: a SYN|ACK that
 *    acknowledges the queued SYN moves the PCB to ESTABLISHED and raises
 *    TCP_EVENT_CONNECTED; a valid ACK for our SYN|ACK moves it to ESTABLISHED
 *    and raises TCP_EVENT_ACCEPT; FIN/ACK combinations move it to CLOSE_WAIT,
 *    FIN_WAIT_2, CLOSING or TIME_WAIT (re-registering the PCB on tcp_tw_pcbs),
 *    or set recv_flags to TF_CLOSED.
 *
 * NOTE(review): the case labels, break/return statements and several calls
 * are missing from this listing, so which branch belongs to which state
 * cannot be confirmed from here.
 */
492 tcp_process(struct tcp_pcb *pcb)
494 struct tcp_seg *rseg;
501 /* Process incoming RST segments. */
502 if (flags & TCP_RST) {
503 /* First, determine if the reset is acceptable. */
504 if (pcb->state == SYN_SENT) {
505 if (ackno == pcb->snd_nxt) {
509 if (TCP_SEQ_GEQ(seqno, pcb->rcv_nxt) &&
510 TCP_SEQ_LEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {
516 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: Connection RESET\n"));
517 LWIP_ASSERT("tcp_input: pcb->state != CLOSED", pcb->state != CLOSED);
518 recv_flags = TF_RESET;
519 pcb->flags &= ~TF_ACK_DELAY;
522 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: unacceptable reset seqno %lu rcv_nxt %lu\n",
523 seqno, pcb->rcv_nxt));
524 LWIP_DEBUGF(TCP_DEBUG, ("tcp_process: unacceptable reset seqno %lu rcv_nxt %lu\n",
525 seqno, pcb->rcv_nxt));
530 /* Update the PCB (in)activity timer. */
531 pcb->tmr = tcp_ticks;
534 /* Do different things depending on the TCP state. */
535 switch (pcb->state) {
537 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("SYN-SENT: ackno %lu pcb->snd_nxt %lu unacked %lu\n", ackno,
538 pcb->snd_nxt, ntohl(pcb->unacked->tcphdr->seqno)));
539 if ((flags & TCP_ACK) && (flags & TCP_SYN)
540 && ackno == ntohl(pcb->unacked->tcphdr->seqno) + 1) {
541 pcb->rcv_nxt = seqno + 1;
542 pcb->lastack = ackno;
543 pcb->snd_wnd = tcphdr->wnd;
544 pcb->snd_wl1 = seqno - 1; /* initialise to seqno - 1 to force window update */
545 pcb->state = ESTABLISHED;
546 pcb->cwnd = pcb->mss;
548 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_process: SYN-SENT --queuelen %u\n", (unsigned int)pcb->snd_queuelen));
550 pcb->unacked = rseg->next;
553 /* Parse any options in the SYNACK. */
556 /* Call the user specified function to call when successfully
558 TCP_EVENT_CONNECTED(pcb, ERR_OK, err);
563 if (flags & TCP_ACK &&
564 !(flags & TCP_RST)) {
565 if (TCP_SEQ_LT(pcb->lastack, ackno) &&
566 TCP_SEQ_LEQ(ackno, pcb->snd_nxt)) {
567 pcb->state = ESTABLISHED;
568 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection established %u -> %u.\n", inseg.tcphdr->src, inseg.tcphdr->dest));
569 #if LWIP_CALLBACK_API
570 LWIP_ASSERT("pcb->accept != NULL", pcb->accept != NULL);
572 /* Call the accept function. */
573 TCP_EVENT_ACCEPT(pcb, ERR_OK, err);
575 /* If the accept function returns with an error, we abort
580 /* If there was any data contained within this ACK,
581 * we'd better pass it on to the application as well. */
583 pcb->cwnd = pcb->mss;
591 if (flags & TCP_FIN) {
593 pcb->state = CLOSE_WAIT;
598 if (flags & TCP_FIN) {
599 if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
600 LWIP_DEBUGF(TCP_DEBUG,
601 ("TCP connection closed %d -> %d.\n", inseg.tcphdr->src, inseg.tcphdr->dest));
604 TCP_RMV(&tcp_active_pcbs, pcb);
605 pcb->state = TIME_WAIT;
606 TCP_REG(&tcp_tw_pcbs, pcb);
609 pcb->state = CLOSING;
611 } else if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
612 pcb->state = FIN_WAIT_2;
617 if (flags & TCP_FIN) {
618 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed %u -> %u.\n", inseg.tcphdr->src, inseg.tcphdr->dest));
621 TCP_RMV(&tcp_active_pcbs, pcb);
622 pcb->state = TIME_WAIT;
623 TCP_REG(&tcp_tw_pcbs, pcb);
628 if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
629 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed %u -> %u.\n", inseg.tcphdr->src, inseg.tcphdr->dest));
632 TCP_RMV(&tcp_active_pcbs, pcb);
633 pcb->state = TIME_WAIT;
634 TCP_REG(&tcp_tw_pcbs, pcb);
639 if (flags & TCP_ACK && ackno == pcb->snd_nxt) {
640 LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed %u -> %u.\n", inseg.tcphdr->src, inseg.tcphdr->dest));
642 recv_flags = TF_CLOSED;
654 * Called by tcp_process. Checks if the given segment is an ACK for outstanding
655 * data, and if so frees the memory of the buffered data. Next, it places the
656 * segment on any of the receive queues (pcb->recved or pcb->ooseq). If the segment
657 * is buffered, the pbuf is referenced by pbuf_ref so that it will not be freed until
658 * it has been removed from the buffer.
660 * If the incoming segment constitutes an ACK for a segment that was used for RTT
661 * estimation, the RTT is estimated here as well.
/*
 * tcp_receive(): ACK and data processing for the current segment, using the
 * file-scope variables flags, seqno, ackno, tcphdr, tcplen and inseg.
 *
 * pcb - the connection the segment belongs to.
 *
 * ACK handling (when TCP_ACK is set):
 *  - the send window (snd_wnd, snd_wl1, snd_wl2) is updated when the segment
 *    is newer or advertises a larger window;
 *  - an ACK equal to pcb->lastack with an unchanged right window edge is
 *    treated as a duplicate; once dupacks >= 3 with data outstanding,
 *    ssthresh and cwnd are adjusted and TF_INFR is set, and further
 *    duplicates inflate cwnd by one mss (guarded against u16_t wrap);
 *  - an ACK in (lastack, snd_max] clears TF_INFR, recomputes rto, credits
 *    snd_buf with the acknowledged byte count, grows cwnd (by mss below
 *    ssthresh, by mss*mss/cwnd otherwise) and removes fully acknowledged
 *    segments from pcb->unacked and pcb->unsent, reducing snd_queuelen;
 *  - if pcb->rttest is set and ackno is past pcb->rtseq, an RTT sample is
 *    folded into sa/sv and rto is recomputed.
 *
 * Data handling:
 *  - a segment starting before rcv_nxt but extending past it has its front
 *    trimmed; one lying wholly before rcv_nxt is logged as a duplicate;
 *  - a segment starting exactly at rcv_nxt advances rcv_nxt and shrinks
 *    rcv_wnd, sets TF_GOT_FIN on FIN, and then pulls now-contiguous segments
 *    off pcb->ooseq, chaining their pbufs onto recv_data;
 *  - other in-window segments are copied into pcb->ooseq in sequence order,
 *    trimming overlaps with pbuf_realloc().
 *
 * NOTE(review): many lines are missing from this listing (local declarations,
 * braces, else branches, tcp_ack/tcp_seg_free style calls, #if openers and
 * several comment terminators). The summary covers only what is visible;
 * confirm details against the complete source.
 */
665 tcp_receive(struct tcp_pcb *pcb)
667 struct tcp_seg *next;
669 struct tcp_seg *prev, *cseg;
674 u32_t right_wnd_edge;
677 if (flags & TCP_ACK) {
678 right_wnd_edge = pcb->snd_wnd + pcb->snd_wl1;
681 if (TCP_SEQ_LT(pcb->snd_wl1, seqno) ||
682 (pcb->snd_wl1 == seqno && TCP_SEQ_LT(pcb->snd_wl2, ackno)) ||
683 (pcb->snd_wl2 == ackno && tcphdr->wnd > pcb->snd_wnd)) {
684 pcb->snd_wnd = tcphdr->wnd;
685 pcb->snd_wl1 = seqno;
686 pcb->snd_wl2 = ackno;
687 LWIP_DEBUGF(TCP_WND_DEBUG, ("tcp_receive: window update %lu\n", pcb->snd_wnd));
690 if (pcb->snd_wnd != tcphdr->wnd) {
691 LWIP_DEBUGF(TCP_WND_DEBUG, ("tcp_receive: no window update lastack %lu snd_max %lu ackno %lu wl1 %lu seqno %lu wl2 %lu\n",
692 pcb->lastack, pcb->snd_max, ackno, pcb->snd_wl1, seqno, pcb->snd_wl2));
694 #endif /* TCP_WND_DEBUG */
698 if (pcb->lastack == ackno) {
701 if (pcb->snd_wl1 + pcb->snd_wnd == right_wnd_edge){
703 if (pcb->dupacks >= 3 && pcb->unacked != NULL) {
704 if (!(pcb->flags & TF_INFR)) {
705 /* This is fast retransmit. Retransmit the first unacked segment. */
706 LWIP_DEBUGF(TCP_FR_DEBUG, ("tcp_receive: dupacks %u (%lu), fast retransmit %lu\n",
707 (unsigned int)pcb->dupacks, pcb->lastack,
708 ntohl(pcb->unacked->tcphdr->seqno)));
710 /* Set ssthresh to max (FlightSize / 2, 2*SMSS) */
711 pcb->ssthresh = LWIP_MAX((pcb->snd_max -
715 pcb->cwnd = pcb->ssthresh + 3 * pcb->mss;
716 pcb->flags |= TF_INFR;
718 /* Inflate the congestion window, but not if it means that
719 the value overflows. */
720 if ((u16_t)(pcb->cwnd + pcb->mss) > pcb->cwnd) {
721 pcb->cwnd += pcb->mss;
726 LWIP_DEBUGF(TCP_FR_DEBUG, ("tcp_receive: dupack averted %lu %lu\n",
727 pcb->snd_wl1 + pcb->snd_wnd, right_wnd_edge));
729 } else if (TCP_SEQ_LT(pcb->lastack, ackno) &&
730 TCP_SEQ_LEQ(ackno, pcb->snd_max)) {
731 /* We come here when the ACK acknowledges new data. */
733 /* Reset the "IN Fast Retransmit" flag, since we are no longer
734 in fast retransmit. Also reset the congestion window to the
735 slow start threshold. */
736 if (pcb->flags & TF_INFR) {
737 pcb->flags &= ~TF_INFR;
738 pcb->cwnd = pcb->ssthresh;
741 /* Reset the number of retransmissions. */
744 /* Reset the retransmission time-out. */
745 pcb->rto = (pcb->sa >> 3) + pcb->sv;
747 /* Update the send buffer space. */
748 pcb->acked = ackno - pcb->lastack;
749 pcb->snd_buf += pcb->acked;
751 /* Reset the fast retransmit variables. */
753 pcb->lastack = ackno;
755 /* Update the congestion control variables (cwnd and
757 if (pcb->state >= ESTABLISHED) {
758 if (pcb->cwnd < pcb->ssthresh) {
759 if ((u16_t)(pcb->cwnd + pcb->mss) > pcb->cwnd) {
760 pcb->cwnd += pcb->mss;
762 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: slow start cwnd %u\n", pcb->cwnd));
764 u16_t new_cwnd = (pcb->cwnd + pcb->mss * pcb->mss / pcb->cwnd);
765 if (new_cwnd > pcb->cwnd) {
766 pcb->cwnd = new_cwnd;
768 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: congestion avoidance cwnd %u\n", pcb->cwnd));
771 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: ACK for %lu, unacked->seqno %lu:%lu\n",
773 pcb->unacked != NULL?
774 ntohl(pcb->unacked->tcphdr->seqno): 0,
775 pcb->unacked != NULL?
776 ntohl(pcb->unacked->tcphdr->seqno) + TCP_TCPLEN(pcb->unacked): 0));
778 /* Remove segment from the unacknowledged list if the incoming
779 ACK acknowledges them. */
780 while (pcb->unacked != NULL &&
781 TCP_SEQ_LEQ(ntohl(pcb->unacked->tcphdr->seqno) +
782 TCP_TCPLEN(pcb->unacked), ackno)) {
783 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: removing %lu:%lu from pcb->unacked\n",
784 ntohl(pcb->unacked->tcphdr->seqno),
785 ntohl(pcb->unacked->tcphdr->seqno) +
786 TCP_TCPLEN(pcb->unacked)));
789 pcb->unacked = pcb->unacked->next;
791 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_receive: queuelen %u ... ", (unsigned int)pcb->snd_queuelen));
792 pcb->snd_queuelen -= pbuf_clen(next->p);
795 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("%u (after freeing unacked)\n", (unsigned int)pcb->snd_queuelen));
796 if (pcb->snd_queuelen != 0) {
797 LWIP_ASSERT("tcp_receive: valid queue length", pcb->unacked != NULL ||
798 pcb->unsent != NULL);
804 /* We go through the ->unsent list to see if any of the segments
805 on the list are acknowledged by the ACK. This may seem
806 strange since an "unsent" segment shouldn't be acked. The
807 rationale is that lwIP puts all outstanding segments on the
808 ->unsent list after a retransmission, so these segments may
809 in fact have been sent once. */
810 while (pcb->unsent != NULL &&
811 TCP_SEQ_LEQ(ntohl(pcb->unsent->tcphdr->seqno) + TCP_TCPLEN(pcb->unsent),
813 TCP_SEQ_LEQ(ackno, pcb->snd_max)) {
814 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: removing %lu:%lu from pcb->unsent\n",
815 ntohl(pcb->unsent->tcphdr->seqno),
816 ntohl(pcb->unsent->tcphdr->seqno) +
817 TCP_TCPLEN(pcb->unsent)));
820 pcb->unsent = pcb->unsent->next;
821 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_receive: queuelen %u ... ", (unsigned int)pcb->snd_queuelen));
822 pcb->snd_queuelen -= pbuf_clen(next->p);
824 LWIP_DEBUGF(TCP_QLEN_DEBUG, ("%u (after freeing unsent)\n", (unsigned int)pcb->snd_queuelen));
825 if (pcb->snd_queuelen != 0) {
826 LWIP_ASSERT("tcp_receive: valid queue length", pcb->unacked != NULL ||
827 pcb->unsent != NULL);
830 if (pcb->unsent != NULL) {
831 pcb->snd_nxt = htonl(pcb->unsent->tcphdr->seqno);
835 /* End of ACK for new data processing. */
837 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: pcb->rttest %u rtseq %lu ackno %lu\n",
838 pcb->rttest, pcb->rtseq, ackno));
840 /* RTT estimation calculations. This is done by checking if the
841 incoming segment acknowledges the segment we use to take a
842 round-trip time measurement. */
843 if (pcb->rttest && TCP_SEQ_LT(pcb->rtseq, ackno)) {
844 m = tcp_ticks - pcb->rttest;
846 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: experienced rtt %u ticks (%u msec).\n",
847 m, m * TCP_SLOW_INTERVAL));
849 /* This is taken directly from VJs original code in his paper */
850 m = m - (pcb->sa >> 3);
855 m = m - (pcb->sv >> 2);
857 pcb->rto = (pcb->sa >> 3) + pcb->sv;
859 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: RTO %u (%u miliseconds)\n",
860 pcb->rto, pcb->rto * TCP_SLOW_INTERVAL));
866 /* If the incoming segment contains data, we must process it
869 /* This code basically does three things:
871 +) If the incoming segment contains data that is the next
872 in-sequence data, this data is passed to the application. This
873 might involve trimming the first edge of the data. The rcv_nxt
874 variable and the advertised window are adjusted.
876 +) If the incoming segment has data that is above the next
877 sequence number expected (->rcv_nxt), the segment is placed on
878 the ->ooseq queue. This is done by finding the appropriate
879 place in the ->ooseq queue (which is ordered by sequence
880 number) and trim the segment in both ends if needed. An
881 immediate ACK is sent to indicate that we received an
882 out-of-sequence segment.
884 +) Finally, we check if the first segment on the ->ooseq queue
885 now is in sequence (i.e., if rcv_nxt >= ooseq->seqno). If
886 rcv_nxt > ooseq->seqno, we must trim the first edge of the
887 segment on ->ooseq before we adjust rcv_nxt. The data in the
888 segments that are now on sequence are chained onto the
889 incoming segment so that we only need to call the application
893 /* First, we check if we must trim the first edge. We have to do
894 this if the sequence number of the incoming segment is less
895 than rcv_nxt, and the sequence number plus the length of the
896 segment is larger than rcv_nxt. */
897 if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)){
898 if (TCP_SEQ_LT(pcb->rcv_nxt, seqno + tcplen)) {
899 /* Trimming the first edge is done by pushing the payload
900 pointer in the pbuf downwards. This is somewhat tricky since
901 we do not want to discard the full contents of the pbuf up to
902 the new starting point of the data since we have to keep the
903 TCP header which is present in the first pbuf in the chain.
905 What is done is really quite a nasty hack: the first pbuf in
906 the pbuf chain is pointed to by inseg.p. Since we need to be
907 able to deallocate the whole pbuf, we cannot change this
908 inseg.p pointer to point to any of the later pbufs in the
909 chain. Instead, we point the ->payload pointer in the first
910 pbuf to data in one of the later pbufs. We also set the
911 inseg.data pointer to point to the right place. This way, the
912 ->p pointer will still point to the first pbuf, but the
913 ->p->payload pointer will point to data in another pbuf.
915 After we are done with adjusting the pbuf pointers we must
916 adjust the ->data pointer in the seg and the segment
918 off = pcb->rcv_nxt - seqno;
919 if (inseg.p->len < off) {
921 while (p->len < off) {
923 inseg.p->tot_len -= p->len;
927 pbuf_header(p, -off);
929 pbuf_header(inseg.p, -off);
931 inseg.dataptr = inseg.p->payload;
932 inseg.len -= pcb->rcv_nxt - seqno;
933 inseg.tcphdr->seqno = seqno = pcb->rcv_nxt;
936 /* the whole segment is < rcv_nxt */
937 /* must be a duplicate of a packet that has already been correctly handled */
939 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: duplicate seqno %lu\n", seqno));
944 /* The sequence number must be within the window (above rcv_nxt
945 and below rcv_nxt + rcv_wnd) in order to be further
947 if (TCP_SEQ_GEQ(seqno, pcb->rcv_nxt) &&
948 TCP_SEQ_LT(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {
949 if (pcb->rcv_nxt == seqno) {
950 /* The incoming segment is the next in sequence. We check if
951 we have to trim the end of the segment and update rcv_nxt
952 and pass the data to the application. */
954 if (pcb->ooseq != NULL &&
955 TCP_SEQ_LEQ(pcb->ooseq->tcphdr->seqno, seqno + inseg.len)) {
956 /* We have to trim the second edge of the incoming
958 inseg.len = pcb->ooseq->tcphdr->seqno - seqno;
959 pbuf_realloc(inseg.p, inseg.len);
961 #endif /* TCP_QUEUE_OOSEQ */
963 tcplen = TCP_TCPLEN(&inseg);
965 pcb->rcv_nxt += tcplen;
967 /* Update the receiver's (our) window. */
968 if (pcb->rcv_wnd < tcplen) {
971 pcb->rcv_wnd -= tcplen;
974 /* If there is data in the segment, we make preparations to
975 pass this up to the application. The ->recv_data variable
976 is used for holding the pbuf that goes to the
977 application. The code for reassembling out-of-sequence data
978 chains its data on this pbuf as well.
980 If the segment was a FIN, we set the TF_GOT_FIN flag that will
981 be used to indicate to the application that the remote side has
982 closed its end of the connection. */
983 if (inseg.p->tot_len > 0) {
985 /* Since this pbuf now is the responsibility of the
986 application, we delete our reference to it so that we won't
987 (mistakenly) deallocate it. */
990 if (TCPH_FLAGS(inseg.tcphdr) & TCP_FIN) {
991 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: received FIN.\n"));
992 recv_flags = TF_GOT_FIN;
996 /* We now check if we have segments on the ->ooseq queue that
997 is now in sequence. */
998 while (pcb->ooseq != NULL &&
999 pcb->ooseq->tcphdr->seqno == pcb->rcv_nxt) {
1002 seqno = pcb->ooseq->tcphdr->seqno;
1004 pcb->rcv_nxt += TCP_TCPLEN(cseg);
1005 if (pcb->rcv_wnd < TCP_TCPLEN(cseg)) {
1008 pcb->rcv_wnd -= TCP_TCPLEN(cseg);
1010 if (cseg->p->tot_len > 0) {
1011 /* Chain this pbuf onto the pbuf that we will pass to
1014 pbuf_cat(recv_data, cseg->p);
1016 recv_data = cseg->p;
1020 if (TCPH_FLAGS(cseg->tcphdr) & TCP_FIN) {
1021 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: dequeued FIN.\n"));
1022 recv_flags = TF_GOT_FIN;
1026 pcb->ooseq = cseg->next;
1029 #endif /* TCP_QUEUE_OOSEQ */
1032 /* Acknowledge the segment(s). */
1036 /* We get here if the incoming segment is out-of-sequence. */
1039 /* We queue the segment on the ->ooseq queue. */
1040 if (pcb->ooseq == NULL) {
1041 pcb->ooseq = tcp_seg_copy(&inseg);
1043 /* If the queue is not empty, we walk through the queue and
1044 try to find a place where the sequence number of the
1045 incoming segment is between the sequence numbers of the
1046 previous and the next segment on the ->ooseq queue. That is
1047 the place where we put the incoming segment. If needed, we
1048 trim the second edges of the previous and the incoming
1049 segment so that it will fit into the sequence.
1051 If the incoming segment has the same sequence number as a
1052 segment on the ->ooseq queue, we discard the segment that
1053 contains less data. */
1056 for(next = pcb->ooseq; next != NULL; next = next->next) {
1057 if (seqno == next->tcphdr->seqno) {
1058 /* The sequence number of the incoming segment is the
1059 same as the sequence number of the segment on
1060 ->ooseq. We check the lengths to see which one to
1062 if (inseg.len > next->len) {
1063 /* The incoming segment is larger than the old
1064 segment. We replace the old segment with the new
1066 cseg = tcp_seg_copy(&inseg);
1068 cseg->next = next->next;
1077 /* Either the lengths are the same or the incoming
1078 segment was smaller than the old one; in either
1079 case, we ditch the incoming segment. */
1084 if (TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {
1085 /* The sequence number of the incoming segment is lower
1086 than the sequence number of the first segment on the
1087 queue. We put the incoming segment first on the
1090 if (TCP_SEQ_GT(seqno + inseg.len, next->tcphdr->seqno)) {
1091 /* We need to trim the incoming segment. */
1092 inseg.len = next->tcphdr->seqno - seqno;
1093 pbuf_realloc(inseg.p, inseg.len);
1095 cseg = tcp_seg_copy(&inseg);
1102 } else if (TCP_SEQ_LT(prev->tcphdr->seqno, seqno) &&
1103 TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {
1104 /* The sequence number of the incoming segment is in
1105 between the sequence numbers of the previous and
1106 the next segment on ->ooseq. We trim and insert the
1107 incoming segment and trim the previous segment, if
1109 if (TCP_SEQ_GT(seqno + inseg.len, next->tcphdr->seqno)) {
1110 /* We need to trim the incoming segment. */
1111 inseg.len = next->tcphdr->seqno - seqno;
1112 pbuf_realloc(inseg.p, inseg.len);
1115 cseg = tcp_seg_copy(&inseg);
1119 if (TCP_SEQ_GT(prev->tcphdr->seqno + prev->len, seqno)) {
1120 /* We need to trim the prev segment. */
1121 prev->len = seqno - prev->tcphdr->seqno;
1122 pbuf_realloc(prev->p, prev->len);
1127 /* If the "next" segment is the last segment on the
1128 ooseq queue, we add the incoming segment to the end
1130 if (next->next == NULL &&
1131 TCP_SEQ_GT(seqno, next->tcphdr->seqno)) {
1132 next->next = tcp_seg_copy(&inseg);
1133 if (next->next != NULL) {
1134 if (TCP_SEQ_GT(next->tcphdr->seqno + next->len, seqno)) {
1135 /* We need to trim the last segment. */
1136 next->len = seqno - next->tcphdr->seqno;
1137 pbuf_realloc(next->p, next->len);
1146 #endif /* TCP_QUEUE_OOSEQ */
1151 /* Segments with length 0 are taken care of here. Segments that
1152 fall out of the window are ACKed. */
1153 if (TCP_SEQ_GT(pcb->rcv_nxt, seqno) ||
1154 TCP_SEQ_GEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {
1163 * Parses the options contained in the incoming segment. (Code taken
1164 * from uIP with only small changes.)
/*
 * tcp_parseopt(): scan the option bytes of the current segment (file-scope
 * tcphdr) for an MSS option.
 *
 * pcb - connection whose mss field is updated when an MSS option is found.
 *
 * Options start TCP_HLEN bytes into the header and are scanned only when the
 * header length field exceeds 5 words; the loop bound is the option area
 * size, (TCPH_HDRLEN - 5) << 2 bytes. An option of kind 0x02 with length 0x04
 * yields a 16-bit MSS (high byte first) that is clamped to TCP_MSS before
 * being stored in pcb->mss. Any other option with a zero length byte is
 * treated as malformed and ends parsing.
 *
 * NOTE(review): the lines that read the option kind, advance the index and
 * leave the loop are missing from this listing; confirm against the complete
 * source.
 */
1169 tcp_parseopt(struct tcp_pcb *pcb)
1175 opts = (u8_t *)tcphdr + TCP_HLEN;
1177 /* Parse the TCP MSS option, if present. */
1178 if(TCPH_HDRLEN(tcphdr) > 0x5) {
1179 for(c = 0; c < (TCPH_HDRLEN(tcphdr) - 5) << 2 ;) {
1182 /* End of options. */
1184 } else if (opt == 0x01) {
1187 } else if (opt == 0x02 &&
1188 opts[c + 1] == 0x04) {
1189 /* An MSS option with the right option length. */
1190 mss = (opts[c + 2] << 8) | opts[c + 3];
1191 pcb->mss = mss > TCP_MSS? TCP_MSS: mss;
1193 /* And we are done processing options. */
1196 if (opts[c + 1] == 0) {
1197 /* If the length field is zero, the options are malformed
1198 and we don't process them further. */
1201 /* All other options have a length field, so that we easily
1202 can skip past them. */
1208 #endif /* LWIP_TCP */