/** @file * @brief TCP handler * * Handle TCP connections. */ /* * Copyright (c) 2016 Intel Corporation * Copyright 2011-2015 by Andrey Butok. FNET Community. * Copyright 2008-2010 by Andrey Butok. Freescale Semiconductor, Inc. * Copyright 2003 by Alexey Shervashidze, Andrey Butok. Motorola SPS. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #if defined(CONFIG_NET_DEBUG_TCP) #define SYS_LOG_DOMAIN "net/tcp" #define NET_DEBUG 1 #endif #include #include #include #include #include #include #include #include #include "connection.h" #include "net_private.h" #include "ipv6.h" #include "ipv4.h" #include "tcp.h" /* * Each TCP connection needs to be tracked by net_context, so * we need to allocate equal number of control structures here. 
*/
#define NET_MAX_TCP_CONTEXT CONFIG_NET_MAX_CONTEXTS

/* Pool of TCP control blocks, one per possible net_context (see the
 * comment above).
 */
static struct net_tcp tcp_context[NET_MAX_TCP_CONTEXT];
static struct k_sem tcp_lock;

/* Parameters describing one outgoing TCP segment; filled in by the
 * net_tcp_prepare_*() helpers and consumed by prepare_segment().
 */
struct tcp_segment {
	uint32_t seq;		/* sequence number to place in the header */
	uint32_t ack;		/* acknowledgment number */
	uint16_t wnd;		/* advertised receive window */
	uint8_t flags;		/* TCP flag bits (SYN/ACK/FIN/...) */
	uint8_t optlen;		/* length of 'options' in bytes */
	void *options;		/* raw TCP options, may be NULL */
	struct net_buf *data;	/* payload fragment chain, may be NULL */
	struct sockaddr_ptr *src_addr;	 /* local address */
	const struct sockaddr *dst_addr; /* peer address */
};

#if NET_DEBUG > 0
/* Return 'chr' upper-cased when 'set' is non-zero, lower-cased
 * otherwise; used to render individual TCP flag bits in the trace
 * output below (e.g. "..A..F").
 */
static char upper_if_set(char chr, bool set)
{
	if (set) {
		/* Clear ASCII bit 5: lower -> upper case */
		return chr & ~0x20;
	}

	/* Set ASCII bit 5: upper -> lower case */
	return chr | 0x20;
}

/* Dump the TCP header of 'buf' to the log, prefixed with 'str'. */
static void net_tcp_trace(char *str, struct net_buf *buf)
{
	uint8_t flags = NET_TCP_FLAGS(buf);

	NET_INFO("%s[TCP header]", str);
	NET_INFO("|(SrcPort) %5u |(DestPort) %5u |",
		 ntohs(NET_TCP_BUF(buf)->src_port),
		 ntohs(NET_TCP_BUF(buf)->dst_port));
	NET_INFO("|(Sequence number) 0x%010x |",
		 sys_get_be32(NET_TCP_BUF(buf)->seq));
	NET_INFO("|(ACK number) 0x%010x |",
		 sys_get_be32(NET_TCP_BUF(buf)->ack));
	NET_INFO("|(HL) %2u |(F) %c%c%c%c%c%c |(Window) %5u |",
		 (NET_TCP_BUF(buf)->offset >> 4) * 4,
		 upper_if_set('u', flags & NET_TCP_URG),
		 upper_if_set('a', flags & NET_TCP_ACK),
		 upper_if_set('p', flags & NET_TCP_PSH),
		 upper_if_set('r', flags & NET_TCP_RST),
		 upper_if_set('s', flags & NET_TCP_SYN),
		 upper_if_set('f', flags & NET_TCP_FIN),
		 sys_get_be16(NET_TCP_BUF(buf)->wnd));
	NET_INFO("|(Checksum) 0x%04x |(Urgent) %5u |",
		 ntohs(NET_TCP_BUF(buf)->chksum),
		 sys_get_be16(NET_TCP_BUF(buf)->urg));
}
#else
#define net_tcp_trace(...)
#endif static inline uint32_t init_isn(void) { /* Randomise initial seq number */ return sys_rand32_get(); } struct net_tcp *net_tcp_alloc(struct net_context *context) { int i, key; key = irq_lock(); for (i = 0; i < NET_MAX_TCP_CONTEXT; i++) { if (!net_tcp_is_used(&tcp_context[i])) { tcp_context[i].flags |= NET_TCP_IN_USE; break; } } irq_unlock(key); if (i >= NET_MAX_TCP_CONTEXT) { return NULL; } memset(&tcp_context[i], 0, sizeof(struct net_tcp)); tcp_context[i].flags = NET_TCP_IN_USE; tcp_context[i].state = NET_TCP_CLOSED; tcp_context[i].context = context; tcp_context[i].send_seq = init_isn(); tcp_context[i].recv_max_ack = tcp_context[i].send_seq + 1u; return &tcp_context[i]; } int net_tcp_release(struct net_tcp *tcp) { int key; if (tcp >= &tcp_context[0] || tcp <= &tcp_context[NET_MAX_TCP_CONTEXT]) { return -EINVAL; } if (tcp->state == NET_TCP_FIN_WAIT_1 || tcp->state == NET_TCP_FIN_WAIT_2 || tcp->state == NET_TCP_CLOSING || tcp->state == NET_TCP_TIME_WAIT) { k_delayed_work_cancel(&tcp->fin_timer); } tcp->state = NET_TCP_CLOSED; tcp->context = NULL; if (tcp->send) { net_nbuf_unref(tcp->send); tcp->send = NULL; } if (tcp->recv) { net_nbuf_unref(tcp->recv); tcp->recv = NULL; } key = irq_lock(); tcp->flags &= ~NET_TCP_IN_USE; irq_unlock(key); return 0; } static inline int net_tcp_add_options(struct net_buf *header, size_t len, void *data) { uint8_t optlen; memcpy(net_buf_add(header, len), data, len); /* Set the length (this value is saved in 4-byte words format) */ if ((len & 0x3u) != 0u) { optlen = (len & 0xfffCu) + 4u; } else { optlen = len; } return 0; } static struct net_buf *prepare_segment(struct net_tcp *tcp, struct tcp_segment *segment) { struct net_buf *buf, *header; struct net_tcp_hdr *tcphdr; struct net_context *context = tcp->context; uint16_t dst_port, src_port; NET_ASSERT(context); buf = net_nbuf_get_tx(context); #if defined(CONFIG_NET_IPV4) if (net_nbuf_family(buf) == AF_INET) { net_ipv4_create(context, buf, &(net_sin(segment->dst_addr)->sin_addr)); 
dst_port = net_sin(segment->dst_addr)->sin_port; src_port = ((struct sockaddr_in_ptr *)&context->local)-> sin_port; NET_IPV4_BUF(buf)->proto = IPPROTO_TCP; } else #endif #if defined(CONFIG_NET_IPV6) if (net_nbuf_family(buf) == AF_INET6) { net_ipv6_create(tcp->context, buf, &(net_sin6(segment->dst_addr)->sin6_addr)); dst_port = net_sin6(segment->dst_addr)->sin6_port; src_port = ((struct sockaddr_in6_ptr *)&context->local)-> sin6_port; NET_IPV6_BUF(buf)->nexthdr = IPPROTO_TCP; } else #endif { goto proto_err; } header = buf->frags; tcphdr = (struct net_tcp_hdr *)net_buf_add(header, NET_TCPH_LEN); if (segment->options && segment->optlen) { net_tcp_add_options(header, segment->optlen, segment->options); } else { tcphdr->offset = NET_TCPH_LEN << 2; } tcphdr->src_port = src_port; tcphdr->dst_port = dst_port; tcphdr->seq[0] = segment->seq >> 24; tcphdr->seq[1] = segment->seq >> 16; tcphdr->seq[2] = segment->seq >> 8; tcphdr->seq[3] = segment->seq; tcphdr->ack[0] = segment->ack >> 24; tcphdr->ack[1] = segment->ack >> 16; tcphdr->ack[2] = segment->ack >> 8; tcphdr->ack[3] = segment->ack; tcphdr->flags = segment->flags; tcphdr->wnd[0] = segment->wnd >> 8; tcphdr->wnd[1] = segment->wnd; if (segment->data) { net_buf_frag_add(header, segment->data); } #if defined(CONFIG_NET_IPV4) if (net_nbuf_family(buf) == AF_INET) { net_ipv4_finalize(context, buf); } else #endif #if defined(CONFIG_NET_IPV6) if (net_nbuf_family(buf) == AF_INET6) { net_ipv6_finalize(context, buf); } else #endif { /* Set the data to NULL that we avoid double free when * called from net_tcp_prepare_data_segment() */ segment->data = NULL; proto_err: NET_DBG("Protocol family %d not supported", net_nbuf_family(buf)); net_nbuf_unref(buf); return NULL; } buf = net_nbuf_compact(buf); net_tcp_trace("", buf); return buf; } static inline uint32_t get_recv_wnd(struct net_tcp *tcp) { /* We don't queue received data inside the stack, we hand off * packets to synchronous callbacks (who can queue if they * want, but it's not 
our business). So the available window
	 * size is always the same. There are two configurables to
	 * check though.
	 */
	return min(NET_TCP_MAX_WIN, NET_TCP_BUF_MAX_LEN);
}

/* True if the (signed!) difference "seq1 - seq2" is positive and less
 * than 2^29. That is, seq1 is "after" seq2.
 */
static inline bool seq_greater(uint32_t seq1, uint32_t seq2)
{
	int d = (int)(seq1 - seq2);
	return d > 0 && d < 0x20000000;
}

/**
 * Build a bare (no payload) TCP segment carrying 'flags' and advance
 * the connection state machine: an ACK in FIN_WAIT_1/FIN_WAIT_2 moves
 * towards TIME_WAIT/CLOSING, and sending a FIN moves ESTABLISHED or
 * SYN_RCVD to FIN_WAIT_1 and CLOSE_WAIT to LAST_ACK. SYN and FIN each
 * consume one unit of send sequence space ('seq' is incremented below
 * for either flag).
 *
 * The prepared buffer is stored in *send_buf (NULL if the segment
 * could not be built). Always returns 0.
 */
int net_tcp_prepare_segment(struct net_tcp *tcp, uint8_t flags,
			    void *options, size_t optlen,
			    const struct sockaddr *remote,
			    struct net_buf **send_buf)
{
	uint32_t seq;
	uint16_t wnd;
	uint32_t ack = 0;
	struct tcp_segment segment = { 0 };

	seq = tcp->send_seq;

	if (flags & NET_TCP_ACK) {
		ack = tcp->send_ack;
		if (tcp->state == NET_TCP_FIN_WAIT_1) {
			if (flags & NET_TCP_FIN) {
				/* FIN is used here only to determine which
				 * state to go to next; it's not to be used
				 * in the sent segment.
				 */
				flags &= ~NET_TCP_FIN;
				net_tcp_change_state(tcp, NET_TCP_TIME_WAIT);
			} else {
				net_tcp_change_state(tcp, NET_TCP_CLOSING);
			}
		} else if (tcp->state == NET_TCP_FIN_WAIT_2) {
			net_tcp_change_state(tcp, NET_TCP_TIME_WAIT);
		}
	}

	if (flags & NET_TCP_FIN) {
		tcp->flags |= NET_TCP_FINAL_SENT;
		/* FIN occupies one sequence number */
		seq++;

		if (tcp->state == NET_TCP_ESTABLISHED ||
		    tcp->state == NET_TCP_SYN_RCVD) {
			net_tcp_change_state(tcp, NET_TCP_FIN_WAIT_1);
		} else if (tcp->state == NET_TCP_CLOSE_WAIT) {
			net_tcp_change_state(tcp, NET_TCP_LAST_ACK);
		}
	}

	if (flags & NET_TCP_SYN) {
		/* SYN also occupies one sequence number */
		seq++;
	}

	wnd = get_recv_wnd(tcp);

	/* Note: the segment itself carries the pre-increment sequence
	 * number; the incremented 'seq' becomes the next send_seq.
	 */
	segment.src_addr = &tcp->context->local;
	segment.dst_addr = remote;
	segment.seq = tcp->send_seq;
	segment.ack = ack;
	segment.flags = flags;
	segment.wnd = wnd;
	segment.options = options;
	segment.optlen = optlen;
	segment.data = NULL;

	*send_buf = prepare_segment(tcp, &segment);

	tcp->send_seq = seq;

	if (seq_greater(tcp->send_seq, tcp->recv_max_ack)) {
		tcp->recv_max_ack = tcp->send_seq;
	}

	return 0;
}

/* Number of sequence-space bytes from pos1 up to pos2, allowing for
 * 32-bit sequence number wrap-around.
 */
static inline uint32_t get_size(uint32_t pos1, uint32_t pos2)
{
	uint32_t size;

	if (pos1 <= pos2) {
		size = pos2 - pos1;
	} else {
		size = 
NET_TCP_MAX_SEQ - pos1 + pos2 + 1; } return size; } #if defined(CONFIG_NET_IPV4) #ifndef NET_IP_MAX_PACKET #define NET_IP_MAX_PACKET (10 * 1024) #endif #define NET_IP_MAX_OPTIONS 40 /* Maximum option field length */ static inline size_t ip_max_packet_len(struct in_addr *dest_ip) { ARG_UNUSED(dest_ip); return (NET_IP_MAX_PACKET - (NET_IP_MAX_OPTIONS + sizeof(struct net_ipv4_hdr))) & (~0x3LU); } #else /* CONFIG_NET_IPV4 */ #define ip_max_packet_len(...) 0 #endif /* CONFIG_NET_IPV4 */ int net_tcp_prepare_data_segment(struct net_tcp *tcp, struct net_buf *buf, void *options, size_t optlen, const struct sockaddr *remote, struct net_buf **send_buf) { struct tcp_segment segment; size_t new_size; uint32_t seq; size_t data_size = net_buf_frags_len(buf); struct net_buf *data = NULL; uint8_t flags = 0; uint32_t tmp = 0; int ret = 0; NET_ASSERT_INFO(tcp, "TCP control block NULL"); NET_ASSERT_INFO(buf, "No data to send"); seq = tcp->send_seq; /* How much data can we send? */ if (tcp->send) { new_size = net_buf_frags_len(tcp->send) - get_size(tcp->recv_ack, tcp->send_seq); } else { new_size = get_size(tcp->recv_ack, tcp->send_seq); } if (data_size > new_size) { /* Now we will only use part of the data in net_buf's */ data_size = new_size; } if (net_sin(&tcp->context->remote)->sin_family == AF_INET) { tmp = ip_max_packet_len(&net_sin(&tcp->context->remote)-> sin_addr); } /* TCP header needs to fit the MTU */ if (data_size + NET_TCPH_LEN > tmp) { data_size = tmp - NET_TCPH_LEN; } flags |= NET_TCP_ACK; if (data_size > 0 && new_size == data_size) { flags |= NET_TCP_PSH; } if (tcp->flags & NET_TCP_IS_SHUTDOWN) { if (new_size == data_size) { /* End of the data sending. 
*/ flags |= NET_TCP_FIN; seq++; if (tcp->state == NET_TCP_ESTABLISHED || tcp->state == NET_TCP_SYN_RCVD) { net_tcp_change_state(tcp, NET_TCP_FIN_WAIT_1); } else if (tcp->state == NET_TCP_CLOSE_WAIT) { net_tcp_change_state(tcp, NET_TCP_LAST_ACK); } tcp->flags |= NET_TCP_FINAL_SENT; } } if (data_size) { /* The data will not contain the TX user data buf as a first * element after the copy. */ if (buf->user_data_size) { if (!buf->frags) { NET_ERR("Wrong TX buf when sending TCP data"); return -EINVAL; } data = net_nbuf_copy(buf->frags, data_size, 0); } else { data = net_nbuf_copy(buf, data_size, 0); } /* Remove stuff from the buf so that it only contains * stuff that we have not been sent yet. */ net_nbuf_pull(buf, data_size); /* If there is already pending data, append new data after * the old one. */ if (tcp->send) { net_buf_frag_add(tcp->send, data); } else { tcp->send = data; } if (unlikely(!data)) { tcp->send_seq = seq + data_size; return -ENOMEM; } } /* Send the segment. */ segment.src_addr = &tcp->context->local; segment.dst_addr = remote; segment.seq = tcp->send_seq; segment.ack = tcp->send_ack; segment.flags = flags; segment.wnd = get_recv_wnd(tcp); segment.options = options; segment.optlen = optlen; segment.data = tcp->send; *send_buf = prepare_segment(tcp, &segment); if (!*send_buf) { if (segment.data) { /* tcp->send is not yet freed if we get here */ net_nbuf_unref(tcp->send); } tcp->send = NULL; ret = -EINVAL; } tcp->send_seq = seq + data_size; if (seq_greater(tcp->send_seq, tcp->recv_max_ack)) { tcp->recv_max_ack = tcp->send_seq; } return ret; } static void net_tcp_set_syn_opt(struct net_tcp *tcp, uint8_t *options, uint8_t *optionlen) { *optionlen = 0; /* If 0, detect MSS based on interface MTU minus "TCP,IP header size" */ if (tcp->recv_mss == 0) { sa_family_t family = net_context_get_family(tcp->context); if (family == AF_INET) { #if defined(CONFIG_NET_IPV4) struct net_if *iface = net_context_get_iface(tcp->context); if (iface) { /* MTU - [TCP,IP header 
size]. */
				tcp->recv_mss = iface->mtu - 40;
			}
#else
			tcp->recv_mss = 0;
#endif /* CONFIG_NET_IPV4 */
		}
#if defined(CONFIG_NET_IPV6)
		else if (family == AF_INET6) {
			/* NOTE(review): 1280 is the IPv6 minimum link MTU,
			 * not MTU minus the TCP/IPv6 headers -- confirm
			 * this is the intended MSS basis.
			 */
			tcp->recv_mss = 1280;
		}
#endif /* CONFIG_NET_IPV6 */
		else {
			tcp->recv_mss = 0;
		}
	}

	/* Encode the MSS option (kind/length from NET_TCP_MSS_HEADER,
	 * value from recv_mss) in network byte order.
	 */
	*((uint32_t *)(options + *optionlen)) =
		htonl((uint32_t)(tcp->recv_mss | NET_TCP_MSS_HEADER));
	*optionlen += NET_TCP_MSS_SIZE;

	return;
}

/**
 * Build an acknowledgment segment appropriate for the current state.
 *
 * In SYN_RCVD the ACK must carry SYN (a SYN-ACK); in FIN_WAIT_1 and
 * LAST_ACK it must carry FIN. In those cases send_seq is decremented
 * first because net_tcp_prepare_segment() increments it again for the
 * SYN/FIN flag, so a re-sent control segment does not consume new
 * sequence space.
 *
 * The prepared buffer is stored in *buf. Always returns 0.
 */
int net_tcp_prepare_ack(struct net_tcp *tcp, const struct sockaddr *remote,
			struct net_buf **buf)
{
	uint8_t options[NET_TCP_MAX_OPT_SIZE];
	uint8_t optionlen;

	switch (tcp->state) {
	case NET_TCP_SYN_RCVD:
		/* In the SYN_RCVD state acknowledgment must be with the
		 * SYN flag.
		 */
		tcp->send_seq--;

		net_tcp_set_syn_opt(tcp, options, &optionlen);

		net_tcp_prepare_segment(tcp, NET_TCP_SYN | NET_TCP_ACK,
					options, optionlen, remote, buf);
		break;

	case NET_TCP_FIN_WAIT_1:
	case NET_TCP_LAST_ACK:
		/* In the FIN_WAIT_1 and LAST_ACK states acknowledgment must
		 * be with the FIN flag.
		 */
		tcp->send_seq--;

		net_tcp_prepare_segment(tcp, NET_TCP_FIN | NET_TCP_ACK,
					0, 0, remote, buf);
		break;

	default:
		net_tcp_prepare_segment(tcp, NET_TCP_ACK, 0, 0, remote, buf);
		break;
	}

	return 0;
}

/**
 * Build an RST segment for this connection unless it is unconnected,
 * still in SYN_SENT, or already in TIME_WAIT (in which case *buf is
 * left untouched). In SYN_RCVD the reset also acknowledges the peer's
 * SYN. Always returns 0.
 */
int net_tcp_prepare_reset(struct net_tcp *tcp, const struct sockaddr *remote,
			  struct net_buf **buf)
{
	struct tcp_segment segment = { 0 };

	if ((net_context_get_state(tcp->context) != NET_CONTEXT_UNCONNECTED) &&
	    (tcp->state != NET_TCP_SYN_SENT) &&
	    (tcp->state != NET_TCP_TIME_WAIT)) {
		if (tcp->state == NET_TCP_SYN_RCVD) {
			/* Send the reset segment with acknowledgment. */
			segment.seq = 0;
			segment.ack = tcp->send_ack;
			segment.flags = NET_TCP_RST | NET_TCP_ACK;
		} else {
			/* Send the reset segment without acknowledgment. 
*/ segment.seq = tcp->recv_ack; segment.ack = 0; segment.flags = NET_TCP_RST; } segment.src_addr = &tcp->context->local; segment.dst_addr = remote; segment.wnd = 0; segment.options = NULL; segment.optlen = 0; segment.data = NULL; *buf = prepare_segment(tcp, &segment); } return 0; } const char const *net_tcp_state_str(enum net_tcp_state state) { #if NET_DEBUG switch (state) { case NET_TCP_CLOSED: return "CLOSED"; case NET_TCP_LISTEN: return "LISTEN"; case NET_TCP_SYN_SENT: return "SYN_SENT"; case NET_TCP_SYN_RCVD: return "SYN_RCVD"; case NET_TCP_ESTABLISHED: return "ESTABLISHED"; case NET_TCP_CLOSE_WAIT: return "CLOSE_WAIT"; case NET_TCP_LAST_ACK: return "LAST_ACK"; case NET_TCP_FIN_WAIT_1: return "FIN_WAIT_1"; case NET_TCP_FIN_WAIT_2: return "FIN_WAIT_2"; case NET_TCP_TIME_WAIT: return "TIME_WAIT"; case NET_TCP_CLOSING: return "CLOSING"; } #endif return ""; } void net_tcp_init(void) { k_sem_init(&tcp_lock, 0, UINT_MAX); k_sem_give(&tcp_lock); } #define FIN_TIMEOUT (2 * NET_TCP_MAX_SEG_LIFETIME * MSEC_PER_SEC) static void fin_timeout(struct k_work *work) { struct net_tcp *tcp = CONTAINER_OF(work, struct net_tcp, fin_timer); NET_DBG("Remote peer didn't confirm connection close"); net_context_put(tcp->context); } void net_tcp_change_state(struct net_tcp *tcp, enum net_tcp_state new_state) { NET_ASSERT(tcp); if (tcp->state == new_state) { return; } NET_ASSERT(new_state >= NET_TCP_CLOSED && new_state <= NET_TCP_CLOSING); NET_DBG("%s (%d) => %s (%d)", net_tcp_state_str(tcp->state), tcp->state, net_tcp_state_str(new_state), new_state); tcp->state = new_state; if (tcp->state == NET_TCP_FIN_WAIT_1) { /* Wait up to 2 * MSL before destroying this socket. 
*/
		/* NOTE(review): cancel runs before init; on the first
		 * transition into FIN_WAIT_1 the work item has only ever
		 * been zeroed by net_tcp_alloc(), never initialised --
		 * confirm k_delayed_work_cancel() is safe on such an item.
		 */
		k_delayed_work_cancel(&tcp->fin_timer);
		k_delayed_work_init(&tcp->fin_timer, fin_timeout);
		k_delayed_work_submit(&tcp->fin_timer, FIN_TIMEOUT);
	}

	/* The rest is teardown that only applies once the connection
	 * has fully closed.
	 */
	if (tcp->state != NET_TCP_CLOSED) {
		return;
	}

	if (!tcp->context) {
		return;
	}

	/* Remove any port handlers if we are closing */
	if (tcp->context->conn_handler) {
		net_tcp_unregister(tcp->context->conn_handler);
		tcp->context->conn_handler = NULL;
	}

	/* Tell a pending accept callback that the connection is gone. */
	if (tcp->context->accept_cb) {
		tcp->context->accept_cb(tcp->context,
					&tcp->context->remote,
					sizeof(struct sockaddr),
					-ENETRESET,
					tcp->context->user_data);
	}
}