zephyr/net/yaip/tcp.c

752 lines
17 KiB
C

/** @file
* @brief TCP handler
*
* Handle TCP connections.
*/
/*
* Copyright (c) 2016 Intel Corporation
* Copyright 2011-2015 by Andrey Butok. FNET Community.
* Copyright 2008-2010 by Andrey Butok. Freescale Semiconductor, Inc.
* Copyright 2003 by Alexey Shervashidze, Andrey Butok. Motorola SPS.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#if defined(CONFIG_NET_DEBUG_TCP)
#define SYS_LOG_DOMAIN "net/tcp"
#define NET_DEBUG 1
#endif
#include <kernel.h>
#include <string.h>
#include <errno.h>
#include <stdbool.h>
#include <net/nbuf.h>
#include <net/net_ip.h>
#include <net/net_context.h>
#include <misc/byteorder.h>
#include "connection.h"
#include "net_private.h"
#include "ipv6.h"
#include "ipv4.h"
#include "tcp.h"
/*
 * Each TCP connection needs to be tracked by net_context, so
 * we need to allocate equal number of control structures here.
 */
#define NET_MAX_TCP_CONTEXT CONFIG_NET_MAX_CONTEXTS
/* Pool of TCP control blocks; slots are claimed by net_tcp_alloc() and
 * returned by net_tcp_release() via the NET_TCP_IN_USE flag.
 */
static struct net_tcp tcp_context[NET_MAX_TCP_CONTEXT];
/* Set up in net_tcp_init(); not otherwise referenced in the visible code. */
static struct k_sem tcp_lock;
/* Description of one outgoing TCP segment. Filled in by the
 * net_tcp_prepare_*() functions and consumed by prepare_segment(),
 * which turns it into a TCP header (plus optional options and data).
 */
struct tcp_segment {
/* Sequence number; written big-endian into the TCP header. */
uint32_t seq;
/* Acknowledgment number; written big-endian into the TCP header. */
uint32_t ack;
/* Window value; written big-endian into the TCP header. */
uint16_t wnd;
/* TCP flag bits (NET_TCP_*); copied into the header unchanged. */
uint8_t flags;
/* Number of bytes available at 'options'. */
uint8_t optlen;
/* Raw TCP option bytes appended after the fixed header; may be NULL. */
void *options;
/* Payload fragment chain attached after the header; may be NULL. */
struct net_buf *data;
/* Local address; set by the callers but not read in the visible code. */
struct sockaddr_ptr *src_addr;
/* Destination address and port of the segment. */
const struct sockaddr *dst_addr;
};
#if NET_DEBUG > 0
/* Force the case of an ASCII letter by manipulating bit 0x20:
 * the bit is cleared (upper case) when 'set' is true and set
 * (lower case) otherwise.
 */
static char upper_if_set(char chr, bool set)
{
	return set ? (chr & ~0x20) : (chr | 0x20);
}
/* Dump the TCP header of 'buf' at info level. 'str' is printed as a
 * prefix of the first line only. Each flag is shown as a letter that is
 * upper case when the flag is present in the header and lower case
 * when it is not.
 */
static void net_tcp_trace(char *str, struct net_buf *buf)
{
	const uint8_t flags = NET_TCP_FLAGS(buf);
	const char urg = upper_if_set('u', flags & NET_TCP_URG);
	const char ack = upper_if_set('a', flags & NET_TCP_ACK);
	const char psh = upper_if_set('p', flags & NET_TCP_PSH);
	const char rst = upper_if_set('r', flags & NET_TCP_RST);
	const char syn = upper_if_set('s', flags & NET_TCP_SYN);
	const char fin = upper_if_set('f', flags & NET_TCP_FIN);

	NET_INFO("%s[TCP header]", str);

	NET_INFO("|(SrcPort) %5u |(DestPort) %5u |",
		 ntohs(NET_TCP_BUF(buf)->src_port),
		 ntohs(NET_TCP_BUF(buf)->dst_port));

	NET_INFO("|(Sequence number) 0x%010x |",
		 sys_get_be32(NET_TCP_BUF(buf)->seq));

	NET_INFO("|(ACK number) 0x%010x |",
		 sys_get_be32(NET_TCP_BUF(buf)->ack));

	/* The upper nibble of 'offset' is the header length in 32-bit
	 * words; it is printed here in bytes.
	 */
	NET_INFO("|(HL) %2u |(F) %c%c%c%c%c%c |(Window) %5u |",
		 (NET_TCP_BUF(buf)->offset >> 4) * 4,
		 urg, ack, psh, rst, syn, fin,
		 sys_get_be16(NET_TCP_BUF(buf)->wnd));

	NET_INFO("|(Checksum) 0x%04x |(Urgent) %5u |",
		 ntohs(NET_TCP_BUF(buf)->chksum),
		 sys_get_be16(NET_TCP_BUF(buf)->urg));
}
#else
#define net_tcp_trace(...)
#endif
/* Produce the initial sequence number for a new connection; the value
 * is taken from sys_rand32_get().
 */
static inline uint32_t init_isn(void)
{
	const uint32_t isn = sys_rand32_get();

	return isn;
}
struct net_tcp *net_tcp_alloc(struct net_context *context)
{
int i, key;
key = irq_lock();
for (i = 0; i < NET_MAX_TCP_CONTEXT; i++) {
if (!net_tcp_is_used(&tcp_context[i])) {
tcp_context[i].flags |= NET_TCP_IN_USE;
break;
}
}
irq_unlock(key);
if (i >= NET_MAX_TCP_CONTEXT) {
return NULL;
}
memset(&tcp_context[i], 0, sizeof(struct net_tcp));
tcp_context[i].flags = NET_TCP_IN_USE;
tcp_context[i].state = NET_TCP_CLOSED;
tcp_context[i].context = context;
tcp_context[i].send_seq = init_isn();
tcp_context[i].recv_max_ack = tcp_context[i].send_seq + 1u;
return &tcp_context[i];
}
/**
 * Return a TCP control block to the static pool.
 *
 * Cancels the FIN timer when the connection is in one of the closing
 * states, drops the references held on the pending send and receive
 * buffers, and finally clears NET_TCP_IN_USE so the slot can be
 * allocated again.
 *
 * @param tcp Control block previously returned by net_tcp_alloc().
 *
 * @return 0 on success, -EINVAL if @a tcp does not point into the
 *         control block pool.
 */
int net_tcp_release(struct net_tcp *tcp)
{
	int key;

	/* Reject pointers outside [first, one-past-last). The previous
	 * test (tcp >= first || tcp <= end) was true for every pointer,
	 * so nothing was ever released.
	 */
	if (tcp < &tcp_context[0] ||
	    tcp >= &tcp_context[NET_MAX_TCP_CONTEXT]) {
		return -EINVAL;
	}

	if (tcp->state == NET_TCP_FIN_WAIT_1 ||
	    tcp->state == NET_TCP_FIN_WAIT_2 ||
	    tcp->state == NET_TCP_CLOSING ||
	    tcp->state == NET_TCP_TIME_WAIT) {
		k_delayed_work_cancel(&tcp->fin_timer);
	}

	tcp->state = NET_TCP_CLOSED;
	tcp->context = NULL;

	if (tcp->send) {
		net_nbuf_unref(tcp->send);
		tcp->send = NULL;
	}

	if (tcp->recv) {
		net_nbuf_unref(tcp->recv);
		tcp->recv = NULL;
	}

	/* The in-use flag is what net_tcp_alloc() scans for, so flip it
	 * with interrupts locked.
	 */
	key = irq_lock();
	tcp->flags &= ~NET_TCP_IN_USE;
	irq_unlock(key);

	return 0;
}
/* Append 'len' raw TCP option bytes from 'data' to the tail of 'header'.
 *
 * This helper neither pads the options to a 32-bit boundary nor
 * touches the data-offset field of the TCP header. (A rounded-up
 * length used to be computed here but was never stored or returned,
 * so that dead code has been removed.)
 *
 * Always returns 0.
 */
static inline int net_tcp_add_options(struct net_buf *header, size_t len,
				      void *data)
{
	memcpy(net_buf_add(header, len), data, len);

	return 0;
}
/* Build a complete outgoing packet (IP header, TCP header, optional TCP
 * options and optional payload) from the description in 'segment'.
 *
 * The TX buffer is obtained from the context of 'tcp'; the IP header is
 * created and finalized according to the address family of that buffer.
 *
 * Returns the finished (compacted) buffer, or NULL on failure. When
 * NULL is returned after the payload has already been attached to the
 * packet, segment->data is reset to NULL to tell the caller that the
 * payload was released together with the packet and must not be
 * unreferenced again. In the other failure paths segment->data is left
 * untouched and still belongs to the caller.
 */
static struct net_buf *prepare_segment(struct net_tcp *tcp,
				       struct tcp_segment *segment)
{
	struct net_buf *buf, *header;
	struct net_tcp_hdr *tcphdr;
	struct net_context *context = tcp->context;
	uint16_t dst_port, src_port;
	size_t optlen = 0;

	NET_ASSERT(context);

	buf = net_nbuf_get_tx(context);
	if (!buf) {
		/* NOTE(review): assumes net_nbuf_get_tx() can return NULL
		 * when no TX buffer is available - confirm. The check is
		 * harmless if it cannot.
		 */
		return NULL;
	}

#if defined(CONFIG_NET_IPV4)
	if (net_nbuf_family(buf) == AF_INET) {
		net_ipv4_create(context, buf,
				&(net_sin(segment->dst_addr)->sin_addr));
		dst_port = net_sin(segment->dst_addr)->sin_port;
		src_port = ((struct sockaddr_in_ptr *)&context->local)->
								sin_port;
		NET_IPV4_BUF(buf)->proto = IPPROTO_TCP;
	} else
#endif
#if defined(CONFIG_NET_IPV6)
	if (net_nbuf_family(buf) == AF_INET6) {
		net_ipv6_create(tcp->context, buf,
				&(net_sin6(segment->dst_addr)->sin6_addr));
		dst_port = net_sin6(segment->dst_addr)->sin6_port;
		src_port = ((struct sockaddr_in6_ptr *)&context->local)->
								sin6_port;
		NET_IPV6_BUF(buf)->nexthdr = IPPROTO_TCP;
	} else
#endif
	{
		goto proto_err;
	}

	header = buf->frags;

	tcphdr = (struct net_tcp_hdr *)net_buf_add(header, NET_TCPH_LEN);

	if (segment->options && segment->optlen) {
		size_t pad;

		net_tcp_add_options(header, segment->optlen, segment->options);

		/* The data offset is expressed in 32-bit words, so the
		 * option area has to end on a 4-byte boundary; fill the
		 * gap with zero bytes.
		 */
		optlen = ((size_t)segment->optlen + 3u) & ~(size_t)0x3u;
		pad = optlen - segment->optlen;
		if (pad) {
			memset(net_buf_add(header, pad), 0, pad);
		}
	}

	/* Header length in 32-bit words lives in the upper nibble:
	 * (bytes / 4) << 4 == bytes << 2. Previously this field was left
	 * unset whenever options were present.
	 */
	tcphdr->offset = (NET_TCPH_LEN + optlen) << 2;

	tcphdr->src_port = src_port;
	tcphdr->dst_port = dst_port;
	tcphdr->seq[0] = segment->seq >> 24;
	tcphdr->seq[1] = segment->seq >> 16;
	tcphdr->seq[2] = segment->seq >> 8;
	tcphdr->seq[3] = segment->seq;
	tcphdr->ack[0] = segment->ack >> 24;
	tcphdr->ack[1] = segment->ack >> 16;
	tcphdr->ack[2] = segment->ack >> 8;
	tcphdr->ack[3] = segment->ack;
	tcphdr->flags = segment->flags;
	tcphdr->wnd[0] = segment->wnd >> 8;
	tcphdr->wnd[1] = segment->wnd;

	if (segment->data) {
		net_buf_frag_add(header, segment->data);
	}

#if defined(CONFIG_NET_IPV4)
	if (net_nbuf_family(buf) == AF_INET) {
		net_ipv4_finalize(context, buf);
	} else
#endif
#if defined(CONFIG_NET_IPV6)
	if (net_nbuf_family(buf) == AF_INET6) {
		net_ipv6_finalize(context, buf);
	} else
#endif
	{
		/* The payload is now part of buf and is released with it
		 * below. Clear the pointer so that we avoid a double free
		 * when called from net_tcp_prepare_data_segment().
		 */
		segment->data = NULL;

proto_err:
		NET_DBG("Protocol family %d not supported",
			net_nbuf_family(buf));
		net_nbuf_unref(buf);
		return NULL;
	}

	buf = net_nbuf_compact(buf);

	net_tcp_trace("", buf);

	return buf;
}
/* Receive window to advertise for 'tcp'.
 *
 * Received data is not queued inside the stack; packets are handed
 * straight to synchronous callbacks (which may queue them, but that is
 * not our concern). The available window is therefore constant and is
 * simply the smaller of the two configurables below.
 */
static inline uint32_t get_recv_wnd(struct net_tcp *tcp)
{
	const uint32_t window = min(NET_TCP_MAX_WIN, NET_TCP_BUF_MAX_LEN);

	ARG_UNUSED(tcp);

	return window;
}
/* Sequence-number comparison: returns true when the signed difference
 * "seq1 - seq2" is strictly positive and smaller than 2^29, i.e. when
 * seq1 lies "after" seq2 within that distance.
 */
static inline bool seq_greater(uint32_t seq1, uint32_t seq2)
{
	const int delta = (int)(seq1 - seq2);

	if (delta <= 0) {
		return false;
	}

	return delta < 0x20000000;
}
/**
 * Build a data-less TCP segment carrying the given flags and advance
 * the connection state machine accordingly.
 *
 * The segment is sent with the current send sequence number; afterwards
 * tcp->send_seq is advanced by one for FIN and by one for SYN, and
 * recv_max_ack is raised if the new sequence number is ahead of it.
 *
 * @param tcp      TCP control block.
 * @param flags    TCP flags requested for the segment. In FIN_WAIT_1 a
 *                 FIN combined with ACK only selects the next state and
 *                 is stripped before sending.
 * @param options  TCP option bytes, or NULL.
 * @param optlen   Length of @a options in bytes.
 * @param remote   Destination address.
 * @param send_buf Receives the prepared buffer (the result of
 *                 prepare_segment(), which may be NULL).
 *
 * @return Always 0.
 */
int net_tcp_prepare_segment(struct net_tcp *tcp, uint8_t flags,
			    void *options, size_t optlen,
			    const struct sockaddr *remote,
			    struct net_buf **send_buf)
{
	struct tcp_segment segment = { 0 };
	uint32_t next_seq = tcp->send_seq;
	uint32_t ack_num = 0;
	uint16_t window;

	if (flags & NET_TCP_ACK) {
		ack_num = tcp->send_ack;

		switch (tcp->state) {
		case NET_TCP_FIN_WAIT_1:
			if (flags & NET_TCP_FIN) {
				/* FIN is used here only to determine which
				 * state to go to next; it's not to be used
				 * in the sent segment.
				 */
				flags &= ~NET_TCP_FIN;
				net_tcp_change_state(tcp, NET_TCP_TIME_WAIT);
			} else {
				net_tcp_change_state(tcp, NET_TCP_CLOSING);
			}
			break;
		case NET_TCP_FIN_WAIT_2:
			net_tcp_change_state(tcp, NET_TCP_TIME_WAIT);
			break;
		default:
			break;
		}
	}

	if (flags & NET_TCP_FIN) {
		tcp->flags |= NET_TCP_FINAL_SENT;
		next_seq++;

		switch (tcp->state) {
		case NET_TCP_ESTABLISHED:
		case NET_TCP_SYN_RCVD:
			net_tcp_change_state(tcp, NET_TCP_FIN_WAIT_1);
			break;
		case NET_TCP_CLOSE_WAIT:
			net_tcp_change_state(tcp, NET_TCP_LAST_ACK);
			break;
		default:
			break;
		}
	}

	if (flags & NET_TCP_SYN) {
		next_seq++;
	}

	window = get_recv_wnd(tcp);

	/* The segment itself still carries the not-yet-advanced sequence
	 * number; the advanced value is stored only after it is built.
	 */
	segment.seq = tcp->send_seq;
	segment.ack = ack_num;
	segment.wnd = window;
	segment.flags = flags;
	segment.optlen = optlen;
	segment.options = options;
	segment.data = NULL;
	segment.src_addr = &tcp->context->local;
	segment.dst_addr = remote;

	*send_buf = prepare_segment(tcp, &segment);

	tcp->send_seq = next_seq;

	if (seq_greater(tcp->send_seq, tcp->recv_max_ack)) {
		tcp->recv_max_ack = tcp->send_seq;
	}

	return 0;
}
/* Distance from sequence position pos1 forward to pos2. When pos1 is
 * numerically larger than pos2, the distance is counted across the
 * wrap-around at NET_TCP_MAX_SEQ.
 */
static inline uint32_t get_size(uint32_t pos1, uint32_t pos2)
{
	if (pos1 <= pos2) {
		return pos2 - pos1;
	}

	return NET_TCP_MAX_SEQ - pos1 + pos2 + 1;
}
#if defined(CONFIG_NET_IPV4)
#ifndef NET_IP_MAX_PACKET
#define NET_IP_MAX_PACKET (10 * 1024)
#endif
#define NET_IP_MAX_OPTIONS 40 /* Maximum option field length */
/* Largest payload an IPv4 packet can carry: NET_IP_MAX_PACKET minus the
 * IPv4 header and the maximum IP option area, rounded down to a
 * multiple of four bytes. The destination address is currently ignored.
 */
static inline size_t ip_max_packet_len(struct in_addr *dest_ip)
{
	const size_t overhead = NET_IP_MAX_OPTIONS +
				sizeof(struct net_ipv4_hdr);

	ARG_UNUSED(dest_ip);

	return (NET_IP_MAX_PACKET - overhead) & (~0x3LU);
}
#else /* CONFIG_NET_IPV4 */
/* Without IPv4 support no limit is known; the macro evaluates to 0. */
#define ip_max_packet_len(...) 0
#endif /* CONFIG_NET_IPV4 */
/**
 * Build a TCP segment that carries (part of) the user data in @a buf.
 *
 * The amount of data taken is limited by the size computed from the
 * pending send queue and, for IPv4 peers, by the maximum IP payload.
 * The data is copied out of @a buf, removed from it, appended to
 * tcp->send and attached to the new segment. ACK is always set; PSH is
 * added when all the permitted data fits into this segment, and FIN is
 * added when the connection has been shut down and this segment ends
 * the data.
 *
 * @param tcp      TCP control block.
 * @param buf      Buffer holding the user data to send.
 * @param options  TCP option bytes, or NULL.
 * @param optlen   Length of @a options in bytes.
 * @param remote   Destination address.
 * @param send_buf Receives the prepared buffer, NULL on failure.
 *
 * @return 0 on success, -EINVAL if @a buf is malformed or the segment
 *         could not be built, -ENOMEM if the data could not be copied.
 */
int net_tcp_prepare_data_segment(struct net_tcp *tcp,
				 struct net_buf *buf,
				 void *options, size_t optlen,
				 const struct sockaddr *remote,
				 struct net_buf **send_buf)
{
	struct tcp_segment segment;
	size_t new_size;
	uint32_t seq;
	size_t data_size = net_buf_frags_len(buf);
	struct net_buf *data = NULL;
	uint8_t flags = 0;
	uint32_t tmp = 0;
	int ret = 0;

	NET_ASSERT_INFO(tcp, "TCP control block NULL");
	NET_ASSERT_INFO(buf, "No data to send");

	seq = tcp->send_seq;

	/* How much data can we send? */
	if (tcp->send) {
		new_size = net_buf_frags_len(tcp->send) -
			get_size(tcp->recv_ack, tcp->send_seq);
	} else {
		new_size = get_size(tcp->recv_ack, tcp->send_seq);
	}

	if (data_size > new_size) {
		/* Now we will only use part of the data in net_buf's */
		data_size = new_size;
	}

	if (net_sin(&tcp->context->remote)->sin_family == AF_INET) {
		tmp = ip_max_packet_len(&net_sin(&tcp->context->remote)->
					sin_addr);
	}

	/* TCP header needs to fit the MTU. Only clamp when a limit is
	 * actually known: for non-IPv4 peers tmp stays 0, and computing
	 * "tmp - NET_TCPH_LEN" would wrap around to a huge size.
	 */
	if (tmp > NET_TCPH_LEN && data_size + NET_TCPH_LEN > tmp) {
		data_size = tmp - NET_TCPH_LEN;
	}

	flags |= NET_TCP_ACK;

	if (data_size > 0 && new_size == data_size) {
		flags |= NET_TCP_PSH;
	}

	if (tcp->flags & NET_TCP_IS_SHUTDOWN) {
		if (new_size == data_size) {
			/* End of the data sending. */
			flags |= NET_TCP_FIN;
			seq++;

			if (tcp->state == NET_TCP_ESTABLISHED ||
			    tcp->state == NET_TCP_SYN_RCVD) {
				net_tcp_change_state(tcp, NET_TCP_FIN_WAIT_1);
			} else if (tcp->state == NET_TCP_CLOSE_WAIT) {
				net_tcp_change_state(tcp, NET_TCP_LAST_ACK);
			}

			tcp->flags |= NET_TCP_FINAL_SENT;
		}
	}

	if (data_size) {
		/* The data will not contain the TX user data buf as a first
		 * element after the copy.
		 */
		if (buf->user_data_size) {
			if (!buf->frags) {
				NET_ERR("Wrong TX buf when sending TCP data");
				return -EINVAL;
			}

			data = net_nbuf_copy(buf->frags, data_size, 0);
		} else {
			data = net_nbuf_copy(buf, data_size, 0);
		}

		/* Remove stuff from the buf so that it only contains
		 * data that has not been sent yet.
		 */
		net_nbuf_pull(buf, data_size);

		/* Bail out before the copy is linked anywhere: a NULL
		 * fragment must never reach net_buf_frag_add().
		 * NOTE(review): as before, the data has already been
		 * pulled from buf and send_seq is still advanced on this
		 * path - confirm that callers expect this.
		 */
		if (unlikely(!data)) {
			tcp->send_seq = seq + data_size;
			return -ENOMEM;
		}

		/* If there is already pending data, append new data after
		 * the old one.
		 */
		if (tcp->send) {
			net_buf_frag_add(tcp->send, data);
		} else {
			tcp->send = data;
		}
	}

	/* Send the segment. */
	segment.src_addr = &tcp->context->local;
	segment.dst_addr = remote;
	segment.seq = tcp->send_seq;
	segment.ack = tcp->send_ack;
	segment.flags = flags;
	segment.wnd = get_recv_wnd(tcp);
	segment.options = options;
	segment.optlen = optlen;
	segment.data = tcp->send;

	*send_buf = prepare_segment(tcp, &segment);
	if (!*send_buf) {
		/* prepare_segment() clears segment.data when it has already
		 * released the payload together with the packet.
		 */
		if (segment.data) {
			/* tcp->send is not yet freed if we get here */
			net_nbuf_unref(tcp->send);
		}

		tcp->send = NULL;
		ret = -EINVAL;
	}

	tcp->send_seq = seq + data_size;

	if (seq_greater(tcp->send_seq, tcp->recv_max_ack)) {
		tcp->recv_max_ack = tcp->send_seq;
	}

	return ret;
}
/* Write the options used on SYN segments into 'options' and report
 * their total length through 'optionlen'.
 *
 * Currently only the MSS option is emitted. When tcp->recv_mss is 0 it
 * is first derived from the address family of the context: interface
 * MTU minus 40 bytes of IP and TCP headers for IPv4 (left at 0 if the
 * context has no interface), 1280 for IPv6, 0 otherwise.
 *
 * 'options' must have room for at least NET_TCP_MSS_SIZE bytes.
 */
static void net_tcp_set_syn_opt(struct net_tcp *tcp, uint8_t *options,
				uint8_t *optionlen)
{
	uint32_t mss_opt;

	*optionlen = 0;

	/* If 0, detect MSS based on interface MTU minus "TCP,IP header size"
	 */
	if (tcp->recv_mss == 0) {
		sa_family_t family = net_context_get_family(tcp->context);

		if (family == AF_INET) {
#if defined(CONFIG_NET_IPV4)
			struct net_if *iface =
				net_context_get_iface(tcp->context);

			if (iface) {
				/* MTU - [TCP,IP header size]. */
				tcp->recv_mss = iface->mtu - 40;
			}
#else
			tcp->recv_mss = 0;
#endif /* CONFIG_NET_IPV4 */
		}
#if defined(CONFIG_NET_IPV6)
		else if (family == AF_INET6) {
			tcp->recv_mss = 1280;
		}
#endif /* CONFIG_NET_IPV6 */
		else {
			tcp->recv_mss = 0;
		}
	}

	/* The option buffer is a plain byte array with no alignment
	 * guarantee, so copy the encoded word instead of storing through
	 * a casted uint32_t pointer.
	 */
	mss_opt = htonl((uint32_t)(tcp->recv_mss | NET_TCP_MSS_HEADER));
	memcpy(options + *optionlen, &mss_opt, sizeof(mss_opt));

	*optionlen += NET_TCP_MSS_SIZE;
}
/**
 * Build an acknowledgment segment appropriate for the current state.
 *
 * In SYN_RCVD the ACK is combined with SYN (and the SYN options); in
 * FIN_WAIT_1 and LAST_ACK it is combined with FIN. In those cases the
 * send sequence number is stepped back by one before the segment is
 * prepared. Every other state produces a plain ACK.
 *
 * @param tcp    TCP control block.
 * @param remote Destination address.
 * @param buf    Receives the prepared buffer.
 *
 * @return Always 0.
 */
int net_tcp_prepare_ack(struct net_tcp *tcp, const struct sockaddr *remote,
			struct net_buf **buf)
{
	uint8_t options[NET_TCP_MAX_OPT_SIZE];
	uint8_t optionlen;
	uint8_t seg_flags = NET_TCP_ACK;
	void *seg_opts = NULL;
	size_t seg_optlen = 0;

	switch (tcp->state) {
	case NET_TCP_SYN_RCVD:
		/* In the SYN_RCVD state acknowledgment must be with the
		 * SYN flag.
		 */
		tcp->send_seq--;

		net_tcp_set_syn_opt(tcp, options, &optionlen);

		seg_flags = NET_TCP_SYN | NET_TCP_ACK;
		seg_opts = options;
		seg_optlen = optionlen;
		break;

	case NET_TCP_FIN_WAIT_1:
	case NET_TCP_LAST_ACK:
		/* In the FIN_WAIT_1 and LAST_ACK states acknowledgment must
		 * be with the FIN flag.
		 */
		tcp->send_seq--;

		seg_flags = NET_TCP_FIN | NET_TCP_ACK;
		break;

	default:
		break;
	}

	net_tcp_prepare_segment(tcp, seg_flags, seg_opts, seg_optlen,
				remote, buf);

	return 0;
}
/**
 * Build a RST segment for the connection.
 *
 * Nothing is built (and @a buf is left untouched) when the context is
 * unconnected or the connection is in SYN_SENT or TIME_WAIT. In
 * SYN_RCVD the reset carries an acknowledgment and sequence number 0;
 * in every other state it carries no acknowledgment and uses
 * tcp->recv_ack as its sequence number.
 *
 * @param tcp    TCP control block.
 * @param remote Destination address.
 * @param buf    Receives the prepared buffer when one is built.
 *
 * @return Always 0.
 */
int net_tcp_prepare_reset(struct net_tcp *tcp,
			  const struct sockaddr *remote,
			  struct net_buf **buf)
{
	struct tcp_segment segment = { 0 };

	if ((net_context_get_state(tcp->context) == NET_CONTEXT_UNCONNECTED) ||
	    (tcp->state == NET_TCP_SYN_SENT) ||
	    (tcp->state == NET_TCP_TIME_WAIT)) {
		return 0;
	}

	if (tcp->state == NET_TCP_SYN_RCVD) {
		/* Send the reset segment with acknowledgment. */
		segment.flags = NET_TCP_RST | NET_TCP_ACK;
		segment.seq = 0;
		segment.ack = tcp->send_ack;
	} else {
		/* Send the reset segment without acknowledgment. */
		segment.flags = NET_TCP_RST;
		segment.seq = tcp->recv_ack;
		segment.ack = 0;
	}

	segment.wnd = 0;
	segment.optlen = 0;
	segment.options = NULL;
	segment.data = NULL;
	segment.src_addr = &tcp->context->local;
	segment.dst_addr = remote;

	*buf = prepare_segment(tcp, &segment);

	return 0;
}
/**
 * Translate a TCP state into a human-readable name.
 *
 * The return type used to be spelled "const char const *"; the
 * duplicated qualifier is ignored by the language (the type is exactly
 * "const char *") but triggers duplicate-specifier warnings, so it is
 * written once here.
 *
 * @param state TCP state to describe.
 *
 * @return Name of the state when NET_DEBUG is enabled; an empty string
 *         when debugging is disabled or the state is not recognised.
 */
const char *net_tcp_state_str(enum net_tcp_state state)
{
#if NET_DEBUG
	switch (state) {
	case NET_TCP_CLOSED:
		return "CLOSED";
	case NET_TCP_LISTEN:
		return "LISTEN";
	case NET_TCP_SYN_SENT:
		return "SYN_SENT";
	case NET_TCP_SYN_RCVD:
		return "SYN_RCVD";
	case NET_TCP_ESTABLISHED:
		return "ESTABLISHED";
	case NET_TCP_CLOSE_WAIT:
		return "CLOSE_WAIT";
	case NET_TCP_LAST_ACK:
		return "LAST_ACK";
	case NET_TCP_FIN_WAIT_1:
		return "FIN_WAIT_1";
	case NET_TCP_FIN_WAIT_2:
		return "FIN_WAIT_2";
	case NET_TCP_TIME_WAIT:
		return "TIME_WAIT";
	case NET_TCP_CLOSING:
		return "CLOSING";
	}
#endif

	return "";
}
/* Initialise the module-level TCP state: tcp_lock is set up with an
 * initial count of 0 and a limit of UINT_MAX, then given once.
 */
void net_tcp_init(void)
{
k_sem_init(&tcp_lock, 0, UINT_MAX);
/* Give the semaphore once so that it starts out available. */
k_sem_give(&tcp_lock);
}
/* Delay used for the FIN timer: twice the maximum segment lifetime,
 * scaled by MSEC_PER_SEC.
 */
#define FIN_TIMEOUT (2 * NET_TCP_MAX_SEG_LIFETIME * MSEC_PER_SEC)

/* Work handler run when the FIN timer expires: the control block that
 * embeds the timer is recovered from 'work' and its network context
 * is released with net_context_put().
 */
static void fin_timeout(struct k_work *work)
{
	struct net_tcp *expired = CONTAINER_OF(work, struct net_tcp,
					       fin_timer);

	NET_DBG("Remote peer didn't confirm connection close");

	net_context_put(expired->context);
}
void net_tcp_change_state(struct net_tcp *tcp,
enum net_tcp_state new_state)
{
NET_ASSERT(tcp);
if (tcp->state == new_state) {
return;
}
NET_ASSERT(new_state >= NET_TCP_CLOSED &&
new_state <= NET_TCP_CLOSING);
NET_DBG("%s (%d) => %s (%d)",
net_tcp_state_str(tcp->state), tcp->state,
net_tcp_state_str(new_state), new_state);
tcp->state = new_state;
if (tcp->state == NET_TCP_FIN_WAIT_1) {
/* Wait up to 2 * MSL before destroying this socket. */
k_delayed_work_cancel(&tcp->fin_timer);
k_delayed_work_init(&tcp->fin_timer, fin_timeout);
k_delayed_work_submit(&tcp->fin_timer, FIN_TIMEOUT);
}
if (tcp->state != NET_TCP_CLOSED) {
return;
}
if (!tcp->context) {
return;
}
/* Remove any port handlers if we are closing */
if (tcp->context->conn_handler) {
net_tcp_unregister(tcp->context->conn_handler);
tcp->context->conn_handler = NULL;
}
if (tcp->context->accept_cb) {
tcp->context->accept_cb(tcp->context,
&tcp->context->remote,
sizeof(struct sockaddr),
-ENETRESET,
tcp->context->user_data);
}
}