#define TFO_CLIENT_ENABLE 1
#define TFO_SERVER_ENABLE 2
#define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */
/* Process SYN data but skip cookie validation */
#define TFO_SERVER_COOKIE_NOT_CHKED 0x100 // 收到cookie也不检查
/* Accept SYN data w/o any cookie option */
#define TFO_SERVER_COOKIE_NOT_REQD 0x200 // 不需要cookie需要data就能创建fastopen child,默认情况下syn的data会被忽略
/* Force enable TFO on all listeners, i.e., not requiring the
* TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen.
*/
#define TFO_SERVER_WO_SOCKOPT1 0x400 // 调listen后不需要再调setsockopt就开启fastopen
#define TFO_SERVER_WO_SOCKOPT2 0x800 // 调listen后不需要再调setsockopt就开启fastopen,backlog=TFO_SERVER_WO_SOCKOPT2>>16
/* Always create TFO child sockets on a TFO listener even when
* cookie/data not present. (For testing purpose!)
*/
#define TFO_SERVER_ALWAYS 0x1000 // 不需要cookie也不需要data就创建fastopen child, 容易被攻击,不开启
测试
开启
1234
echo 3 > /proc/sys/net/ipv4/tcp_fastopen # 1 开启客户端,2 开启服务端,3 都开启
tc qdisc add dev lo root netem delay 300ms # 设置延迟才能看出效果
ifconfig lo mtu 1500
static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
struct sk_buff *syn_data = NULL, *data;
unsigned long last_syn_loss = 0;
tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */
tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
&syn_loss, &last_syn_loss);//查询缓存的TFO cookie信息
/* Recurring FO SYN losses: revert to regular handshake temporarily */
if (syn_loss > 1 &&
time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
fo->cookie.len = -1;
goto fallback;
}
if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)//无论有没有cookie,都发送携带数据的SYN
fo->cookie.len = -1;
else if (fo->cookie.len <= 0) //没有cookie,发送携带TFO请求选项的SYN
goto fallback;
/* MSS for SYN-data is based on cached MSS and bounded by PMTU and
* user-MSS. Reserve maximum option space for middleboxes that add
* private TCP options. The cost is reduced data space in SYN :(
*/
if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;//计算SYN包中的能够携带的数据的最大大小
syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
sk->sk_allocation);//复制SYN包中的内容,并扩展SKB中的空间
if (syn_data == NULL)
goto fallback;
for (i = 0; i < iovlen && syn_data->len < space; ++i) {//将用户态中缓存的数据copy到内核
struct iovec *iov = &fo->data->msg_iov[i];
unsigned char __user *from = iov->iov_base;
int len = iov->iov_len;
if (syn_data->len + len > space)//数据总长度大于SKB中空间的总大小
len = space - syn_data->len;
else if (i + 1 == iovlen)
/* No more data pending in inet_wait_for_connect() */
fo->data = NULL;//数据全部发送完毕,不需要在inet_wait_for_connect中等待时发送
if (skb_add_data(syn_data, from, len))//将用户数据copy到SKB中
goto fallback;
}
/* Queue a data-only packet after the regular SYN for retransmission */
data = pskb_copy(syn_data, sk->sk_allocation);
if (data == NULL)
goto fallback;
TCP_SKB_CB(data)->seq++;
TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
tcp_connect_queue_skb(sk, data);
fo->copied = data->len;
if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {//发送携带数据的SYN
tp->syn_data = (fo->copied > 0);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
goto done;
}
syn_data = NULL;
fallback:
/* Send a regular SYN with Fast Open cookie request option */
if (fo->cookie.len > 0)
fo->cookie.len = 0;
err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
if (err)
tp->syn_fastopen = 0;
kfree_skb(syn_data);
done:
fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */
return err;
}
static int tcp_v4_conn_req_fastopen(struct sock *sk,
struct sk_buff *skb,
struct sk_buff *skb_synack,
struct request_sock *req)
{
struct tcp_sock *tp = tcp_sk(sk);
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const struct inet_request_sock *ireq = inet_rsk(req);
struct sock *child;
...
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);//生成子socket,其状态为TCP_SYN_RECV
...
err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
ireq->rmt_addr, ireq->opt);//构建SYN|ACK的IP头并将其发送出去
err = net_xmit_eval(err);
if (!err)
tcp_rsk(req)->snt_synack = tcp_time_stamp;
/* XXX (TFO) - is it ok to ignore error and continue? */
spin_lock(&queue->fastopenq->lock);
queue->fastopenq->qlen++;//将这个连接计入TFO queue
spin_unlock(&queue->fastopenq->lock);
...
tp = tcp_sk(child);
tp->fastopen_rsk = req;
/* Do a hold on the listner sk so that if the listener is being
* closed, the child that has been accepted can live on and still
* access listen_lock.
*/
sock_hold(sk);
tcp_rsk(req)->listener = sk;
/* RFC1323: The window in SYN & SYN/ACK segments is never
* scaled. So correct it appropriately.
*/
tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
/* Activate the retrans timer so that SYNACK can be retransmitted.
* The request socket is not added to the SYN table of the parent
* because it's been added to the accept queue directly.
*/
inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
TCP_TIMEOUT_INIT, TCP_RTO_MAX);
/* Add the child socket directly into the accept queue */
inet_csk_reqsk_queue_add(sk, req, child);
/* Now finish processing the fastopen child socket. */
inet_csk(child)->icsk_af_ops->rebuild_header(child);
tcp_init_congestion_control(child);
tcp_mtup_init(child);
tcp_init_buffer_space(child);
tcp_init_metrics(child);
/* Queue the data carried in the SYN packet. We need to first
* bump skb's refcnt because the caller will attempt to free it.
*
* XXX (TFO) - we honor a zero-payload TFO request for now.
* (Any reason not to?)
*/
if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {//SYN包中没有数据
/* Don't queue the skb if there is no payload in SYN.
* XXX (TFO) - How about SYN+FIN?
*/
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
} else {
skb = skb_get(skb);
skb_dst_drop(skb);
__skb_pull(skb, tcp_hdr(skb)->doff * 4);
skb_set_owner_r(skb, child);
__skb_queue_tail(&child->sk_receive_queue, skb);//将数据放入child的接收队列中
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
tp->syn_data_acked = 1;
}
sk->sk_data_ready(sk, 0);//通知持有listening socket的进程调用accept系统调用创建新连接
bh_unlock_sock(child);
sock_put(child);
WARN_ON(req->sk == NULL);
return 0;
}