kk Blog —— 通用基础


date [-d @int|str] [+%s|"+%F %T"]
netstat -ltunp
sar -n DEV 1

SYN-ACK 重传

fastopen synack 重传

1
2
3
4
5
6
7
8
int tcp_conn_request(struct request_sock_ops *rsk_ops,
			const struct tcp_request_sock_ops *af_ops,
			struct sock *sk, struct sk_buff *skb)
{
	...
	fastopen = !want_cookie &&
			tcp_try_fastopen(sk, skb, req, &foc, dst);
	...
1
2
3
4
5
6
7
8
bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
			struct request_sock *req,
			struct tcp_fastopen_cookie *foc,
			struct dst_entry *dst)
{
	...
	if (tcp_fastopen_create_child(sk, skb, dst, req)) {
	...
1
2
3
4
5
6
7
8
9
10
11
12
static bool tcp_fastopen_create_child(struct sock *sk,
				struct sk_buff *skb,
				struct dst_entry *dst,
				struct request_sock *req)
{
	...
	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
	...
	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
				TCP_TIMEOUT_INIT, TCP_RTO_MAX);
	...
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
void tcp_retransmit_timer(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (tp->fastopen_rsk) {
		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
			sk->sk_state != TCP_FIN_WAIT1);
		tcp_fastopen_synack_timer(sk);
		/* Before we receive ACK to our SYN-ACK don't retransmit
		 * anything else (e.g., data or FIN segments).
		 */
		return;
	}
	...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
 *      Timer for Fast Open socket to retransmit SYNACK. Note that the
 *      sk here is the child socket, not the parent (listener) socket.
 */
static void tcp_fastopen_synack_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int max_retries = icsk->icsk_syn_retries ? :
		sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
	struct request_sock *req;

	req = tcp_sk(sk)->fastopen_rsk;
	req->rsk_ops->syn_ack_timeout(sk, req);

	if (req->num_timeout >= max_retries) {
		tcp_write_err(sk);
		return;
	}
	/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
	 * returned from rtx_syn_ack() to make it more persistent like
	 * regular retransmit because if the child socket has been accepted
	 * it's not good to give up too easily.
	 */
	inet_rtx_syn_ack(sk, req);
	req->num_timeout++;
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
		TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
}

http://blog.csdn.net/u011130578/article/details/44954891

1 Why

TCP服务器在收到SYN请求后发送SYN|ACK响应,然后等待对端的ACK到来以完成三次握手。如果没有收到ACK,TCP应该重传SYN|ACK,这个功能由SYN-ACK定时器完成。由于SYN|ACK发送后并没有放入发送队列中,故重传时必须重新构建SYN|ACK报文。

2 When

TCP在发送SYN|ACK响应后设置SYN-ACK定时器:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
1465 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1466 {
...
1598     skb_synack = tcp_make_synack(sk, dst, req,
1599         fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); //构建SYN|ACK
1600
1601     if (skb_synack) {
1602         __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
1603         skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
1604     } else
1605         goto drop_and_free;
1606
1607     if (likely(!do_fastopen)) {
1608         int err;
1609         err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
1610              ireq->rmt_addr, ireq->opt); //发送SYN|ACK
...
1617         /* Add the request_sock to the SYN table */
1618         inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); //将requese sock加入到SYN表中,并设置SYN-ACK定时器
...

inet_csk_reqsk_queue_hash_add函数:

1
2
3
4
5
6
7
8
9
10
11
521 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
522                    unsigned long timeout)
523 {
524     struct inet_connection_sock *icsk = inet_csk(sk);
525     struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
526     const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port,
527                      lopt->hash_rnd, lopt->nr_table_entries);
528
529     reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); //将request_sock放入syn_table中并记录超时时间
530     inet_csk_reqsk_queue_added(sk, timeout); //设置SYN-ACK定时器
531 }

reqsk_queue_hash_req函数会记录request_sock的超时时间:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
262 static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
263                     u32 hash, struct request_sock *req,
264                     unsigned long timeout)
265 {
266     struct listen_sock *lopt = queue->listen_opt;
267
268     req->expires = jiffies + timeout; //超时时间
269     req->num_retrans = 0;
270     req->num_timeout = 0;
271     req->sk = NULL;
272     req->dl_next = lopt->syn_table[hash];
273
274     write_lock(&queue->syn_wait_lock);
275     lopt->syn_table[hash] = req;
276     write_unlock(&queue->syn_wait_lock);
277 }

inet_csk_reqsk_queue_added函数为整个syn_table设置一个SYN-ACK定时器:

1
2
3
4
5
6
280 static inline void inet_csk_reqsk_queue_added(struct sock *sk,
281                           const unsigned long timeout)
282 {
283     if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) //如果添加request sock之前syn_table为空
284         inet_csk_reset_keepalive_timer(sk, timeout);//设置SYN-ACK定时器
285 }

inet_csk_reset_keepalive_timer函数真正设置定时器:

1
2
3
4
404 void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
405 {
406     sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
407 }

SYN-ACK定时器的超时时间为TCP_TIMEOUT_INIT(1秒)。

3 What

SYN-ACK定时器的结构为sk->sk_timer,其超时函数为tcp_keepalive_timer:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
558 static void tcp_keepalive_timer (unsigned long data)
559 {
560     struct sock *sk = (struct sock *) data;
561     struct inet_connection_sock *icsk = inet_csk(sk);
562     struct tcp_sock *tp = tcp_sk(sk);
563     u32 elapsed;
564
565     /* Only process if socket is not in use. */
566     bh_lock_sock(sk);
567     if (sock_owned_by_user(sk)) {
568         /* Try again later. */
569         inet_csk_reset_keepalive_timer (sk, HZ/20);
570         goto out;
571     }
572
573     if (sk->sk_state == TCP_LISTEN) { //如果是SYN-ACK定时器超时则判断为真
574         tcp_synack_timer(sk); //SYN-ACK定时器超时函数
575         goto out;
576     }
...

tcp_synack_timer函数:

1
2
3
4
5
534 static void tcp_synack_timer(struct sock *sk)
535 {
536     inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
537                    TCP_TIMEOUT_INIT, TCP_RTO_MAX);
538 }

inet_csk_reqsk_queue_prune函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
570 void inet_csk_reqsk_queue_prune(struct sock *parent,
571                 const unsigned long interval,
572                 const unsigned long timeout,
573                 const unsigned long max_rto)
574 {
575     struct inet_connection_sock *icsk = inet_csk(parent);
576     struct request_sock_queue *queue = &icsk->icsk_accept_queue;
577     struct listen_sock *lopt = queue->listen_opt;
578     int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
579     int thresh = max_retries;
580     unsigned long now = jiffies;
581     struct request_sock **reqp, *req;
582     int i, budget;
583
584     if (lopt == NULL || lopt->qlen == 0)
585         return;
...
604     if (lopt->qlen>>(lopt->max_qlen_log-1)) {
605         int young = (lopt->qlen_young<<1);
606
607         while (thresh > 2) {
608             if (lopt->qlen < young)
609                 break;
610             thresh--;
611             young <<= 1;
612         }
613     }
614
615     if (queue->rskq_defer_accept)    //需要等待数据到来再唤醒应用进程
616         max_retries = queue->rskq_defer_accept;
617
618     budget = 2 * (lopt->nr_table_entries / (timeout / interval));
619     i = lopt->clock_hand;
620
621     do {  //遍历SYN table
622         reqp=&lopt->syn_table[i];
623         while ((req = *reqp) != NULL) {
624             if (time_after_eq(now, req->expires)) { //超时
625                 int expire = 0, resend = 0;
626
627                 syn_ack_recalc(req, thresh, max_retries,
628                            queue->rskq_defer_accept,
629                            &expire, &resend);     //计算request sock是否过期以及是否需要重发SYN|ACK
630                 req->rsk_ops->syn_ack_timeout(parent, req); //调用tcp_syn_ack_timeout更新信息数据库
631                 if (!expire &&  //request socket没有超时
632                     (!resend ||
633                      !inet_rtx_syn_ack(parent, req) ||  //重传SYN-ACK
634                      inet_rsk(req)->acked)) {
635                     unsigned long timeo;
636
637                     if (req->num_timeout++ == 0)
638                         lopt->qlen_young--;
639                     timeo = min(timeout << req->num_timeout,
640                             max_rto);
641                     req->expires = now + timeo;  //更新request_sock超时时间
642                     reqp = &req->dl_next;
643                     continue;
644                 }
645
646                 /* Drop this request */
647                 inet_csk_reqsk_queue_unlink(parent, req, reqp); 
648                 reqsk_queue_removed(queue, req);
649                 reqsk_free(req);
650                 continue;
651             }
652             reqp = &req->dl_next;
653         }
654
655         i = (i + 1) & (lopt->nr_table_entries - 1);
656
657     } while (--budget > 0);
658
659     lopt->clock_hand = i;
660
661     if (lopt->qlen) //syn_table中还有成员
662         inet_csk_reset_keepalive_timer(parent, interval); //继续设置定时器,超时
663 }

604-611:当syn_table中剩余空间比较小时,需要减小最大重试次数,以便使旧的request_sock能够更快消亡,从而新的request_sock能够更多的被接受

647-649:将超时的request_sock移出syn_table并释放,即丢弃其对应的连接

631-642:全部满足下列条件就不删除request_sock而只是更新超时时间:
(1)request_sock没有超时
(2)下列3个条件之一成立
1)不需要重传SYN|ACK
2)重传SYN|ACK成功
3)应用进程使用TCP_DEFER_ACCEPT socket选项意图使数据到来时listen socket再唤醒进程,当ACK到来但没有数据时

syn_ack_recalc函数来确定request_sock是否超时以及是否需要重传SYN|ACK:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
539 static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
540                   const int max_retries,
541                   const u8 rskq_defer_accept,
542                   int *expire, int *resend)
543 {
544     if (!rskq_defer_accept) {    //不需要等待数据到来再调用accept系统调用
545         *expire = req->num_timeout >= thresh;    //超时次数达到限制则超时
546         *resend = 1;    //重传SYN|ACK
547         return;
548     }
549     *expire = req->num_timeout >= thresh &&    //超时次数达到限制
550           (!inet_rsk(req)->acked || req->num_timeout >= max_retries);    //ACK没有到来或超时次数达到最高上限
551     /*
552      * Do not resend while waiting for data after ACK,
553      * start to resend on end of deferring period to give
554      * last chance for data or ACK to create established socket.
555      */
556     *resend = !inet_rsk(req)->acked || //ACK没有到来
557           req->num_timeout >= rskq_defer_accept - 1;    //超时次数超过或即将达到应用进程的限制,赶快重传SYN|ACK以便给对端最后一个机会建立连接
558 }

综上,SYN|ACK定时器超时时重传SYN|ACK的条件是下列条件全部成立:
(1)request_sock超时
(2)request_sock的超时次数达到限制
(3)下列条件之一成立:
1)应用进程没有使用TCP_DEFER_ACCEPT socket选项来延迟accept request_sock的时间
2)应用进程使用TCP_DEFER_ACCEPT socket选项设置了超时次数限制,但ACK没有到来或,超时次数达到最高限制且超时次数超过或即将达到应用进程的限制

SYN|ACK的重传是由inet_rtx_syn_ack函数完成的:

1
2
3
4
5
6
7
8
560 int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
561 {
562     int err = req->rsk_ops->rtx_syn_ack(parent, req);    //指向tcp_v4_rtx_synack或tcp_v6_rtx_synack
563
564     if (!err)
565         req->num_retrans++;
566     return err;
567 }

tcp_v4_rtx_synack函数:

1
2
3
4
5
6
7
8
870 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
871 {
872     int res = tcp_v4_send_synack(sk, NULL, req, 0, false); //构建并发送SYN-ACK
873
874     if (!res)
875         TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
876     return res;
877 }

kernel, net

« TCP Fast Open(TFO), tcp_fastopen 内存屏障 »