1 Either the data transaction is taking place in only one direction (which means that we are the receiver and not transmitting any data) or in the case where we are sending out data also, the window advertised from the other end is constant. The latter means that we have not transmitted any data from our side for quite some time but are receiving data from the other end. The receive window advertised by the other end is constant.
Other than PSH|ACK flags in the TCP header, no other flag is set (ACK is set for each TCP segment).
This means that if any other flag is set such as URG, FIN, SYN, ECN, RST, and CWR, we know that something important is there to be attended and we need to move into the SLOW path.
The header length is unchanged. If the TCP header length remains unchanged, we have not added/removed any TCP option and we can safely assume that there is nothing important to be attended to, if the above two conditions are TRUE.
1 When we have read past an urgent byte in tcp_recvmsg(). We have gotten an urgent byte and we remain in the slow path mode until we receive the urgent byte because it is handled in the slow path in tcp_rcv_established().
2 当在tcp_data_queue中乱序队列由于gap被填充而处理完毕时,运行tcp_fast_path_check。
3 tcp_ack_update_window()中更新了通告窗口。
/* Excerpt: timestamp pre-check performed before the fast path is taken.
 * Any failure here jumps to the slow_path label (not shown in this excerpt).
 */
int tcp_header_len = tp->tcp_header_len;
/* Timestamp header prediction: tcp_header_len
* is automatically equal to th->doff*4 due to pred_flags
* match.
*/
/* Check timestamp */
// Equality means the header consists of the base TCP header plus exactly
// the aligned timestamp option, i.e. the timestamp option is in use.
if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
/* No? Slow path! */
// Parse the timestamp option; a zero return means parsing failed, so we
// fall back to slow_path.
if (!tcp_parse_aligned_timestamp(tp, th))
goto slow_path;
/* If PAWS failed, check it more carefully in slow path */
// After a successful parse the rx_opt fields are presumably filled in.
// If rcv_tsval (timestamp of the segment just received) is older than
// ts_recent (most recent timestamp remembered from the peer), go to the
// slow path and let it handle PAWS.
if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
goto slow_path;
/* DO NOT update ts_recent here, if checksum fails
* and timestamp was corrupted part, it will result
* in a hung connection since we will drop all
* future packets due to the PAWS test.
*/
}
该代码段是依据时戳选项来检查PAWS(Protect Against Wrapped Sequence numbers)。
如果发送来的仅是一个TCP头的话(没有捎带数据或者接收端检测到有乱序数据这些情况时都会发送一个纯粹的ACK包)
/* Excerpt: handling of a segment that carries a TCP header only (pure ACK).
 *
 * NOTE(review): in the upstream tcp_rcv_established() this if/else appears
 * to sit inside an outer `if (len <= tcp_header_len)` test that the excerpt
 * omits; as quoted here, the else arm would also catch len > tcp_header_len.
 * Confirm against the kernel source before relying on the "Header too
 * small" label.
 */
/* Bulk data transfer: sender */
if (len == tcp_header_len) {
/* Predicted packet is in window by definition.
* seq == rcv_nxt and rcv_wup <= rcv_nxt.
* Hence, check seq<=rcv_wup reduces to:
*/
if (tcp_header_len ==
(sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
/* We know that such packets are checksummed
* on entry.
*/
/* Process the ACK, release the skb, then see whether more data can be sent. */
tcp_ack(sk, skb, 0);
__kfree_skb(skb);
tcp_data_snd_check(sk);
return 0;
} else { /* Header too small */
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
/*
 * Choose between sending an ACK right away and arming a delayed ACK.
 *
 * @sk:           socket whose receive state is inspected
 * @ofo_possible: when non-zero, a non-empty out-of-order queue also forces
 *                an immediate ACK
 *
 * The three conditions are tested in a fixed order and testing stops at the
 * first one that holds.
 */
static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int ack_now;

	/* More than one full frame received and the right edge of the
	 * window advances far enough (tcp_recvmsg() will send the ACK
	 * otherwise).
	 */
	ack_now = (tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
		  __tcp_select_window(sk) >= tp->rcv_wnd;

	/* Quick-ack mode: every frame is acknowledged. */
	if (!ack_now)
		ack_now = tcp_in_quickack_mode(sk);

	/* Out-of-order data is queued. */
	if (!ack_now)
		ack_now = ofo_possible && skb_peek(&tp->out_of_order_queue);

	if (ack_now)
		tcp_send_ack(sk);
	else
		tcp_send_delayed_ack(sk);
}
这里有个疑问,就是当ucopy应用读到需要读取到的数据包后,也即在一次处理中
if (tp->copied_seq == tp->rcv_nxt &&
len - tcp_header_len <= tp->ucopy.len) {
/* Validate an incoming segment with PAWS and sequence-number checks; the
 * TCP flags carried by the segment decide how each failure is handled.
 *
 * Returns 1 if the caller may go on processing the segment, 0 if the
 * segment was dropped and its skb freed here, and -1 if an in-window SYN
 * made us reset the connection (the skb is not freed on that path).
 */
static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
				 struct tcphdr *th, int syn_inerr)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* RFC1323: H1. PAWS is applied before anything else. A RST that
	 * fails PAWS is not rejected here; it continues to the checks below.
	 */
	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
	    tcp_paws_discard(sk, skb) && !th->rst) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
		tcp_send_dupack(sk, skb);
		goto discard;
	}

	/* Step 1: sequence number check.
	 *
	 * RFC793, page 37: "In all states except SYN-SENT, all reset
	 * (RST) segments are validated by checking their SEQ-fields."
	 * And page 69: "If an incoming segment is not acceptable,
	 * an acknowledgment should be sent in reply (unless the RST
	 * bit is set, if so drop the segment and return)".
	 */
	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
		if (!th->rst)
			tcp_send_dupack(sk, skb);
		goto discard;
	}

	/* Step 2: an acceptable RST resets the connection. */
	if (th->rst) {
		tcp_reset(sk);
		goto discard;
	}

	/* Only now, with the segment known to be in window, may ts_recent
	 * be updated.
	 */
	tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);

	/* Step 3: security and precedence checks are ignored. */

	/* Step 4: a SYN is only a problem when it lies in the window. */
	if (!th->syn || before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
		return 1;

	if (syn_inerr)
		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
	tcp_reset(sk);
	return -1;

discard:
	__kfree_skb(skb);
	return 0;
}
1 The difference between the timestamp value obtained in the current segment and the last seen timestamp on the incoming TCP segment should be more than TCP_PAWS_WINDOW (= 1), which means that a segment transmitted 1 clock tick before a segment that has already arrived should still be acceptable.
It may be because of reordering of the segments that the latter reached earlier.
2 the 24 days have not elapsed since last time timestamp was stored,
3 tcp_disordered_ack返回0.
2)TCP需在数据缓存区的头部预留足够的空间,用来填充各层首部。MAX_TCP_HEADER是各层首部长度的总和,它考虑了最坏的情况:由于TCP层不知道将要用哪个接口发送包,它为每一层预留了最大的首部长度,甚至还考虑了出现多个IP首部的可能性,因为在内核编译支持IP over IP的情况下,会遇到多个IP首部。
/*
 * Register a network device while holding the RTNL lock.
 *
 * If dev->name contains a '%', it is treated as a format string and
 * dev_alloc_name() is asked to pick the actual name first.
 *
 * Returns the negative value from dev_alloc_name() if name allocation
 * fails, otherwise whatever register_netdevice() returns.
 */
int register_netdev(struct net_device *dev)
{
	int err = 0;

	rtnl_lock();

	/* A format-string name means the caller wants a name allocated. */
	if (strchr(dev->name, '%'))
		err = dev_alloc_name(dev, dev->name);

	/* Hand the device to register_netdevice() unless naming failed. */
	if (err >= 0)
		err = register_netdevice(dev);

	rtnl_unlock();
	return err;
}
/*
 * Software UDP fragmentation offload for a GSO skb.
 *
 * @skb:      packet to segment; its gso_size is used as the MSS
 * @features: device feature flags passed to skb_gso_ok() and skb_segment()
 *
 * Returns:
 *   ERR_PTR(-EINVAL) if skb->len does not exceed the MSS, or if the GSO
 *                    type check in the skb_gso_ok() branch fails;
 *   NULL             if skb_gso_ok() accepts the skb as-is (only gso_segs
 *                    is recomputed);
 *   otherwise the result of skb_segment().
 */
struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	unsigned int mss;
	int offset;
	__wsum csum;

	mss = skb_shinfo(skb)->gso_size;
	if (unlikely(skb->len <= mss))
		goto out;

	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
		/* Packet is from an untrusted source, reset gso_segs. */
		int type = skb_shinfo(skb)->gso_type;

		/* Reject any type bit other than UDP/DODGY, and require UDP. */
		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
			     !(type & (SKB_GSO_UDP))))
			goto out;

		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

		segs = NULL;
		goto out;
	}

	/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
	 * do checksum of UDP packets sent as multiple IP fragments.
	 */
	offset = skb->csum_start - skb_headroom(skb);
	csum = skb_checksum(skb, offset, skb->len - offset, 0);
	offset += skb->csum_offset;
	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
	skb->ip_summed = CHECKSUM_NONE;
	/* The UDP checksum has now been computed and stored in the packet. */

	/* Fragment the skb. IP headers of the fragments are updated in
	 * inet_gso_segment()
	 */
	segs = skb_segment(skb, features);
out:
	return segs;
}
F-RTO:Forward RTO-Recovery,for a TCP sender to recover after a retransmission timeout.
F-RTO的主要目的:The main motivation of the algorithm is to recover efficiently from a spurious
RTO.
F-RTO的基本思想
The guideline behind F-RTO is, that an RTO either indicates a loss, or it is caused by an
excessive delay in packet delivery while there still are outstanding segments in flight. If the
RTO was due to delay, i.e. the RTO was spurious, acknowledgements for non-retransmitted
segments sent before the RTO should arrive at the sender after the RTO occurred. If no such
segments arrive, the RTO is concluded to be non-spurious and the conventional RTO
recovery with go-back-N retransmissions should take place at the TCP sender.
To implement the principle described above, an F-RTO sender acts as follows: if the first ACK
arriving after a RTO-triggered retransmission advances the window, transmit two new segments
instead of continuing retransmissions. If also the second incoming acknowledgement advances
the window, RTO is likely to be spurious, because the second ACK is triggered by an originally
transmitted segment that has not been retransmitted after the RTO. If either one of the two
acknowledgements after RTO is a duplicate ACK, the sender continues retransmissions similarly
to the conventional RTO recovery algorithm.
When the retransmission timer expires, the F-RTO algorithm takes the following steps at the TCP
sender. In the algorithm description below we use SND.UNA to indicate the first unacknowledged
segment.
1.When the retransmission timer expires, retransmit the segment that triggered the timeout. As
required by the TCP congestion control specifications, the ssthresh is adjusted to half of the
number of currently outstanding segments. However, the congestion window is not yet set to one
segment, but the sender waits for the next two acknowledgements before deciding on what to do
with the congestion window.
2.When the first acknowledgement after RTO arrives at the sender, the sender chooses the
following actions depending on whether the ACK advances the window or whether it is a duplicate
ACK.
(a)If the acknowledgement advances SND.UNA, transmit up to two new (previously unsent)
segments. This is the main point in which the F-RTO algorithm differs from the conventional way
of recovering from RTO. After transmitting the two new segments, the congestion window size
is set to have the same value as ssthresh. In effect this reduces the transmission rate of the
sender to half of the transmission rate before the RTO. At this point the TCP sender has transmitted
a total of three segments after the RTO, similarly to the conventional recovery algorithm. If
transmitting two new segments is not possible due to advertised window limitation, or because
there is no more data to send, the sender may transmit only one segment. If no new data can
be transmitted, the TCP sender follows the conventional RTO recovery algorithm and starts
retransmitting the unacknowledged data using slow start.
(b)If the acknowledgement is duplicate ACK, set the congestion window to one segment and
proceed with the conventional RTO recovery. Two new segments are not transmitted in this case,
because the conventional RTO recovery algorithm would not transmit anything at this point either.
Instead, the F-RTO sender continues with slow start and performs similarly to the conventional
TCP sender in retransmitting the unacknowledged segments. Step 3 of the F-RTO algorithm is
not entered in this case. A common reason for executing this branch is the loss of a segment,
in which case the segments injected by the sender before the RTO may still trigger duplicate
ACKs that arrive at the sender after the RTO.
3.When the second acknowledgement after the RTO arrives, either continue transmitting new
data, or start retransmitting with the slow start algorithm, depending on whether new data was
acknowledged.
(a)If the acknowledgement advances SND.UNA, continue transmitting new data following
the congestion avoidance algorithm. Because the TCP sender has retransmitted only one
segment after the RTO, this acknowledgement indicates that an originally transmitted
segment has arrived at the receiver. This is regarded as a strong indication of a spurious
RTO. However, since the TCP sender cannot surely know at this point whether the segment
that triggered the RTO was actually lost, adjusting the congestion control parameters after
the RTO is the conservative action. From this point on, the TCP sender continues as in the
normal congestion avoidance.
If this algorithm branch is taken, the TCP sender ignores the send_high variable that indicates
the highest sequence number transmitted so far. The send_high variable was proposed as a
bugfix for avoiding unnecessary multiple fast retransmits when RTO expires during fast recovery
with NewReno TCP. As the sender has not retransmitted other segments but the one that
triggered RTO, the problem addressed by the bugfix cannot occur. Therefore, if there are
duplicate ACKs arriving at the sender after the RTO, they are likely to indicate a packet loss,
hence fast retransmit should be used to allow efficient recovery. Alternatively, if there are not
enough duplicate ACKs arriving at the sender after a packet loss, the retransmission timer
expires another time and the sender enters step 1 of this algorithm to detect whether the
new RTO is spurious.
(b)If the acknowledgement is duplicate ACK, set the congestion window to three segments,
continue with the slow start algorithm retransmitting unacknowledged segments. The duplicate
ACK indicates that at least one segment other than the segment that triggered RTO is lost in the
last window of data. There is no sufficient evidence that any of the segments was delayed.
Therefore the sender proceeds with retransmissions similarly to the conventional RTO recovery
algorithm, with the send_high variable stored when the retransmission timer expired to avoid
unnecessary fast retransmits.
引起RTO的主要因素:
(1)Sudden delays
The primary motivation of the F-RTO algorithm is to improve the TCP performance when sudden
delays cause spurious retransmission timeouts.
(2)Packet losses
These timeouts occur mainly when retransmissions are lost, since lost original packets are
usually recovered by fast retransmit.
(3)Bursty losses
Losses of several successive packets can result in a retransmission timeout.
造成虚假RTO的原因还有:
Wireless links may also suffer from link outages that cause persistent data loss for a period
of time.
Other potential reasons for sudden delays that have been reported to trigger spurious RTOs
include a delay due to tedious actions required to complete a hand-off or re-routing of packets
to the new serving access point after the hand-off, arrival of competing traffic on a shared link
with low bandwidth, and a sudden bandwidth degradation due to reduced resources on a
wireless channel.
造成真实RTO的原因:
A RTO-triggered retransmission is needed when a retransmission is lost, or when nearly a whole
window of data is lost, thus making it impossible for the receiver to generate enough duplicate
ACKs for triggering TCP fast retransmit.
虚假RTO的后果
If no segments were lost but the retransmission timer expires spuriously, the segments retransmitted
in the slow-start are sent unnecessarily. Particularly, this phenomenon is very possible with the
various wireless access network technologies that are prone to sudden delay spikes.
The retransmission timer expires because of the delay, spuriously triggering the RTO recovery and
unnecessary retransmission of all unacknowledged segments. This happens because after the
delay the ACKs for the original segments arrive at the sender one at a time but too late, because
the TCP sender has already entered the RTO recovery. Therefore, each of the ACKs triggers the
retransmission of segments for which the original ACKs will arrive after a while. This continues
until the whole window of segments is eventually unnecessarily retransmitted. Furthermore,
because a full window of retransmitted segments arrive unnecessarily at the receiver, it generates
duplicate ACKs for these out-of-order segments. Later on, the duplicate ACKs unnecessarily
trigger fast retransmit at the sender.
TCP uses the fast retransmit mechanism to trigger retransmissions after receiving three successive
duplicate acknowledgements (ACKs). If for a certain time period TCP sender does not receive ACKs
that acknowledge new data, the TCP retransmission timer expires as a backoff mechanism.
When the retransmission timer expires, the TCP sender retransmits the first unacknowledged
segment assuming it was lost in the network. Because a retransmission timeout (RTO) can be
an indication of severe congestion in the network, the TCP sender resets its congestion window
to one segment and starts increasing it according to the slow start algorithm.
However, if the RTO occurs spuriously and there still are segments outstanding in the network,
a false slow start is harmful for the potentially congested network as it injects extra segments
to the network at increasing rate.
How about Reliable link-layer protocol ?
Since wireless networks are often subject to high packet loss rate due to corruption or hand-offs,
reliable link-layer protocols are widely employed with wireless links. The link-layer receiver often
aims to deliver the packets to the upper protocol layers in order, which implies that the later
arriving packets are blocked until the head of the queue arrives successfully. Due to the strict
link-layer ordering, the communication end points observe a pause in packet delivery that can
cause a spurious TCP RTO instead of getting out-of-order packets that could result in a false
fast retransmit instead. Either way, interaction between TCP retransmission mechanisms
and link-layer recovery can cause poor performance.
DSACK不能解决此问题
If the unnecessary retransmissions occurred due to spurious RTO caused by a sudden delay,
the acknowledgements with the DSACK information arrive at the sender only after the
acknowledgements of the original segments. Therefore, the unnecessary retransmissions
following the spurious RTO cannot be avoided by using DSACK. Instead, the suggested
recovery algorithm using DSACK can only revert the congestion control parameters to the
state preceding the spurious retransmissions.
F-RTO实现
F-RTO is implemented (mainly) in four functions:
(1)tcp_use_frto() is used to determine if TCP can use F-RTO.
(2)tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is called when tcp_use_frto() showed green light.
(3)tcp_process_frto() handles incoming ACKs during F-RTO algorithm.
(4)tcp_enter_frto_loss() is called if there is not enough evidence to prove that the RTO is indeed spurious. It transfers the control from F-RTO to the conventional RTO recovery.
/* Decide whether F-RTO may be used for this socket.
 *
 * F-RTO can only be used if TCP has never retransmitted anything other than
 * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here).
 *
 * Returns 1 when F-RTO may be used, 0 otherwise.
 */
int tcp_use_frto(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct sk_buff *cur;

	/* Off when disabled by sysctl; MTU probe and F-RTO won't really
	 * play nicely along currently either.
	 */
	if (!sysctl_tcp_frto || icsk->icsk_mtup.probe_size)
		return 0;

	if (tcp_is_sackfrto(tp))
		return 1;

	/* Avoid expensive walking of rexmit queue if possible: more than one
	 * retransmission outstanding means something besides head was resent.
	 */
	if (tp->retrans_out > 1)
		return 0;

	cur = tcp_write_queue_head(sk);
	if (tcp_skb_is_last(sk, cur))
		return 1;

	/* Start the walk at the skb after head. */
	cur = tcp_write_queue_next(sk, cur);
	tcp_for_write_queue_from(cur, sk) {
		if (cur == tcp_send_head(sk))
			break;

		/* No skb other than head may have been retransmitted. */
		if (TCP_SKB_CB(cur)->sacked & TCPCB_RETRANS)
			return 0;

		/* Short-circuit once the first non-SACKed skb has been checked. */
		if (!(TCP_SKB_CB(cur)->sacked & TCPCB_SACKED_ACKED))
			break;
	}

	return 1;
}
/* Returns 1 when sysctl_tcp_frto is set to 0x2 and tcp_is_reno(tp) is false,
 * 0 otherwise.
 */
static int tcp_is_sackfrto(const struct tcp_sock *tp)
{
	if (sysctl_tcp_frto != 0x2)
		return 0;

	return !tcp_is_reno(tp);
}
/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO, which
 * indicates that we should follow the traditional RTO recovery, i.e. mark
 * everything lost and do go-back-N retransmission.
 *
 * @sk: socket whose retransmit queue and congestion state are rewritten.
 */
static void tcp_enter_frto_loss(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int cnt = 0;

	/* Restart the SACKed/lost/FACK counters from zero; they are rebuilt
	 * by the queue walk below.
	 */
	tp->sacked_out = 0;
	tp->lost_out = 0;
	tp->fackets_out = 0;

	/* Walk the retransmit queue and redo the LOST marking. Segments sent
	 * after the RTO are not marked LOST.
	 */
	sk_stream_for_retrans_queue(skb, sk) {
		cnt += tcp_skb_pcount(skb);
		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;

		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
			/* Not SACKed. Do not mark those segments lost that
			 * were forward transmitted after RTO.
			 */
			if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
				/* Set the same LOST bit that was cleared above. */
				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
				tp->lost_out += tcp_skb_pcount(skb);
			}
		} else {
			/* Already SACKed: count it, never mark it LOST. */
			tp->sacked_out += tcp_skb_pcount(skb);
			tp->fackets_out = cnt;
		}
	}
	/* NOTE(review): the upstream helper name appears to be
	 * tcp_sync_left_out(); confirm against the kernel tree in use.
	 */
	tcp_syn_left_out(tp);

	tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp) + 1;
	tp->snd_cwnd_cnt = 0;
	tp->snd_cwnd_stamp = tcp_time_stamp;
	tp->undo_marker = 0;	/* no undo marker is kept */
	tp->frto_counter = 0;	/* F-RTO is over */

	/* Clamp reordering to at most sysctl_tcp_reordering. */
	tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering);
	tcp_set_ca_state(sk, TCP_CA_Loss);	/* enter the Loss state */
	tp->high_seq = tp->frto_highmark;	/* presumably the highest sequence at RTO time */
	TCP_ECN_queue_cwr(tp);			/* presumably queues an ECN CWR indication */

	clear_all_retrans_hints(tp);
}
3.2.12的F-RTO
F-RTO spurious RTO detection algorithm (RFC4138)
F-RTO affects during two new ACKs following RTO (well, almost, see inline
comments). State (ACK number) is kept in frto_counter. When ACK advances
window (but not to or beyond highest sequence sent before RTO) :
On First ACK, send two new segments out.
On second ACK, RTO was likely spurious. Do spurious response (response
algorithm is not part of the F-RTO detection algorithm given in RFC4138 but
can be selected separately).
Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss and
TCP falls back to conventional RTO recovery. F-RTO allows overriding of Nagle,
this is done using frto_counter states 2 and 3, when a new data segment of any
size sent during F-RTO, state 2 is upgraded to 3.
Rationale: if the RTO was spurious, new ACKs should arrive from the original
window even after we transmit two new data segments.
SACK version:
on first step, wait until first cumulative ACK arrives, then move to the second
step. In second step, the next ACK decides.
/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
* which indicates that we should follow the traditional RTO recovery,
* i.e. mark everything lost and do go-back-N retransmission.
*
* @sk: socket whose write queue and congestion state are rewritten.
* @allowed_segments: added to the packets in flight to form the new snd_cwnd.
* @flag: ACK flags; only FLAG_DATA_ACKED is tested here, and the local copy
* is modified inside the loop.
*/
static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
/* Both counters are rebuilt by the queue walk below. */
tp->lost_out = 0;
tp->retrans_out = 0;
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
/* Walk the write queue up to, but not including, the send head. */
tcp_for_write_queue(skb, sk) {
if (skb == tcp_send_head(sk))
break;
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
/*
* Count the retransmission made on RTO correctly (only when waiting for
* the first ACK and did not get it).
*/
if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
/* For some reason this R-bit might get cleared ? */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
tp->retrans_out += tcp_skb_pcount(skb);
/* enter this if branch just for the first segment */
flag |= FLAG_DATA_ACKED;
} else {
/* A retransmitted skb clears the undo marker; its
* SACKED_RETRANS bit is dropped either way.
*/
if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
tp->undo_marker = 0;
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
}
/* Marking forward transmissions that were made after RTO lost can
* cause unnecessary retransmissions in some scenarios,
* SACK blocks will mitigate that in some but not in all cases.
* We used to not mark them but it was causing break-ups with
* receivers that do only in-order receival.
*
* TODO: we could detect presence of such receiver and select different
* behavior per flow.
*/
if (! (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out += tcp_skb_pcount(skb);
tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
}
}
tcp_verify_left_out(tp);
/* Per the original note, allowed_segments is expected to be at most 3
* (not enforced here).
*/
tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
tp->frto_counter = 0; /* F-RTO is over */
tp->bytes_acked = 0;
/* Clamp reordering to at most sysctl_tcp_reordering. */
tp->reordering = min_t(unsigned int, tp->reordering,
sysctl_tcp_reordering);
tcp_set_ca_state(sk, TCP_CA_Loss); /* enter the Loss state */
tp->high_seq = tp->snd_nxt;
TCP_ECN_queue_cwr(tp); /* presumably queues an ECN CWR indication */
tcp_clear_all_retrans_hints(tp);
}