解读一:RFC793 P17的描述是“The urgent pointer points to the sequence number of the octet following the urgent data.”;P41还有描述“This mechanism permits a point in the data stream to be designated as the end of urgent information. Whenever this point is in advance of the receive sequence number (RCV.NXT) at the receiving TCP, that TCP must tell the user to go into "urgent mode"; when the receive sequence number catches up to the urgent pointer, the TCP must tell user to go into "normal mode".”。可以认为:当前接收的报文中,序号在SEG.SEQ+Urgent Pointer之前的数据都是urgent data,而urgent pointer指向的是第一个非urgent data字节(TCP已经接收、但还没有提交给应用的数据是否也算urgent data呢?)
解读二:RFC793 P56的描述是“If the urgent flag is set, then SND.UP <- SND.NXT-1 and set the urgent pointer in the outgoing segments”,也就是urgent pointer指向最后一个urgent data字节。RFC1122消除了这一歧义,在P84中说明:“the urgent pointer points to the sequence number of the LAST octet (not LAST+1) in a sequence of urgent data”。
/*
 * tcp_sendmsg - excerpt of the TCP send path, annotated for MSG_OOB handling.
 *
 * Walks the caller's iovec array and copies the user data into skbs on the
 * socket's write queue, extending the tail skb when it still has room and
 * allocating a new one otherwise.
 *
 * Returns the number of bytes copied when at least one byte was queued,
 * otherwise the value produced by sk_stream_error().
 *
 * NOTE: the dotted lines below mark code that the annotator elided (local
 * declarations, locking, connection-state checks); this excerpt is not
 * compilable on its own.
 */
int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t size)
{
。。。。。。。。。。。。。。
/* Annotator's note: when flags contains MSG_OOB, the values returned by this
 * call effectively have TSO disabled (presumably size_goal is limited to a
 * single MSS -- TODO confirm against tcp_send_mss()). */
mss_now = tcp_send_mss(sk, &size_goal, flags);
。。。。。。。。。。。。。。
/* Outer loop: one iteration per iovec entry supplied by the caller. */
while (--iovlen >= 0) {
size_t seglen = iov->iov_len;
unsigned char __user *from = iov->iov_base;
iov++;
/* Inner loop: consume the current iovec entry chunk by chunk. */
while (seglen > 0) {
int copy = 0;
int max = size_goal;
skb = tcp_write_queue_tail(sk);
/* When tcp_send_head() is non-NULL, compute the room left in the
 * tail skb; an skb without checksum offload is capped at mss_now. */
if (tcp_send_head(sk)) {
if (skb->ip_summed == CHECKSUM_NONE)
max = mss_now;
copy = max - skb->len;
}
/* No room left in the tail skb (or nothing to extend): start a new one. */
if (copy <= 0) {
new_segment:
/* Allocate new segment. If the interface is SG,
* allocate skb fitting to single page.
*/
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
skb = sk_stream_alloc_skb(sk,
select_size(sk, sg),
sk->sk_allocation);
if (!skb)
goto wait_for_memory;
/*
* Check whether we can use HW checksum.
*/
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb);
copy = size_goal;
max = size_goal;
}
/* Try to append data to the end of skb. */
if (copy > seglen)
copy = seglen;
/* Where to copy to? */
if (skb_availroom(skb) > 0) {
/* We have some space in skb head. Superb! */
copy = min_t(int, copy, skb_availroom(skb));
err = skb_add_data_nocache(sk, skb, from, copy);
if (err)
goto do_fault;
} else {
/* No linear room: place the data in a page fragment, reusing the
 * socket's cached page (sk_sndmsg_page / sk_sndmsg_off) if any. */
int merge = 0;
int i = skb_shinfo(skb)->nr_frags;
struct page *page = sk->sk_sndmsg_page;
int off;
/* A reference count of 1 means only the socket still holds the
 * cached page, so filling restarts from offset 0. */
if (page && page_count(page) == 1)
sk->sk_sndmsg_off = 0;
off = sk->sk_sndmsg_off;
if (skb_can_coalesce(skb, i, page, off) &&
off != PAGE_SIZE) {
/* We can extend the last page
* fragment. */
merge = 1;
} else if (i == MAX_SKB_FRAGS || !sg) {
/* Need to add new fragment and cannot
* do this because interface is non-SG,
* or because all the page slots are
* busy. */
tcp_mark_push(tp, skb);
goto new_segment;
} else if (page) {
/* The cached page is full: drop it so a fresh one is allocated. */
if (off == PAGE_SIZE) {
put_page(page);
sk->sk_sndmsg_page = page = NULL;
off = 0;
}
} else
off = 0;
/* Never copy past the end of the page. */
if (copy > PAGE_SIZE - off)
copy = PAGE_SIZE - off;
if (!sk_wmem_schedule(sk, copy))
goto wait_for_memory;
if (!page) {
/* Allocate new cache page. */
if (!(page = sk_stream_alloc_page(sk)))
goto wait_for_memory;
}
/* Time to copy data. We are close to
* the end! */
err = skb_copy_to_page_nocache(sk, from, skb,
page, off, copy);
if (err) {
/* If this page was new, give it to the
* socket so it does not get leaked.
*/
if (!sk->sk_sndmsg_page) {
sk->sk_sndmsg_page = page;
sk->sk_sndmsg_off = 0;
}
goto do_error;
}
/* Update the skb. */
if (merge) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
skb_fill_page_desc(skb, i, page, off, copy);
/* Take an extra page reference for the new fragment; a freshly
 * allocated page is cached on the socket only while it still
 * has free space. */
if (sk->sk_sndmsg_page) {
get_page(page);
} else if (off + copy < PAGE_SIZE) {
get_page(page);
sk->sk_sndmsg_page = page;
}
}
sk->sk_sndmsg_off = off + copy;
}
/* Clear PSH on the skb that receives the first bytes of this call. */
if (!copied)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
/* Account for the bytes just queued. */
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
skb_shinfo(skb)->gso_segs = 0;
from += copy;
copied += copy;
/* Everything from the last iovec entry has been queued: finish up. */
if ((seglen -= copy) == 0 && iovlen == 0)
goto out;
/* For OOB data, do not push even when the current segment is full:
 * keep accumulating segments while send buffer space and OOB data
 * remain. */
if (skb->len < max || (flags & MSG_OOB))
continue;
if (forced_push(tp)) {
tcp_mark_push(tp, skb);
__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
} else if (skb == tcp_send_head(sk))
tcp_push_one(sk, mss_now);
continue;
wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
/* Push out what has been queued so far before waiting for memory. */
if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
/* Re-read mss_now and size_goal after the wait. */
mss_now = tcp_send_mss(sk, &size_goal, flags);
}
}
out:
if (copied)
tcp_push(sk, flags, mss_now, tp->nonagle);
release_sock(sk);
return copied;
do_fault:
/* The copy into a still-empty skb failed: take it off the write queue
 * and free it. */
if (!skb->len) {
tcp_unlink_write_queue(skb, sk);
/* It is the one place in all of TCP, except connection
* reset, where we can be unlinking the send_head.
*/
tcp_check_send_head(sk, skb);
sk_wmem_free_skb(sk, skb);
}
do_error:
/* A partial send is reported as success with the byte count. */
if (copied)
goto out;
out_err:
err = sk_stream_error(sk, flags, err);
release_sock(sk);
return err;
}
/*
 * tcp_check_urg - take note of the urgent pointer carried in a TCP header.
 * @sk: socket being processed
 * @th: TCP header whose urg_ptr and seq fields are examined
 *
 * Converts the 16-bit urgent offset into an absolute sequence number, drops
 * pointers that are stale or already known, and otherwise raises SIGURG via
 * sk_send_sigurg() and records the new pointer in tp->urg_seq with
 * tp->urg_data set to TCP_URG_NOTYET.
 */
static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *head;
	u32 urg_end = ntohs(th->urg_ptr);

	/*
	 * The urgent pointer can be interpreted in two ways:
	 *   1. it points at the first byte after the urgent data;
	 *   2. it points at the last byte of the urgent data (RFC 1122).
	 * sysctl_tcp_stdurg being set selects interpretation 2, in which case
	 * no adjustment is needed; otherwise step back one byte so urg_end
	 * designates the last urgent byte.
	 */
	if (urg_end && !sysctl_tcp_stdurg)
		urg_end -= 1;
	urg_end += ntohl(th->seq);

	/*
	 * Nothing to do when any of the following holds (checked in order):
	 *
	 *  - copied_seq is already past the pointer: the urgent data has been
	 *    seen and read.
	 *    NOTE(review): the annotator asks which case this covers on the
	 *    tcp_rcv_established path, since tcp_validate_incoming should
	 *    already have rejected such out-of-window segments -- unresolved.
	 *
	 *  - the pointer lies before rcv_nxt: do not replay an urgent pointer.
	 *    Upstream remark: a misbehaving sender may point into a segment
	 *    already sitting in the ofo queue; we cannot fetch such data and
	 *    would stay in TCP_URG_NOTYET until recvmsg() eats it. Handling
	 *    that is not considered mandatory, though it is worth keeping in
	 *    mind as a possible application-level deadlock / DoS vector.
	 *    NOTE(review): the annotator could not tell when this happens.
	 *
	 *  - urgent state is already active and the pointer is not newer than
	 *    the stored urg_seq: duplicate or older pointer, the urgent data at
	 *    tp->urg_seq is already being handled.
	 */
	if (after(tp->copied_seq, urg_end) ||
	    before(urg_end, tp->rcv_nxt) ||
	    (tp->urg_data && !after(urg_end, tp->urg_seq)))
		return;

	/* Tell the world about our new urgent pointer. */
	sk_send_sigurg(sk);

	/*
	 * A new urgent byte may be announced while the previous one is the very
	 * next byte the reader would get (urg_seq == copied_seq), it has not
	 * been delivered inline, and more data has been received beyond it.
	 * copied_seq cannot simply be ignored (the old urgent byte would be
	 * read again as ordinary data), so step over it here and release the
	 * head skb if copied_seq has reached its end.
	 *
	 * Upstream remark: the original author expected
	 * send("A", MSG_OOB); send("B", MSG_OOB) to make both A and B vanish
	 * from the stream. That expectation is wrong; applications relying on
	 * it are buggy, and the workaround only helps in that artificial test.
	 */
	if (tp->urg_seq == tp->copied_seq) {
		if (tp->urg_data &&
		    !sock_flag(sk, SOCK_URGINLINE) &&
		    tp->copied_seq != tp->rcv_nxt) {
			head = skb_peek(&sk->sk_receive_queue);
			tp->copied_seq += 1;
			if (head &&
			    !before(tp->copied_seq, TCP_SKB_CB(head)->end_seq)) {
				__skb_unlink(head, &sk->sk_receive_queue);
				__kfree_skb(head);
			}
		}
	}

	/* Remember the pointer; the urgent byte itself has not arrived yet. */
	tp->urg_data = TCP_URG_NOTYET;
	tp->urg_seq = urg_end;

	/* Disable header prediction. */
	tp->pred_flags = 0;
}
/*
 * tcp_recv_urg - hand the pending urgent (out-of-band) byte to the caller.
 * @sk:    socket to read from
 * @msg:   destination message; MSG_OOB (and possibly MSG_TRUNC) is set in
 *         msg->msg_flags when an urgent byte is available
 * @len:   size of the caller's buffer
 * @flags: receive flags; MSG_PEEK and MSG_TRUNC are honoured
 *
 * Never blocks. Returns:
 *   -EINVAL   urgent data is delivered inline, there is none, or it has
 *             already been read;
 *   -ENOTCONN socket is in TCP_CLOSE without SOCK_DONE;
 *   -EFAULT   copying the byte to the caller's iovec failed;
 *   -EAGAIN   an urgent pointer is known but the byte is not valid yet;
 *   0         as above, but the socket is closed or receive-shutdown;
 *   1         one urgent byte delivered (or skipped under MSG_TRUNC);
 *   len       unchanged when len <= 0 (MSG_TRUNC is flagged in msg).
 */
static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
{
	struct tcp_sock *tp = tcp_sk(sk);
	char oob_byte;
	int err;

	/* No URG data to read, or the user has already consumed it. */
	if (sock_flag(sk, SOCK_URGINLINE))
		return -EINVAL;	/* Yes this is right ! */
	if (!tp->urg_data || tp->urg_data == TCP_URG_READ)
		return -EINVAL;	/* Yes this is right ! */

	if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
		return -ENOTCONN;

	if (!(tp->urg_data & TCP_URG_VALID)) {
		if (sk->sk_state == TCP_CLOSE ||
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			return 0;

		/* Fixed the recv(..., MSG_OOB) behaviour. BSD docs and
		 * the available implementations agree in this case:
		 * this call should never block, independent of the
		 * blocking state of the socket.
		 * Mike <pall@rz.uni-karlsruhe.de>
		 */
		return -EAGAIN;
	}

	/* tp->urg_data holds a valid urgent byte: grab it before the state
	 * word is overwritten below. */
	oob_byte = tp->urg_data;

	/* Mark the urgent byte as read unless the caller is only peeking. */
	if (!(flags & MSG_PEEK))
		tp->urg_data = TCP_URG_READ;

	/* Read urgent data. */
	msg->msg_flags |= MSG_OOB;

	if (len <= 0) {
		/* No room in the caller's buffer: report truncation. */
		msg->msg_flags |= MSG_TRUNC;
		return len;
	}

	/* With MSG_TRUNC the byte is counted but not copied out. */
	if (flags & MSG_TRUNC)
		return 1;

	err = memcpy_toiovec(msg->msg_iov, &oob_byte, 1);
	return err ? -EFAULT : 1;
}
found_ok_skb:
	/*
	 * Excerpt from tcp_recvmsg(): decide how many bytes of the current skb
	 * may be copied to the user, taking a pending urgent byte into account.
	 *
	 * Fix: a stray closing brace used to terminate `if (!urg_offset)` early,
	 * which attached the `else` to `if (urg_offset < used)` and left the
	 * excerpt with unbalanced braces. The `else` belongs to
	 * `if (!urg_offset)`, as the annotation on that branch describes.
	 */

	/* Ok so how much can we use? */
	used = skb->len - offset;
	if (len < used)
		used = len;

	/* Urgent data is currently recorded for this socket. */
	if (tp->urg_data) {
		u32 urg_offset = tp->urg_seq - *seq;

		/* The urgent byte falls inside the range about to be copied. */
		if (urg_offset < used) {
			if (!urg_offset) {
				/* The very next byte is the urgent byte: unless
				 * it is delivered inline, step over it and give
				 * the user only the data that follows. */
				if (!sock_flag(sk, SOCK_URGINLINE)) {
					++*seq;
					urg_hole++;
					offset++;
					used--;
					if (!used)
						goto skip_copy;
				}
			} else {
				/* Copy only the data in front of the urgent
				 * byte. Annotator's note: the loop then exits
				 * at the top of its next iteration and returns
				 * to the user; the following tcp_recvmsg() call
				 * takes the branch above. */
				used = urg_offset;
			}
		}
	}