/*
 * tcp_close - close a TCP socket.
 * @sk: socket being closed
 * @timeout: handed to sk_stream_wait_close() as the wait limit
 *
 * Takes the socket lock, sets sk_shutdown to SHUTDOWN_MASK and then picks
 * one of four paths: stop listening (TCP_LISTEN), reset the connection
 * (unread receive data), disconnect (SO_LINGER with a zero linger time),
 * or send a FIN when tcp_close_state() asks for one.
 *
 * NOTE: this is an excerpt. The tail of the function, including the
 * adjudge_to_death label, is elided and not documented here.
 */
void tcp_close(struct sock *sk, long timeout)
{
struct sk_buff *skb;
int data_was_unread = 0;
/* Not used in the visible part of this excerpt. */
int state;
lock_sock(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
/* TCP_LISTEN means the socket being closed is a listening socket. */
if (sk->sk_state == TCP_LISTEN) {
/* Move the sock to TCP_CLOSE. */
tcp_set_state(sk, TCP_CLOSE);
/* NOTE(review): presumably cleans up pending connection requests;
 * confirm against the definition of inet_csk_listen_stop().
 */
/* Special case. */
inet_csk_listen_stop(sk);
/* Skip the data-path handling below and go to the common teardown. */
goto adjudge_to_death;
}
/* Drain sk_receive_queue, counting how much received data was never read. */
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
/* Sequence span of the segment, minus the FIN flag so that a FIN
 * is not counted as unread data.
 */
u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
tcp_hdr(skb)->fin;
data_was_unread += len;
/* Free the dequeued skb. */
__kfree_skb(skb);
}
sk_mem_reclaim(sk);
/* First branch: RFC 2525 section 2.17 behaviour. If unread data was
 * discarded at close time, send a RST to the peer instead of a FIN.
 */
if (data_was_unread) {
/* Unread data was tossed, zap the connection. */
NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
/* Move the sock to TCP_CLOSE. */
tcp_set_state(sk, TCP_CLOSE);
/* Send the RST. */
tcp_send_active_reset(sk, GFP_KERNEL);
}
/* Second branch: SO_LINGER is set and the linger time is zero. */
else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
/* Invoke the protocol's disconnect hook.
 * NOTE(review): for TCP this is presumably tcp_disconnect(), which
 * would drop anything still queued for sending; confirm.
 */
sk->sk_prot->disconnect(sk, 0);
NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
}
/* Third branch: tcp_close_state() decides, based on the current state,
 * whether a FIN has to be sent.
 */
else if (tcp_close_state(sk)) {
/* Send the FIN. */
tcp_send_fin(sk);
}
/* Wait up to @timeout for the close to progress.
 * NOTE(review): presumably @timeout is the SO_LINGER l_linger value when
 * lingering is enabled; verify against the caller.
 */
sk_stream_wait_close(sk, timeout);
........................
}
RFC 2525 第 2.17 节的介绍:
When an application closes a connection in such a way that it can no longer read any received data,
the TCP SHOULD, per section 4.2.2.13 of RFC 1122, send a RST if there is any unread received data,
or if any new data is received. A TCP that fails to do so exhibits "Failure to RST on close with data pending".
/*
 * tcp_send_fin - queue a FIN for transmission and push the write queue.
 * @sk: socket on which the FIN is sent
 *
 * If there is unsent data, the FIN flag is set on the tail skb of the write
 * queue; otherwise a separate ACK|FIN skb is queued. Either way the pending
 * frames are then pushed with TCP_NAGLE_OFF.
 *
 * NOTE: this is an excerpt. Part of the else-branch is elided.
 */
void tcp_send_fin(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Tail skb of the write queue. */
struct sk_buff *skb = tcp_write_queue_tail(sk);
int mss_now;
/* Optimization, tack on the FIN if we have a queue of
 * unsent frames. But be careful about outgoing SACKS
 * and IP options.
 */
mss_now = tcp_current_mss(sk);
/* There is unsent data: set the FIN flag on the tail skb so that the FIN
 * goes out after everything already queued. The FIN takes one sequence
 * number, so both end_seq and write_seq advance by one.
 */
if (tcp_send_head(sk) != NULL) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
TCP_SKB_CB(skb)->end_seq++;
tp->write_seq++;
} else {
..................................
/* Nothing is waiting to be sent, so a dedicated skb carries the FIN.
 * NOTE(review): the code that obtains the new skb is elided above;
 * presumably skb is freshly allocated at this point.
 */
skb_reserve(skb, MAX_TCP_HEADER);
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
tcp_init_nondata_skb(skb, tp->write_seq,
TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
tcp_queue_skb(sk, skb);
}
/* Push whatever is pending in the write queue, with Nagle disabled. */
__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
}
/*
 * __tcp_push_pending_frames - try to transmit whatever is pending.
 * @sk: socket whose pending frames are pushed
 * @cur_mss: MSS value forwarded to tcp_write_xmit()
 * @nonagle: Nagle setting forwarded to tcp_write_xmit()
 *
 * Does nothing when there is no send head or the sock is in TCP_CLOSE.
 * When tcp_write_xmit() returns non-zero, tcp_check_probe_timer() is
 * called for the socket.
 */
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
			       int nonagle)
{
	/* No send head: nothing is waiting to go out. */
	if (!tcp_send_head(sk))
		return;

	/* If we are closed, the bytes will have to remain here.
	 * In time closedown will finish, we empty the write queue and
	 * all will be happy.
	 */
	if (unlikely(sk->sk_state == TCP_CLOSE))
		return;

	/* Hand the queue to the transmit path using the caller's Nagle mode. */
	if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC) != 0)
		tcp_check_probe_timer(sk);
}
/*
 * inet_shutdown - shut down one or both directions of a socket.
 * @sock: socket to act on
 * @how: direction selector from the caller; incremented internally so the
 *       result can be tested against RCV_SHUTDOWN / SEND_SHUTDOWN bits
 *
 * Returns 0 on success, -EINVAL when @how is out of range, -ENOTCONN when
 * the sock is in TCP_CLOSE, or the result of the protocol's disconnect()
 * hook for the TCP_LISTEN / TCP_SYN_SENT paths.
 */
int inet_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	int err = 0;

	/* This should really check to make sure
	 * the socket is a TCP socket. (WHY AC...)
	 */

	/* Shift the caller's value up by one (0->1, 1->2, 2->3) so that
	 * receive and send each map onto their own bit.
	 */
	how += 1;

	/* Reject zero (MAXINT->0) and anything outside SHUTDOWN_MASK. */
	if (how == 0 || (how & ~SHUTDOWN_MASK) != 0)
		return -EINVAL;

	lock_sock(sk);

	/* A socket still marked SS_CONNECTING is re-classified according to
	 * the sock state: SYN_SENT, SYN_RECV and CLOSE become
	 * SS_DISCONNECTING, everything else becomes SS_CONNECTED.
	 */
	if (sock->state == SS_CONNECTING) {
		const int unfinished = TCPF_SYN_SENT | TCPF_SYN_RECV |
				       TCPF_CLOSE;

		sock->state = ((1 << sk->sk_state) & unfinished) ?
			      SS_DISCONNECTING : SS_CONNECTED;
	}

	/* Every state other than TCP_LISTEN and TCP_SYN_SENT ends up in the
	 * protocol's shutdown hook (when one is set).
	 */
	switch (sk->sk_state) {
	/* Remaining two branches are temporary solution for missing
	 * close() in multithreaded environment. It is _not_ a good idea,
	 * but we have no choice until close() is repaired at VFS level.
	 */
	case TCP_LISTEN:
		/* Only a receive-side shutdown disconnects a listener. */
		if (!(how & RCV_SHUTDOWN))
			break;
		/* Fall through */
	case TCP_SYN_SENT:
		/* Disconnect, then record the outcome in the socket state. */
		err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
		break;
	case TCP_CLOSE:
		/* Report the error but still run the common handling below. */
		err = -ENOTCONN;
		/* Hack to wake up other listeners, who can poll for
		   POLLHUP, even on eg. unconnected UDP sockets -- RR */
		/* Fall through */
	default:
		sk->sk_shutdown |= how;
		if (sk->sk_prot->shutdown)
			sk->sk_prot->shutdown(sk, how);
		break;
	}

	/* Wake up anyone sleeping in poll. */
	sk->sk_state_change(sk);
	release_sock(sk);
	return err;
}
/*
 * tcp_shutdown - handle the send side of a shutdown request.
 * @sk: socket being shut down
 * @how: shutdown bits; only SEND_SHUTDOWN is acted upon here
 *
 * We need to grab some memory, and put together a FIN,
 * and then put it into the queue to be sent.
 * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
 */
void tcp_shutdown(struct sock *sk, int how)
{
	/* States in which a FIN may still need to go out. */
	const int fin_candidates = TCPF_ESTABLISHED | TCPF_SYN_SENT |
				   TCPF_SYN_RECV | TCPF_CLOSE_WAIT;

	/* A receive-only shutdown needs no work here. */
	if ((how & SEND_SHUTDOWN) == 0)
		return;

	/* If we've already sent a FIN, or it's a closed state, skip this. */
	if (((1 << sk->sk_state) & fin_candidates) == 0)
		return;

	/* Clear out any half completed packets. FIN if needed. */
	if (tcp_close_state(sk))
		tcp_send_fin(sk);
}