kk Blog —— 通用基础


date [-d @int|str] [+%s|"+%F %T"]
netstat -ltunp
sar -n DEV 1

IPV6 实现

http://www.cnblogs.com/super-king/p/ipv6_implement.html

code extract from 2.6.24. 在文件 net/ipv6/af_inet6.c 中包含了ipv6协议初始化的主函数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/*
 * inet6_init - initialisation entry point of the IPv6 protocol family.
 *
 * Registers the transport protos, the raw protosw entry, the socket family,
 * the MIBs, the control protocols (ICMPv6, neighbour discovery, IGMPv6),
 * netfilter, the /proc entries, routing, address configuration, the
 * extension-header handlers and finally the packet-type handler.
 *
 * Returns 0 on success, otherwise the error code of the first failing step.
 * NOTE: the error-unwinding labels targeted by the gotos are elided in this
 * excerpt (see the "......" placeholder at the end of the function).
 */
static int __init inet6_init(void)
{
	struct sk_buff *dummy_skb;
	struct list_head *r;
	int err;
	// compile-time check: inet6_skb_parm must not be larger than skb->cb
	BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb));

	// initialise the slab-related fields of tcpv6_prot, then add it to the global proto_list
	err = proto_register(&tcpv6_prot, 1);
	if (err)
		goto out;
	// same as above, for UDP
	err = proto_register(&udpv6_prot, 1);
	if (err)
		goto out_unregister_tcp_proto;
	// UDP-Lite transport, mainly used for multimedia; see Documentation/networking/udplite.txt in the kernel tree
	err = proto_register(&udplitev6_prot, 1);
	if (err)
		goto out_unregister_udp_proto;
	// same as above, for raw sockets
	err = proto_register(&rawv6_prot, 1);
	if (err)
		goto out_unregister_udplite_proto;

	/* Register the socket-side information for inet6_create.  */
	for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) // initialise the array of protocol list heads
		INIT_LIST_HEAD(r);
	/* We MUST register RAW sockets before we create the ICMP6, IGMP6, or NDISC control sockets. */
	// add the given structure to the list array above, according to the protocol type it declares
	inet6_register_protosw(&rawv6_protosw);

	/* Register the family here so that the init calls below will be able to create sockets. (?? is this dangerous ??) */
	// register the IPv6 protocol family, mainly its socket-creation function
	err = sock_register(&inet6_family_ops);
	if (err)
		goto out_unregister_raw_proto;

	/* Initialise ipv6 mibs */
	err = init_ipv6_mibs(); // all IPv6-related statistics
	if (err)
		goto out_unregister_sock;
	/* ipngwg API draft makes clear that the correct semantics for TCP and UDP is to consider one TCP and UDP instance
	 * in a host available by both INET and INET6 APIs and able to communicate via both network protocols.
	 */
#ifdef CONFIG_SYSCTL
	ipv6_sysctl_register(); // initialise the IPv6 sysctl/proc tunables
#endif
	// register the ICMPv6 protocol
	err = icmpv6_init(&inet6_family_ops);
	if (err)
		goto icmp_fail;
	// initialise neighbour discovery (the IPv6 counterpart of ARP)
	err = ndisc_init(&inet6_family_ops);
	if (err)
		goto ndisc_fail;
	// initialise IGMPv6
	err = igmp6_init(&inet6_family_ops);
	if (err)
		goto igmp_fail;
	// initialise the IPv6 part of netfilter
	err = ipv6_netfilter_init();
	if (err)
		goto netfilter_fail;

	/* Create /proc/foo6 entries. */
#ifdef CONFIG_PROC_FS // register the per-protocol statistics entries under /proc
	err = -ENOMEM;
	if (raw6_proc_init())
		goto proc_raw6_fail;
	if (tcp6_proc_init())
		goto proc_tcp6_fail;
	if (udp6_proc_init())
		goto proc_udp6_fail;
	if (udplite6_proc_init())
		goto proc_udplite6_fail;
	if (ipv6_misc_proc_init())
		goto proc_misc6_fail;
	if (ac6_proc_init())
		goto proc_anycast6_fail;
	if (if6_proc_init())
		goto proc_if6_fail;
#endif
	ip6_route_init(); // initialise IPv6 routing
	ip6_flowlabel_init();// IPv6 flow labels; registers the proc entry that lists them

	// initialise the rtnetlink-related parts, some fields of the route template and other facilities
	err = addrconf_init();
	if (err)
		goto addrconf_fail;
	/* Init v6 extension headers. */
	// register the handlers for the extension headers introduced by IPv6
	ipv6_rthdr_init();
	ipv6_frag_init();
	ipv6_nodata_init();
	ipv6_destopt_init();

	/* Init v6 transport protocols. */
	// the main transport-layer protocols
	udpv6_init();
	udplitev6_init();
	tcpv6_init();

	// finally register the IPv6 packet-type handler
	ipv6_packet_init();
	err = 0;
out:
	return err;
	...... // the error-unwinding labels follow here (elided in this excerpt)
}

下面我们主要看ipv6协议部分流程,其他部分在各自相关文章中介绍。

ipv6扩展头,路由包头注册

1
2
3
4
5
/*
 * Register rthdr_protocol as the handler for the IPv6 Routing extension
 * header (IPPROTO_ROUTING). A registration failure is only logged; nothing
 * is reported back to the caller.
 */
void __init ipv6_rthdr_init(void)
{
	if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0)
		printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n");
}

ipv6扩展头,分片包头注册

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * Register frag_protocol as the handler for the IPv6 Fragment extension
 * header, then fill in the ip6_frags descriptor and hand it to
 * inet_frags_init(). A registration failure is only logged.
 */
void __init ipv6_frag_init(void)
{
	int rc = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);

	if (rc < 0)
		printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");

	/* Tunables and per-queue size. */
	ip6_frags.ctl = &ip6_frags_ctl;
	ip6_frags.qsize = sizeof(struct frag_queue);

	/* Callbacks used by the generic fragment code. */
	ip6_frags.hashfn = ip6_hashfn;
	ip6_frags.match = ip6_frag_match;
	ip6_frags.constructor = ip6_frag_init;
	ip6_frags.frag_expire = ip6_frag_expire;

	/* No destructor / skb_free hooks are installed. */
	ip6_frags.destructor = NULL;
	ip6_frags.skb_free = NULL;

	inet_frags_init(&ip6_frags);
}
/*
 * Register nodata_protocol as the handler for next-header value
 * IPPROTO_NONE. A registration failure is only logged.
 */
void __init ipv6_nodata_init(void)
{
	int rc = inet6_add_protocol(&nodata_protocol, IPPROTO_NONE);

	if (rc >= 0)
		return;

	printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n");
}

ipv6扩展头,目的选项包头注册

1
2
3
4
5
6
7
8
9
10
/*
 * Register destopt_protocol as the handler for the IPv6 Destination Options
 * extension header (IPPROTO_DSTOPTS). A registration failure is only logged.
 */
void __init ipv6_destopt_init(void)
{
	int rc = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS);

	if (rc >= 0)
		return;

	printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
}
	注册ipv6协议处理函数
/* Register ipv6_packet_type with the device layer via dev_add_pack(). */
void __init ipv6_packet_init(void)
{
	struct packet_type *ptype = &ipv6_packet_type;

	dev_add_pack(ptype);
}

当netif_receive_skb函数向上层递交skb时会根据协议类型调用相关的协议处理函数,那么就会调用到 ipv6_rcv函数了。

1
2
3
4
5
6
/*
 * Packet-type descriptor for IPv6: frames whose type is ETH_P_IPV6 are
 * delivered to ipv6_rcv(); the gso_* members supply the IPv6 GSO callbacks.
 */
static struct packet_type ipv6_packet_type = {
	.func		= ipv6_rcv,
	.type		= __constant_htons(ETH_P_IPV6),
	.gso_segment	= ipv6_gso_segment,
	.gso_send_check	= ipv6_gso_send_check,
};

ipv6协议处理函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
/*
 * ipv6_rcv - receive handler installed as ipv6_packet_type.func.
 *
 * Validates the fixed IPv6 header (length, version, payload length), trims
 * the skb to the advertised payload length, parses a leading hop-by-hop
 * options header if present, and then passes the packet through the
 * NF_IP6_PRE_ROUTING netfilter hook with ip6_rcv_finish() as continuation.
 *
 * Returns 0 on every drop path; otherwise returns whatever NF_HOOK() returns.
 * Every path that takes rcu_read_lock() releases it before returning.
 */
int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct ipv6hdr *hdr;
	u32             pkt_len;
	struct inet6_dev *idev;

	// only devices belonging to init_net are handled; anything else is dropped
	if (dev->nd_net != &init_net) {
		kfree_skb(skb);
		return 0;
	}
	// frame whose MAC address belongs to another host
	if (skb->pkt_type == PACKET_OTHERHOST) {
		kfree_skb(skb);
		return 0;
	}
	rcu_read_lock();
	// fetch the IPv6 configuration structure of the receiving device
	idev = __in6_dev_get(skb->dev);

	IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES);
	// if the skb is shared, continue with a private clone (NULL means the clone failed)
	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
		IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS);
		rcu_read_unlock();
		goto out;
	}
	// clear the control block that stores the extension-header parsing results
	memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));

	// remember the index of the interface the packet arrived on:
	// taken from the dst's inet6_dev when skb->dst is set, otherwise from dev
	IP6CB(skb)->iif = skb->dst ? ip6_dst_idev(skb->dst)->dev->ifindex : dev->ifindex;

	// make sure the whole fixed header is available (40 bytes for IPv6)
	if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
		goto err;

	hdr = ipv6_hdr(skb); // fetch the header

	if (hdr->version != 6) // verify the version
		goto err;

	// the transport header (or first extension header) directly follows the network header
	skb->transport_header = skb->network_header + sizeof(*hdr);
	// remember the offset, inside the IPv6 header, of the field naming the next header
	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
	pkt_len = ntohs(hdr->payload_len); // IPv6 payload length

	/* pkt_len may be zero if Jumbo payload option is present */
	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { // payload length is given directly, i.e. not a candidate jumbogram
		if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { // the skb holds less data than the header claims
			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS);
			goto drop;
		}
		// if skb->len > (pkt_len + sizeof(struct ipv6hdr)), try to trim skb->len down to that size
		// (much simpler than the IPv4 equivalent)
		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
			goto drop;
		}
		hdr = ipv6_hdr(skb); // re-fetch the IPv6 header after the trim
	}
	if (hdr->nexthdr == NEXTHDR_HOP) { // a hop-by-hop options header follows
		if (ipv6_parse_hopopts(skb) < 0) {// parse it
			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
			rcu_read_unlock();
			// no kfree_skb() here: the failure paths visible in ipv6_parse_hopopts()/ip6_parse_tlv() already freed the skb
			return 0;
		}
	}
	rcu_read_unlock();
	// run the IPv6 PRE_ROUTING netfilter hook, which then calls ip6_rcv_finish
	return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish);
err:
	IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
drop:
	rcu_read_unlock();
	kfree_skb(skb);
out:
	return 0;
}

解析扩展头逐个跳段中的巨量负载选项

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * ipv6_parse_hopopts - parse the hop-by-hop options header that directly
 * follows the fixed IPv6 header.
 *
 * Returns 1 when the options were parsed; the transport header is then
 * advanced past the options header and the control block's nhoff is set to
 * sizeof(struct ipv6hdr). Returns -1 on failure; the skb is freed here when
 * it is too short, and by ip6_parse_tlv() on its visible "bad" path.
 */
int ipv6_parse_hopopts(struct sk_buff *skb)
{
	struct inet6_skb_parm *cb = IP6CB(skb);

	/*
	 * skb_network_header(skb) is equal to skb->data, and the network
	 * header length is always sizeof(struct ipv6hdr) here, by definition
	 * of hop-by-hop options.
	 */

	/* The first 8 bytes of the options header must be present ... */
	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8))
		goto too_short;

	/* ... and so must the full length announced in its second byte. */
	if (!pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
				 ((skb_transport_header(skb)[1] + 1) << 3))))
		goto too_short;

	cb->hop = sizeof(struct ipv6hdr);	/* 40 bytes */

	/* The actual TLV walk. */
	if (!ip6_parse_tlv(tlvprochopopt_lst, skb))
		return -1;

	/* Step the transport header over the options header. */
	skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
	cb = IP6CB(skb);
	/* The next-header field to use is now the one at the start of the options header. */
	cb->nhoff = sizeof(struct ipv6hdr);
	return 1;

too_short:
	kfree_skb(skb);
	return -1;
}

解析tlv编码的扩展选项头

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
/*
 * ip6_parse_tlv - walk the TLV-encoded options of the extension header at
 * the skb's transport header.
 *
 * @procs: table of { type, func } handlers, terminated by an entry whose
 *         type is negative.
 * @skb:   packet being parsed.
 *
 * Returns 1 when all options were consumed and the lengths add up exactly.
 * Returns 0 on failure: either a handler returned 0 (the handlers shown in
 * this file free or hand off the skb themselves before doing so), or the
 * lengths were inconsistent, in which case the skb is freed at "bad".
 */
static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
{
	struct tlvtype_proc *curr;
	const unsigned char *nh = skb_network_header(skb); // start of the network header
	int off = skb_network_header_len(skb); // length of the network header
	int len = (skb_transport_header(skb)[1] + 1) << 3; // length of this extension header: (hdrlen byte + 1) * 8

	if (skb_transport_offset(skb) + len > skb_headlen(skb)) // header extends beyond the linear data
		goto bad;
	off += 2; // skip the two leading bytes: next-header and header-extension-length
	len -= 2;

	while (len > 0) {
		int optlen = nh[off + 1] + 2; // option data length + 2 (the option type byte and the length byte)
		switch (nh[off]) { // option type
			case IPV6_TLV_PAD0: // Pad1 option: a single byte with no length field
				optlen = 1;
				break;
			case IPV6_TLV_PADN:// PadN option: just skipped
				break;
			default: // any other option
				if (optlen > len)
					goto bad;

				for (curr = procs; curr->type >= 0; curr++) {
					if (curr->type == nh[off]) { // type matches: call the handler from the table
						/* type specific length/alignment checks will be performed in the func(). */
						if (curr->func(skb, off) == 0)
							return 0;
						break;
					}
				}
				// the loop ran into the terminator: no handler knows this option type
				if (curr->type < 0) {
					if (ip6_tlvopt_unknown(skb, off) == 0) // handle the unknown option
						return 0;
				}
				break;
		}
		off += optlen; // advance to the next option
		len -= optlen; // remaining length shrinks accordingly
	}
	if (len == 0)
		return 1; // everything parsed, lengths consistent
bad:
	kfree_skb(skb);
	return 0;
}

处理未知的选项

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * ip6_tlvopt_unknown - act on an option whose type has no handler.
 *
 * The two highest-order bits of the option type select the action:
 *   0 - ignore the option (returns 1, parsing continues)
 *   1 - drop the packet
 *   2 - send an ICMPv6 parameter problem (code ICMPV6_UNK_OPTION, pointer
 *       at the option) regardless of the destination
 *   3 - as 2, but only when the destination is not multicast; otherwise
 *       just drop the packet
 *
 * Returns 0 in every case except "ignore". On the ICMP path the skb is not
 * freed here (presumably icmpv6_param_prob() takes care of it - confirm).
 */
static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
{
	const int action = (skb_network_header(skb)[optoff] & 0xC0) >> 6;
	int send_icmp;

	if (action == 0)
		return 1;

	if (action == 2) {
		send_icmp = 1;
	} else if (action == 3) {
		/* Actually, it is redundant check. icmp_send will recheck in any case. */
		send_icmp = !ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr);
	} else {
		send_icmp = 0;
	}

	if (send_icmp) {
		icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
		return 0;
	}

	kfree_skb(skb);
	return 0;
}

到这里需要解释一下:上面只解析了紧跟在 IPv6 基本头之后的逐跳选项扩展头,其后可能还有其他扩展头,它们会在后面的流程中继续解析。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * ip6_rcv_finish - continuation of ipv6_rcv() after the PRE_ROUTING hook.
 * Performs a route lookup when the skb has no dst attached yet, then hands
 * the packet to dst_input() and returns its result.
 */
inline int ip6_rcv_finish( struct sk_buff *skb)
{
	if (!skb->dst)
		ip6_route_input(skb);	/* no route yet: look one up */

	return dst_input(skb);
}
/*
 * dst_input - call the input method of the skb's route entry.
 * The call is repeated for as long as it returns NET_XMIT_BYPASS; a return
 * of 0 or any other value ends the loop and is passed back to the caller.
 */
static inline int dst_input(struct sk_buff *skb)
{
	int rc;

	do {
		rc = skb->dst->input(skb);
		if (likely(rc == 0))
			break;
		/* Oh, Jamal... Seems, I will not forgive you this mess. :-) */
	} while (likely(rc == NET_XMIT_BYPASS));

	return rc;
}

现在我们假设包是到本地的,那么上面的input函数就是

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/*
 * ip6_input - input routine for locally delivered packets.
 * Runs the NF_IP6_LOCAL_IN netfilter hook with ip6_input_finish() as the
 * continuation and returns the hook's result.
 */
int ip6_input(struct sk_buff *skb)
{
	struct net_device *in_dev = skb->dev;

	return NF_HOOK(PF_INET6, NF_IP6_LOCAL_IN, skb, in_dev, NULL,
		       ip6_input_finish);
}
/*
 * ip6_input_finish - deliver a local packet to raw sockets and to the
 * handler registered for its next-header value.
 *
 * Extension headers are processed one at a time: a handler that returns a
 * value > 0 has advanced the transport header and updated nhoff, so the
 * function jumps back to "resubmit" to dispatch the following header.
 * Always returns 0. The whole function runs under rcu_read_lock().
 */
static int ip6_input_finish(struct sk_buff *skb)
{
	struct inet6_protocol *ipprot;
	struct sock *raw_sk;
	unsigned int nhoff;
	int nexthdr;
	u8 hash;
	struct inet6_dev *idev;

	/* Parse extension headers */
	rcu_read_lock();
resubmit:
	idev = ip6_dst_idev(skb->dst);
	// move skb->data up to the transport header
	if (!pskb_pull(skb, skb_transport_offset(skb)))
		goto discard;

	nhoff = IP6CB(skb)->nhoff;
	nexthdr = skb_network_header(skb)[nhoff];// protocol number of the next header

	// raw socket delivery
	raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
		raw_sk = NULL;

	// hand the data to the upper-layer handler registered at init time: mainly tcp, udp etc.,
	// but also the handlers of some IPv6 extension headers
	hash = nexthdr & (MAX_INET_PROTOS - 1);
	if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
		int ret;
		if (ipprot->flags & INET6_PROTO_FINAL) {
			struct ipv6hdr *hdr;
			/* Free reference early: we don't need it any more,
			   and it may hold ip_conntrack module loaded indefinitely. */
			nf_reset(skb);

			skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb));
			hdr = ipv6_hdr(skb);
			// multicast destination that this device has not joined and that is not MLD traffic: discard
			if (ipv6_addr_is_multicast(&hdr->daddr) && !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, &hdr->saddr)
					&& !ipv6_is_mld(skb, nexthdr))
				goto discard;
		}
		// IPsec (xfrm) policy check, skipped for handlers flagged INET6_PROTO_NOPOLICY
		if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto discard;

		ret = ipprot->handler(skb); // upper-layer / extension-header handler
		if (ret > 0)
			goto resubmit; // another header follows: dispatch again
		else if (ret == 0)
			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS);
	} else { // no handler registered for this next-header value
		if (!raw_sk) {
			// nobody took the packet: report an unknown next header to the sender
			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS);
				icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff, skb->dev);
			}
		} else
			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS);
		kfree_skb(skb);
	}
	rcu_read_unlock();
	return 0;
discard:
	IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS);
	rcu_read_unlock();
	kfree_skb(skb);
	return 0;
}

ipv6选项处理

1
2
3
4
5
6
7
8
9
10
11
static struct tlvtype_proc tlvprochopopt_lst[] = {
	{
		.type   = IPV6_TLV_ROUTERALERT,
		.func   = ipv6_hop_ra,
	},
	{
		.type   = IPV6_TLV_JUMBO,
		.func   = ipv6_hop_jumbo,
	},
	{ -1, }
};

解析路由器告警(Router Alert)选项

1
2
3
4
5
6
7
8
9
10
11
12
/*
 * ipv6_hop_ra - handler for the Router Alert hop-by-hop option.
 *
 * @optoff: offset of the option's type byte from the network header.
 *
 * The option's length byte (which counts only the option data, not the
 * type and length bytes) must be 2. On success the option offset is stored
 * in the control block's ra field and 1 is returned; otherwise the skb is
 * freed and 0 is returned.
 */
static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
{
	const unsigned char *opt = skb_network_header(skb) + optoff;

	if (opt[1] != 2) {
		LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", opt[1]);
		kfree_skb(skb);
		return 0;
	}

	IP6CB(skb)->ra = optoff;	/* remember where the option sits */
	return 1;
}

解析jumbo frame选项

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/*
 * ipv6_hop_jumbo - handler for the Jumbo Payload hop-by-hop option.
 *
 * @optoff: offset of the option's type byte from the network header.
 *
 * Checks, in order:
 *   - the option data length is 4 and the option sits at an offset of the
 *     form 4n + 2 (so the 32-bit length that follows is 4-byte aligned);
 *   - the jumbo length is larger than IPV6_MAXPLEN;
 *   - the payload_len field of the IPv6 header is zero;
 *   - the skb really holds that much data.
 * Then trims the skb to the jumbo length.
 *
 * Returns 1 on success, 0 on failure. On failure the skb is either freed
 * here or passed to icmpv6_param_prob().
 */
static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
{
	const unsigned char *net_hdr = skb_network_header(skb);
	u32 jumbo_len;

	if (!(net_hdr[optoff + 1] == 4 && (optoff & 3) == 2)) {
		LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", net_hdr[optoff+1]);
		IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS);
		goto drop;
	}

	/* 32-bit payload length carried by the option, in network byte order. */
	jumbo_len = ntohl(*(__be32 *)(net_hdr + optoff + 2));

	/* A jumbo length that would fit the normal 16-bit field is invalid. */
	if (jumbo_len <= IPV6_MAXPLEN) {
		IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
		return 0;
	}

	/* With a jumbo option, the header's own payload length must be zero. */
	if (ipv6_hdr(skb)->payload_len) {
		IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
		return 0;
	}

	/* The skb is shorter than the advertised length: truncated packet. */
	if (jumbo_len > skb->len - sizeof(struct ipv6hdr)) {
		IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INTRUNCATEDPKTS);
		goto drop;
	}

	/* Trim the skb down to header + jumbo payload if it is longer. */
	if (pskb_trim_rcsum(skb, jumbo_len + sizeof(struct ipv6hdr)))
		goto drop;

	return 1;

drop:
	kfree_skb(skb);
	return 0;
}

目的选项处理

1
2
3
4
5
6
7
8
9
/*
 * Handlers for the options of the destination options header. The only
 * entry, for IPV6_TLV_HAO, is compiled in when Mobile IPv6 support
 * (CONFIG_IPV6_MIP6, built in or as a module) is enabled. The list ends
 * with an entry whose type is negative.
 */
static struct tlvtype_proc tlvprocdestopt_lst[] = {
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
	{ .type = IPV6_TLV_HAO, .func = ipv6_dest_hao },
#endif
	{ -1, NULL }	/* terminator */
};

解析目的选项

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/*
 * ipv6_dest_hao - handler for the IPV6_TLV_HAO destination option.
 *
 * @optoff: offset of the option from the network header.
 *
 * Validates the option (not seen before, length 16, unicast address), runs
 * xfrm6_input_addr() on it, and then swaps the address carried in the
 * option with the source address of the IPv6 header.
 *
 * Returns 1 on success; on any failure frees the skb and returns 0.
 */
static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
{
	struct ipv6_destopt_hao *hao;
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct in6_addr tmp_addr;
	int ret;

	if (opt->dsthao) { // an HAO option was already processed for this packet
		LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n");
		goto discard;
	}
	opt->dsthao = opt->dst1;
	opt->dst1 = 0;

	// locate the option, optoff bytes past the start of the network header
	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);

	if (hao->length != 16) { // required option length
		LIMIT_NETDEBUG(KERN_DEBUG "hao invalid option length = %d\n", hao->length);
		goto discard;
	}
	if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { // the address is not unicast
		LIMIT_NETDEBUG(KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr));
		goto discard;
	}
	// IPsec (xfrm) related processing
	ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr, (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS);
	if (unlikely(ret < 0))
		goto discard;

	if (skb_cloned(skb)) { // the packet data is shared with a clone
		// the header is about to be modified, so get a private copy of the data first
		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
			goto discard;

		// re-derive the header pointers, since the data may have moved
		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
		ipv6h = ipv6_hdr(skb);
	}
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;

	// swap the source address in the IPv6 header with the address in the option
	ipv6_addr_copy(&tmp_addr, &ipv6h->saddr);
	ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
	ipv6_addr_copy(&hao->addr, &tmp_addr);

	if (skb->tstamp.tv64 == 0)
		__net_timestamp(skb); // record a timestamp if none is set yet

	return 1;
discard:
	kfree_skb(skb);
	return 0;
}

ipv6扩展头处理

我们只介绍跟ipv6扩展头相关的实现,其他的“扩展头”(tcp, udp 等)虽然也按下一个首部的方式处理,但实际是传输层的内容,将在其他文章中介绍。

路由扩展首部

1
2
3
4
5
6
7
8
/*
 * Common leading part of an IPv6 routing extension header, as read by
 * ipv6_rthdr_rcv().
 */
struct ipv6_rt_hdr {
	__u8            nexthdr;	/* value of the next-header field following this header */
	__u8            hdrlen;		/* header length; the receive code uses (hdrlen + 1) << 3 as the byte length */
	__u8            type;		/* routing header type, e.g. IPV6_SRCRT_TYPE_2 */
	__u8            segments_left;	/* remaining addresses to visit; 0 means this header is finished */

	/* type specific data variable length field */
};

路由扩展首部处理结构

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
/*
 * Protocol descriptor registered for IPPROTO_ROUTING by ipv6_rthdr_init():
 * routing headers are handled by ipv6_rthdr_rcv().
 */
static struct inet6_protocol rthdr_protocol = {
	.flags   = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
	.handler = ipv6_rthdr_rcv,
};
/*
 * ipv6_rthdr_rcv - handler for the IPv6 routing extension header.
 *
 * With segments_left == 0 the header is simply skipped: the transport
 * header and nhoff are advanced and 1 is returned so that the caller
 * dispatches the next header.
 *
 * Otherwise only type 2 headers are processed (and only when Mobile IPv6
 * support is compiled in): the next address from the header is swapped with
 * the IPv6 destination address, the packet is re-routed and passed on via
 * dst_input(). All other types get an ICMPv6 parameter problem.
 *
 * Returns 1 when the caller should continue with the next header, -1 in all
 * other cases (packet forwarded, dropped, or answered with ICMPv6).
 */
static int ipv6_rthdr_rcv(struct sk_buff *skb)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct in6_addr *addr = NULL;
	struct in6_addr daddr;
	struct inet6_dev *idev;
	int n, i;
	struct ipv6_rt_hdr *hdr;
	struct rt0_hdr *rthdr;
	int accept_source_route = ipv6_devconf.accept_source_route;

	idev = in6_dev_get(skb->dev); // inet6_dev of the receiving device
	if (idev) {
		// use the smaller of the global and the per-device accept_source_route setting
		if (accept_source_route > idev->cnf.accept_source_route)
			accept_source_route = idev->cnf.accept_source_route;
		in6_dev_put(idev);
	}
	// make sure the first 8 bytes, and then the whole routing header, are present in the linear data
	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) +
					((skb_transport_header(skb)[1] + 1) << 3)))) {
		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
		kfree_skb(skb);
		return -1;
	}
	hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb); // the routing extension header
	// destination is multicast, or the frame was not addressed to this host at link level
	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) {
		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
		kfree_skb(skb);
		return -1;
	}
looped_back:
	if (hdr->segments_left == 0) { // no segments left: nothing to route, skip this header
		switch (hdr->type) {
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			case IPV6_SRCRT_TYPE_2:
				/* Silently discard type 2 header unless it was processed by own */
				if (!addr) {
					IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
					kfree_skb(skb);
					return -1;
				}
				break;
#endif
			default:
				break;

		}
		opt->lastopt = opt->srcrt = skb_network_header_len(skb);
		skb->transport_header += (hdr->hdrlen + 1) << 3; // position of the next header
		opt->dst0 = opt->dst1;
		opt->dst1 = 0;
		opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); // offset of this header's next-header field from the network header
		return 1;
	}
	switch (hdr->type) {
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
		case IPV6_SRCRT_TYPE_2:
			if (accept_source_route < 0)
				goto unknown_rh;
			/* Silently discard invalid RTH type 2 */
			if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
				IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
				kfree_skb(skb);
				return -1;
			}
			break;
#endif
		default:
			goto unknown_rh;
	}
	/* This is the routing header forwarding algorithm from RFC 2460, page 16. */

	n = hdr->hdrlen >> 1; // number of addresses in the routing header
	if (hdr->segments_left > n) {
		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - skb_network_header(skb)));
		return -1;
	}
	/* We are about to mangle packet header. Be careful!
	   Do not damage packets queued somewhere.  */
	if (skb_cloned(skb)) {
		/* the copy is a forwarded packet */
		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -1;
		}
		// re-derive the header pointer, since the data may have moved
		hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
	}
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;

	i = n - --hdr->segments_left; // 1-based index, in the address list, of the next address to visit

	rthdr = (struct rt0_hdr *) hdr;
	addr = rthdr->addr; // start of the address list
	addr += i - 1; // step to the next address to visit

	switch (hdr->type) {
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
		case IPV6_SRCRT_TYPE_2:
			if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, IPPROTO_ROUTING) < 0) {
				IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
				kfree_skb(skb);
				return -1;
			}
			if (!ipv6_chk_home_addr(addr)) {
				IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
				kfree_skb(skb);
				return -1;
			}
			break;
#endif
		default:
			break;
	}
	if (ipv6_addr_is_multicast(addr)) { // the next address is a multicast address
		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
		kfree_skb(skb);
		return -1;
	}
	// swap the IPv6 destination address with this address
	ipv6_addr_copy(&daddr, addr);
	ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
	// drop the old route: the destination has changed
	dst_release(xchg(&skb->dst, NULL));

	ip6_route_input(skb); // route lookup for the new destination

	if (skb->dst->error) {
		// move skb->data back to the network header and let the error route's input deal with the packet
		skb_push(skb, skb->data - skb_network_header(skb));
		dst_input(skb);
		return -1;
	}

	if (skb->dst->dev->flags & IFF_LOOPBACK) { // the new route points at the loopback device
		if (ipv6_hdr(skb)->hop_limit <= 1) { // hop limit exhausted
			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
			// send an ICMPv6 "time exceeded - hop limit exceeded in transit" message to the source and drop the packet
			icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev);
			kfree_skb(skb);
			return -1;
		}
		ipv6_hdr(skb)->hop_limit--;
		goto looped_back;
	}
	// move skb->data back to the network header
	skb_push(skb, skb->data - skb_network_header(skb));
	dst_input(skb); // the packet is expected to be forwarded at this point
	return -1;
unknown_rh:
	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb_network_header(skb));
	return -1;
}

ipv6分片包扩展首部处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/*
 * Protocol descriptor registered for IPPROTO_FRAGMENT by ipv6_frag_init():
 * fragment headers are handled by ipv6_frag_rcv().
 */
static struct inet6_protocol frag_protocol = {
	.flags   = INET6_PROTO_NOPOLICY,
	.handler = ipv6_frag_rcv,
};
/*
 * ipv6_frag_rcv - handler for the IPv6 fragment extension header.
 *
 * Returns 1 when the header turns out not to describe a real fragment; the
 * transport header is then advanced past it so that the caller continues
 * with the next header. Otherwise the fragment is queued for reassembly and
 * the result of ip6_frag_queue() is returned, or -1 on error.
 */
static int ipv6_frag_rcv(struct sk_buff *skb)
{
	struct frag_hdr *fhdr;
	struct frag_queue *fq;
	struct ipv6hdr *hdr = ipv6_hdr(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);

	/* Jumbo payload inhibits frag. header */
	if (hdr->payload_len == 0) { // payload_len of 0 means a jumbo payload, which must not be fragmented
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
		return -1;
	}
	// make sure the whole fragment header is present
	if (!pskb_may_pull(skb, (skb_transport_offset(skb) + sizeof(struct frag_hdr)))) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
		return -1;
	}
	// re-read the headers after the pull above
	hdr = ipv6_hdr(skb);
	fhdr = (struct frag_hdr *)skb_transport_header(skb); // the fragment header

	// none of the bits in mask 0xFFF9 set (offset bits and, presumably, the more-fragments bit):
	// this is a complete packet rather than a fragment
	if (!(fhdr->frag_off & htons(0xFFF9))) {
		/* It is not a fragmented frame */
		skb->transport_header += sizeof(struct frag_hdr); // advance the transport header to the next header
		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
		return 1;
	}
	if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh) // reassembly memory is above the high threshold
		ip6_evictor(ip6_dst_idev(skb->dst));

	// find or create the reassembly queue for this (id, source, destination)
	if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_dst_idev(skb->dst))) != NULL) {
		int ret;
		spin_lock(&fq->q.lock);
		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); // enqueue and possibly reassemble
		spin_unlock(&fq->q.lock);
		fq_put(fq);
		return ret;
	}
	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
	return -1;
}
/*
 * fq_find - look up, or create, the reassembly queue for the fragments
 * identified by (id, src, dst).
 *
 * Returns the enclosing frag_queue on success. When inet_frag_find()
 * yields nothing, IPSTATS_MIB_REASMFAILS is incremented on @idev and NULL
 * is returned.
 */
static __inline__ struct frag_queue * fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, struct inet6_dev *idev)
{
	struct ip6_create_arg key;
	struct inet_frag_queue *found;
	unsigned int bucket;

	key.id = id;
	key.src = src;
	key.dst = dst;

	/* Hash over id, source and destination selects the bucket. */
	bucket = ip6qhashfn(id, src, dst);

	found = inet_frag_find(&ip6_frags, &key, bucket);	/* find or create */
	if (found != NULL)
		return container_of(found, struct frag_queue, q);

	/* Nothing found and nothing could be created. */
	IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
	return NULL;
}
/*
 * inet_frag_find - look up the fragment queue matching @key in hash bucket
 * @hash of @f, creating a new one when there is no match.
 *
 * @f:    fragment bookkeeping descriptor (hash table, lock, callbacks).
 * @key:  opaque lookup key, interpreted by f->match().
 * @hash: bucket index, already computed by the caller.
 *
 * Returns the queue with its reference count raised, or NULL when no queue
 * exists and a new one could not be created.
 *
 * The read lock is released on both paths. It must be dropped before
 * calling inet_frag_create(): that function ends in inet_frag_intern(),
 * which takes write_lock(&f->lock) on the very same lock.
 */
struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, unsigned int hash)
{
	struct inet_frag_queue *q;
	struct hlist_node *n;

	read_lock(&f->lock);
	hlist_for_each_entry(q, n, &f->hash[hash], list) { /* scan the hash bucket */

		/* f->match is the callback installed at init time (ip6_frag_match for IPv6) */
		if (f->match(q, key)) {
			atomic_inc(&q->refcnt);
			read_unlock(&f->lock);
			return q;
		}
	}
	read_unlock(&f->lock);

	/* not found: create a new queue */
	return inet_frag_create(f, key, hash);
}

创建分片队列

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/*
 * inet_frag_create - allocate a new fragment queue for @arg and insert it
 * into hash bucket @hash of @f.
 *
 * Returns NULL when the allocation fails, otherwise whatever
 * inet_frag_intern() returns.
 */
static struct inet_frag_queue *inet_frag_create(struct inet_frags *f, void *arg, unsigned int hash)
{
	struct inet_frag_queue *fresh = inet_frag_alloc(f, arg);

	if (fresh != NULL)
		return inet_frag_intern(fresh, f, hash, arg);	/* add to the hash table */

	return NULL;
}
/*
 * inet_frag_alloc - allocate and initialise a fragment queue head.
 *
 * Allocates f->qsize zeroed bytes with GFP_ATOMIC, runs f->constructor on
 * the new queue with @arg, accounts the size in f->mem, prepares the
 * expiry timer (callback f->frag_expire, argument: the queue) and the
 * spinlock, and sets the reference count to 1.
 *
 * Returns the new queue, or NULL when the allocation fails.
 */
static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg)
{
	struct inet_frag_queue *queue = kzalloc(f->qsize, GFP_ATOMIC);

	if (!queue)
		return NULL;

	f->constructor(queue, arg);
	atomic_add(f->qsize, &f->mem);
	setup_timer(&queue->timer, f->frag_expire, (unsigned long)queue);
	spin_lock_init(&queue->lock);
	atomic_set(&queue->refcnt, 1);

	return queue;
}
/*
 * inet_frag_intern - insert the freshly allocated queue @qp_in into hash
 * bucket @hash of @f.
 *
 * On SMP the bucket is searched again under the write lock; when a matching
 * queue is already there, @qp_in is marked COMPLETE and released, and the
 * existing queue is returned with its reference count raised.
 *
 * Otherwise the expiry timer is armed, and @qp_in is added to the hash
 * bucket and to the tail of the LRU list.
 *
 * Returns the queue that is in the hash table on exit.
 */
static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, struct inet_frags *f, unsigned int hash, void *arg)
{
	struct inet_frag_queue *qp;
#ifdef CONFIG_SMP
	struct hlist_node *n;
#endif

	write_lock(&f->lock);
#ifdef CONFIG_SMP
	// another CPU may have created the same queue in the meantime, so check once more
	hlist_for_each_entry(qp, n, &f->hash[hash], list) {
		if (f->match(qp, arg)) { // already created
			atomic_inc(&qp->refcnt);
			write_unlock(&f->lock);
			qp_in->last_in |= COMPLETE;
			inet_frag_put(qp_in, f); // release the queue that was just allocated
			return qp;

		}
	}
#endif
	qp = qp_in;
	// arm the expiry timer; an extra reference is taken when mod_timer() returns 0
	if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout))
		atomic_inc(&qp->refcnt);

	// take a reference for the hash table, then link the queue into the bucket and the LRU list
	atomic_inc(&qp->refcnt);
	hlist_add_head(&qp->list, &f->hash[hash]);
	list_add_tail(&qp->lru_list, &f->lru_list);
	f->nqueues++;
	write_unlock(&f->lock);
	return qp;
}

入队重组

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/*
 * ip6_frag_queue - insert one fragment into reassembly queue @fq.
 *
 * @fq:    reassembly queue (ipv6_frag_rcv() calls this with fq->q.lock held).
 * @skb:   the fragment.
 * @fhdr:  its fragment header.
 * @nhoff: offset of the next-header field, stored in the queue when this is
 *         the fragment with offset 0.
 *
 * Validates offset and length, trims overlaps against the neighbouring
 * fragments already queued, links the skb into the offset-ordered list and,
 * once the first and last fragments are present and the byte count matches,
 * calls ip6_frag_reasm().
 *
 * Returns the result of ip6_frag_reasm() when reassembly was attempted and
 * -1 in every other case (fragment queued, dropped, or answered with
 * ICMPv6).
 */
static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff)
{
	struct sk_buff *prev, *next;
	struct net_device *dev;
	int offset, end;

	if (fq->q.last_in & COMPLETE) /* reassembly already finished */
		goto err;

	/* start of this fragment; the low 3 bits are masked off, so the offset is a multiple of 8 */
	offset = ntohs(fhdr->frag_off) & ~0x7;
	/* end of this fragment within the whole packet:
	 * offset + (payload length - bytes between the IPv6 header and the end of the fragment header) */
	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -  ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));

	/* end position beyond the maximum payload length */
	if ((unsigned int)end > IPV6_MAXPLEN) {
		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - skb_network_header(skb)));
		return -1;
	}
	/* a complete checksum is available */
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		const unsigned char *nh = skb_network_header(skb);
		/* subtract the checksum of everything up to the end of the fragment header */
		skb->csum = csum_sub(skb->csum, csum_partial(nh, (u8 *)(fhdr + 1) - nh, 0));
	}
	/* last fragment (more-fragments flag clear) */
	if (!(fhdr->frag_off & htons(IP6_MF))) {
		/* If we already have some bits beyond end or have different end, the segment is corrupted. */
		if (end < fq->q.len || ((fq->q.last_in & LAST_IN) && end != fq->q.len))
			goto err;

		fq->q.last_in |= LAST_IN; /* the last fragment has been seen */
		fq->q.len = end; /* total length of the packet is now known */
	} else {
		/* Check if the fragment is rounded to 8 bytes. Required by the RFC. */
		if (end & 0x7) {
			/* RFC2460 says always send parameter problem in this case. -DaveM */
			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len));
			return -1;
		}
		if (end > fq->q.len) {
			/* Some bits beyond end -> corruption. */
			if (fq->q.last_in & LAST_IN)
				goto err;
			fq->q.len = end; /* largest end position seen so far */
		}
	}
	if (end == offset) /* empty fragment */
		goto err;

	/* point skb->data at the data following the fragment header */
	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
		goto err;
	/* trim the skb to the fragment's data length if it is longer */
	if (pskb_trim_rcsum(skb, end - offset))
		goto err;

	/* find the position of this fragment in the offset-ordered list */
	prev = NULL;
	for(next = fq->q.fragments; next != NULL; next = next->next) {
		if (FRAG6_CB(next)->offset >= offset)
			break;  /* bingo! */
		prev = next;
	}
	if (prev) { /* there is a preceding fragment */
		/* (start + length of the preceding fragment) - start of this one = overlap */
		int i = (FRAG6_CB(prev)->offset + prev->len) - offset;
		if (i > 0) { /* overlap with the preceding fragment */
			offset += i; /* move the start of this fragment past the overlap */
			if (end <= offset) /* nothing left after the adjustment */
				goto err;
			if (!pskb_pull(skb, i)) /* skb->data += i */
				goto err;
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
	}
	/* following fragments that start before the end of this one */
	while (next && FRAG6_CB(next)->offset < end) {
		/* end of this fragment - start of the following one = overlap */
		int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */
		if (i < next->len) { /* partial overlap: shorten the following fragment */
			if (!pskb_pull(next, i)) /* next->data += i */
				goto err;

			FRAG6_CB(next)->offset += i;    /* its start moves forward */
			fq->q.meat -= i; /* fewer bytes accounted for */
			if (next->ip_summed != CHECKSUM_UNNECESSARY)
				next->ip_summed = CHECKSUM_NONE;
			break;

		} else { /* this fragment completely covers the following one */
			struct sk_buff *free_it = next; /* the covered fragment gets freed */
			next = next->next; /* step to the one after it */
			/* unlink the covered fragment from the list */
			if (prev)
				prev->next = next;
			else
				fq->q.fragments = next;

			fq->q.meat -= free_it->len;
			frag_kfree_skb(free_it, NULL); /* free the covered fragment */
		}
	}
	FRAG6_CB(skb)->offset = offset; /* remember the (possibly adjusted) start of this fragment */

	/* insert this fragment into the list */
	skb->next = next;
	if (prev)
		prev->next = skb;
	else
		fq->q.fragments = skb;

	dev = skb->dev;
	if (dev) {
		fq->iif = dev->ifindex;
		skb->dev = NULL;
	}
	fq->q.stamp = skb->tstamp;
	fq->q.meat += skb->len; /* bytes collected so far */
	atomic_add(skb->truesize, &ip6_frags.mem);

	if (offset == 0) { /* this is the first fragment */
		fq->nhoffset = nhoff;
		fq->q.last_in |= FIRST_IN;
	}
	/* first and last fragments seen and collected bytes == total length: all fragments are here */
	if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len)
		return ip6_frag_reasm(fq, prev, dev); /* reassemble */
	/* not complete yet: move the queue to the tail of the LRU list */
	write_lock(&ip6_frags.lock);
	list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list);
	write_unlock(&ip6_frags.lock);
	return -1;
err:
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
	return -1;
}

重组ip头

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/*
 * Reassemble the fragments queued on @fq into a single skb.
 *
 * @fq:   fragment queue whose pieces are all present
 * @prev: fragment preceding the one just received, or NULL when the
 *        just-received fragment is already at the front of the list
 * @dev:  device stored into the reassembled skb
 *
 * Returns 1 on the success path shown here. The out_oom / out_oversize
 * error paths are elided in this excerpt.
 */
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
{
	struct sk_buff *fp, *head = fq->q.fragments;
	int    payload_len;
	unsigned int nhoff;

	fq_kill(fq); // dequeue this reassembly queue

	/* Make the one we just received the head. */
	if (prev) {
		// Clone the skb that follows prev into fp, then splice fp into the list at that position
		head = prev->next;
		fp = skb_clone(head, GFP_ATOMIC);

		if (!fp)
			goto out_oom;


		fp->next = head->next;
		prev->next = fp;
		// Copy the real first fragment into the skb that head points to
		skb_morph(head, fq->q.fragments);
		head->next = fq->q.fragments->next;

		kfree_skb(fq->q.fragments);// release the original head
		fq->q.fragments = head;
	}
	/* Unfragmented part is taken from the first segment. */
	// Total payload length: bytes between the network header and head->data, minus the
	// fixed IPv6 header, plus the queue's recorded length, minus the fragment header
	payload_len = ((head->data - skb_network_header(head)) - sizeof(struct ipv6hdr) + fq->q.len -  sizeof(struct frag_hdr));
	if (payload_len > IPV6_MAXPLEN) // larger than the maximum payload (65535 per the original note)
		goto out_oversize;

	/* Head of list must not be cloned. */
	// If head is cloned, reallocate its data area so it is private
	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
		goto out_oom;

	/* If the first fragment is fragmented itself, we split it to two chunks: the first with data and paged part
	 * and the second, holding only fragments.
	 */
	if (skb_shinfo(head)->frag_list) {// head already carries its own frag_list
		struct sk_buff *clone;
		int i, plen = 0;

		if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
			goto out_oom;

		// Insert the new skb directly after head
		clone->next = head->next;
		head->next = clone;
		// Hand head's frag_list over to the new skb
		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
		skb_shinfo(head)->frag_list = NULL;
		// Sum the sizes of the page fragments that stay with head
		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
			plen += skb_shinfo(head)->frags[i].size;

		// Whatever part of data_len is not in page fragments moves to the new skb
		clone->len = clone->data_len = head->data_len - plen;
		head->data_len -= clone->len;
		head->len -= clone->len;
		clone->csum = 0;
		clone->ip_summed = head->ip_summed;
		atomic_add(clone->truesize, &ip6_frags.mem);
	}
	/* We have to remove fragment header from datagram and to relocate
	 * header in order to calculate ICV correctly. */
	nhoff = fq->nhoffset;
	// Store the first byte at the transport header (the fragment header's next-header
	// field, per the original note) into the network header at offset nhoff
	skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
	// Slide everything in front of head->data forward by the size of the fragment header, overwriting it
	memmove(head->head + sizeof(struct frag_hdr), head->head, (head->data - head->head) - sizeof(struct frag_hdr));
	// Shift the recorded header positions by the same amount
	head->mac_header += sizeof(struct frag_hdr);
	head->network_header += sizeof(struct frag_hdr);

	skb_shinfo(head)->frag_list = head->next; // remaining fragments become head's frag_list
	skb_reset_transport_header(head);// reset the transport header; it now refers to the header after the fragment header
	skb_push(head, head->data - skb_network_header(head));// make head->data point at the network header
	atomic_sub(head->truesize, &ip6_frags.mem);

	for (fp = head->next; fp; fp = fp->next) { // fold each fragment's length into head
		head->data_len += fp->len;
		head->len += fp->len;
		if (head->ip_summed != fp->ip_summed)
			head->ip_summed = CHECKSUM_NONE;
		else if (head->ip_summed == CHECKSUM_COMPLETE)
			head->csum = csum_add(head->csum, fp->csum); // accumulate each fragment's checksum

		head->truesize += fp->truesize;
		atomic_sub(fp->truesize, &ip6_frags.mem);
	}
	head->next = NULL;
	head->dev = dev;
	head->tstamp = fq->q.stamp;
	ipv6_hdr(head)->payload_len = htons(payload_len); // total payload length, network byte order
	IP6CB(head)->nhoff = nhoff;

	/* Yes, and fold redundant checksum back. 8) */
	if (head->ip_summed == CHECKSUM_COMPLETE) // add the network header's checksum contribution
		head->csum = csum_partial(skb_network_header(head), skb_network_header_len(head), head->csum);

	rcu_read_lock();
	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
	rcu_read_unlock();
	fq->q.fragments = NULL;
	return 1;
	...... // error handling follows (elided in this excerpt)
}

无数据扩展头

1
2
3
4
5
6
7
8
9
/* Protocol table entry for the no-data extension header: packets are handed to ipv6_nodata_rcv. */
static struct inet6_protocol nodata_protocol = {
	.flags		= INET6_PROTO_NOPOLICY,
	.handler	= ipv6_nodata_rcv,
};
/*
 * Receive handler for the no-data extension header.
 * Frees @skb and always returns 0.
 */
static int ipv6_nodata_rcv(struct sk_buff *skb)
{
	kfree_skb(skb); /* release the buffer; nothing else is done with it */
	return 0;
}

目的选项首部处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/* Protocol table entry for the destination options header: packets are handed to ipv6_destopt_rcv. */
static struct inet6_protocol destopt_protocol = {
	.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
	.handler	= ipv6_destopt_rcv,
};
/*
 * Receive handler for the destination options extension header.
 *
 * Checks that the whole header can be pulled, parses its TLV options, and
 * on success advances the transport header past the extension header.
 *
 * Returns 1 when the options were parsed and processing should continue,
 * -1 when the length check or the option parsing failed.
 */
static int ipv6_destopt_rcv(struct sk_buff *skb)
{
	struct inet6_skb_parm *cb = IP6CB(skb);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
	__u16 saved_dst1;
#endif
	struct dst_entry *route;

	/* The first 8 bytes must be present before the length byte is read. */
	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8))
		goto bad_header;
	/* Header length is (byte[1] + 1) * 8 bytes; all of it must be present. */
	if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
				 ((skb_transport_header(skb)[1] + 1) << 3))))
		goto bad_header;

	/* Record the network header length as the offset of this options header. */
	cb->dst1 = skb_network_header_len(skb);
	cb->lastopt = cb->dst1;
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
	saved_dst1 = cb->dst1;
#endif

	/*
	 * Hold our own reference on the route so the failure path below can
	 * still account the error against it (presumably because the parser
	 * may dispose of the skb on failure -- confirm against ip6_parse_tlv).
	 */
	route = dst_clone(skb->dst);

	if (!ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
		IP6_INC_STATS_BH(ip6_dst_idev(route), IPSTATS_MIB_INHDRERRORS);
		dst_release(route);
		return -1;
	}

	dst_release(route);
	/* Step the transport header over the options header just parsed. */
	skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
	cb = IP6CB(skb);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
	cb->nhoff = saved_dst1;
#else
	cb->nhoff = cb->dst1;
#endif
	return 1;

bad_header:
	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
	kfree_skb(skb);
	return -1;
}

kernel, net

« linux内核中tcp连接的断开处理 Android系统典型bootloader分析 »