kk Blog —— 通用基础


date [-d @int|str] [+%s|"+%F %T"]
netstat -ltunp
sar -n DEV 1

给TCP包增加一个ICMP头

发送时加头,接收时解头。加头后原有的checksum失效,需要额外处理(发送端重新计算TCP、ICMP和IP头的校验和)。

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/icmp.h>
#include <linux/slab.h>

#include <net/ip.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/icmp.h>                   /* for icmp_send */
#include <net/route.h>
#include <net/ip6_checksum.h>
#include <net/netns/generic.h>      /* net_generic() */

#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
#include <linux/netfilter_ipv6.h>
#include <net/ip6_route.h>
#endif

#include <net/ip_vs.h>
#include <linux/dns_resolver.h>



#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/icmp.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/kallsyms.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/icmp.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/neighbour.h>
#include <net/netevent.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32))
#include <net/net_namespace.h>
#endif
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

#include <linux/inet.h>
#include <linux/skbuff.h>
#include <linux/kthread.h>

/*
 * TCP port that selects which traffic is wrapped/unwrapped: both hooks in
 * this file compare it against the TCP source and destination ports.
 * Defaults to 80 and is exposed as a module parameter with mode 0644.
 */
int icmp_port = 80;
module_param(icmp_port, int, 0644);

/*
 * Extra header written by local_out() directly after the ICMP header and in
 * front of the original TCP header; pre_route() uses it to recognise
 * packets produced by this module.
 *
 * NOTE(review): len and magic are stored exactly as assigned (no htons()),
 * so both ends presumably need the same byte order -- confirm.
 */
struct addhdr {
	u32 saddr;	/* copy of iph->saddr taken when the packet is wrapped */
	u32 daddr;	/* copy of iph->daddr taken when the packet is wrapped */
	u16 sport;	/* copy of the TCP source port (th->source) */
	u16 dport;	/* copy of the TCP destination port (th->dest) */
	u16 len;	/* skb->len of the wrapped packet, truncated to 16 bits */
	u16 magic;	/* currently also set to skb->len of the wrapped packet */
};

/*
 * NF_INET_LOCAL_OUT hook: wraps an outgoing TCP packet whose source or
 * destination port equals icmp_port into an ICMP echo request.
 *
 * Resulting layout: IP header | struct icmphdr | struct addhdr | original
 * TCP header and payload.
 *
 * @priv:  unused.
 * @skb:   outgoing packet; skb->data is expected to point at the IP header.
 * @state: unused.
 *
 * Always returns NF_ACCEPT. The packet is left untouched when it is not
 * TCP, does not match icmp_port, would grow beyond 1500 bytes, or when the
 * required headroom cannot be obtained.
 */
static unsigned int local_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
	const unsigned int extra = sizeof(struct icmphdr) + sizeof(struct addhdr);
	struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th;
	struct icmphdr *icmp;
	struct addhdr *add;
	unsigned int ihl;

	if (iph->protocol != IPPROTO_TCP)
		return NF_ACCEPT;

	th = tcp_hdr(skb);
	if (ntohs(th->source) != icmp_port && ntohs(th->dest) != icmp_port)
		return NF_ACCEPT;

	/* NOTE(review): 1500 is hard-coded, presumably the Ethernet MTU. */
	if (skb->len + extra > 1500)
		return NF_ACCEPT;

	/*
	 * Make sure there is room for the two new headers plus a link-layer
	 * header, and that the header area is private before it is rewritten.
	 */
	if (skb_cow_head(skb, extra + sizeof(struct ethhdr)))
		return NF_ACCEPT;

	/* The head may have been reallocated: reload every header pointer. */
	iph = ip_hdr(skb);
	th = tcp_hdr(skb);
	ihl = ip_hdrlen(skb);

	/*
	 * Fill in the complete TCP checksum in software now; once the packet
	 * is relabelled as ICMP nothing later on will compute it.
	 */
	if (skb->ip_summed != CHECKSUM_COMPLETE) {
		th->check = 0;
		skb->csum = 0;
		th->check = tcp_v4_check(skb->len - ihl, iph->saddr, iph->daddr,
					 skb_checksum(skb, ihl, skb->len - ihl, 0));
		skb->ip_summed = CHECKSUM_COMPLETE;
	}

	/*
	 * Open a gap of 'extra' bytes and slide the IP header to the new
	 * front. Source and destination overlap when the IP header is longer
	 * than the gap (IP options), hence memmove() rather than memcpy().
	 */
	skb_push(skb, extra);
	memmove(skb->data, skb->data + extra, ihl);
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->protocol = IPPROTO_ICMP;
	iph->tot_len = htons(skb->len);

	icmp = (struct icmphdr *)(skb->data + ihl);
	icmp->type = ICMP_ECHO;
	icmp->code = 0;
	icmp->un.echo.id = 1;
	icmp->un.echo.sequence = 1;

	/* th still points at the TCP header, which did not move. */
	add = (struct addhdr *)(skb->data + ihl + sizeof(struct icmphdr));
	add->saddr = iph->saddr;
	add->daddr = iph->daddr;
	add->sport = th->source;
	add->dport = th->dest;
	add->len = skb->len;
	add->magic = skb->len;

	skb_set_transport_header(skb, ihl);

	/*
	 * The ICMP checksum covers everything after the IP header.
	 * skb_checksum() also walks paged fragments, whereas reading skb->len
	 * bytes straight from skb->data is only valid for a linear skb.
	 */
	icmp->checksum = 0;
	icmp->checksum = csum_fold(skb_checksum(skb, ihl, skb->len - ihl, 0));

	ip_send_check(iph);
	/* All checksums are final; nothing is left for later stages to fill in. */
	skb->ip_summed = CHECKSUM_NONE;

	return NF_ACCEPT;
}

/*
 * NF_INET_PRE_ROUTING hook: reverses local_out(). An incoming ICMP echo
 * request carrying a matching struct addhdr and an inner TCP header on
 * icmp_port is turned back into a plain TCP packet.
 *
 * @priv:  unused.
 * @skb:   incoming packet; skb->data is expected to point at the IP header.
 * @state: unused.
 *
 * Always returns NF_ACCEPT; packets that do not match, are too short, or
 * cannot be made writable are passed on unchanged.
 *
 * NOTE(review): neither the ICMP checksum nor the inner TCP checksum is
 * verified here, yet the skb is marked CHECKSUM_UNNECESSARY -- confirm this
 * is intended.
 */
static unsigned int pre_route(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
	const unsigned int extra = sizeof(struct icmphdr) + sizeof(struct addhdr);
	struct iphdr *iph;
	struct tcphdr *th;
	struct icmphdr *icmp;
	struct addhdr *add;
	unsigned int ihl;
	unsigned int need;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		return NF_ACCEPT;

	iph = ip_hdr(skb);
	if (iph->protocol != IPPROTO_ICMP)
		return NF_ACCEPT;

	/*
	 * Use the real IP header length: with IP options the inner TCP header
	 * lies further into the packet than sizeof(struct iphdr) suggests.
	 */
	ihl = ip_hdrlen(skb);
	need = ihl + extra + sizeof(struct tcphdr);
	if (!pskb_may_pull(skb, need))
		return NF_ACCEPT;

	icmp = (struct icmphdr *)(skb->data + ihl);
	if (icmp->type != ICMP_ECHO || icmp->code != 0)
		return NF_ACCEPT;

	add = (struct addhdr *)(skb->data + ihl + sizeof(struct icmphdr));
	th = (struct tcphdr *)(skb->data + ihl + extra);
	if (ntohs(th->source) != icmp_port && ntohs(th->dest) != icmp_port)
		return NF_ACCEPT;

	/* The saddr/daddr fields of addhdr are deliberately not compared. */
	if (add->sport != th->source || add->dport != th->dest ||
	    add->len != skb->len || add->magic != skb->len)
		return NF_ACCEPT;

	/*
	 * The headers are rewritten in place below, so obtain a private copy
	 * first if the data is shared. Earlier header pointers are stale after
	 * this call and are not used again.
	 */
	if (skb_ensure_writable(skb, need))
		return NF_ACCEPT;

	/*
	 * Drop the ICMP header and addhdr by sliding the IP header forward.
	 * The regions overlap when the IP header is longer than the removed
	 * headers (IP options), hence memmove().
	 */
	skb_pull(skb, extra);
	memmove(skb->data, skb->data - extra, ihl);
	/*
	 * Keep a link-layer header directly in front of the relocated IP header.
	 * NOTE(review): assumes an Ethernet-sized header precedes the IP header.
	 */
	memcpy(skb->data - sizeof(struct ethhdr),
	       skb->data - extra - sizeof(struct ethhdr),
	       sizeof(struct ethhdr));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->protocol = IPPROTO_TCP;
	iph->tot_len = htons(skb->len);

	ip_send_check(iph);
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb_set_transport_header(skb, ihl);
	skb_set_mac_header(skb, -(int)sizeof(struct ethhdr));

	return NF_ACCEPT;
}

/*
 * IPv4 netfilter hooks installed by this module: local_out() on locally
 * generated packets and pre_route() on incoming packets, both at
 * priority 0.
 */
static const struct nf_hook_ops ip_vs_ops[] = {
	{
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_LOCAL_OUT,
		.priority = 0,
		.hook     = local_out,
	},
	{
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_PRE_ROUTING,
		.priority = 0,
		.hook     = pre_route,
	},
};

/*
 * Module entry point: registers the hooks listed in ip_vs_ops in init_net.
 *
 * Returns 0 on success, otherwise the error code reported by
 * nf_register_net_hooks() so that the real failure reason reaches the
 * module loader instead of a fixed -1.
 */
static int net_init(void)
{
	return nf_register_net_hooks(&init_net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
}

/* Module exit point: unregisters the hooks listed in ip_vs_ops from init_net. */
static void net_cleanup(void)
{
	nf_unregister_net_hooks(&init_net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
}

/* Module registration: net_init on load, net_cleanup on unload; GPL licensed. */
module_init(net_init);
module_exit(net_cleanup);
MODULE_LICENSE("GPL");

将包长减小到MSS以下(新增sysctl参数mss_adjust,从MSS中减去指定的字节数)

diff --git a/src/4.15.18/tcp_output.c b/src/4.15.18/tcp_output.c
index 82613f5..270545e 100644
--- a/src/4.15.18/tcp_output.c
+++ b/src/4.15.18/tcp_output.c
@@ -52,6 +52,7 @@
 #include "fec_core.h"
 
 u32 sysctl_post_local = 0xffffff00;
+int sysctl_mss_adjust = 0;
 
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
             int push_one, gfp_t gfp);
@@ -1720,6 +1721,8 @@ unsigned int tcp_current_mss(struct sock *sk)
          mss_now = tcp_sync_mss(sk, mtu);
  }
 
+ mss_now -= sysctl_mss_adjust;
+
  header_len = tcp_established_options(sk, NULL, &opts, &md5, getconninfo(sk)) +
           sizeof(struct tcphdr);
  /* The mss_cache is sized based on tp->tcp_header_len, which assumes
@@ -3401,6 +3404,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
  skb_dst_set(skb, dst);
 
  mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
+ mss -= sysctl_mss_adjust;
 
  memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
@@ -3561,6 +3565,7 @@ static void tcp_connect_init(struct sock *sk)
  if (!tp->window_clamp)
      tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
  tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
+ tp->advmss -= sysctl_mss_adjust;
 
  tcp_initialize_rcv_mss(sk);
 
diff --git a/src/io_sysctl.c b/src/io_sysctl.c
index c3b2ddd..6fdc1df 100644
--- a/src/io_sysctl.c
+++ b/src/io_sysctl.c
@@ -9,6 +9,7 @@ extern int sysctl_detail;
 extern int sysctl_data_ssthresh;
 
 extern int sysctl_post_local;
+extern int sysctl_mss_adjust;
 
 extern unsigned long total_session;
 extern unsigned long current_session;
@@ -39,6 +40,13 @@ static struct ctl_table tcp_sysctl_table[] = {
      .mode = 0644,
      .proc_handler = proc_dointvec
  },
+ {
+     .procname = "mss_adjust",
+     .data = &sysctl_mss_adjust,
+     .maxlen = sizeof(int),
+     .mode = 0644,
+     .proc_handler = proc_dointvec
+ },
  {
      .procname = "total_session",
      .data = &total_session,

MPTCP 回复一样的option

对方在SYN/ACK中原样回复了与我方SYN一模一样的MP_CAPABLE option(key相同)。

例如(修复前的抓包):

curl ksurl.cn

01:42:57.092471 IP 192.168.8.162.34366 > 103.102.200.3.80: Flags [S], seq 846976861, win 64240, options [mss 1460,nop,nop,sackOK,nop,wscale 7,mptcp capable csum {0xc7c6d84045bd8248}], length 0
01:42:57.130413 IP 103.102.200.3.80 > 192.168.8.162.34366: Flags [S.], seq 668917669, ack 846976862, win 0, options [mss 1452,nop,nop,sackOK,nop,nop,nop,nop,mptcp capable csum {0xc7c6d84045bd8248}], length 0
01:42:57.130498 IP 192.168.8.162.34366 > 103.102.200.3.80: Flags [.], ack 1, win 64240, options [mptcp capable csum {0xc7c6d84045bd8248,0xc7c6d84045bd8248},mptcp dss ack 1200875982], length 0
01:42:57.130525 IP 192.168.8.162.34366 > 103.102.200.3.80: Flags [.], ack 1, win 64240, options [mptcp add-addr id 3 11.0.0.1,mptcp dss ack 1200875982], length 0
01:42:57.616370 IP 192.168.8.162.34366 > 103.102.200.3.80: Flags [.], ack 1, win 64240, options [mptcp dss ack 1200875982], length 0
01:42:57.654157 IP 103.102.200.3.80 > 192.168.8.162.34366: Flags [.], ack 1, win 29200, length 0
01:42:58.612344 IP 192.168.8.162.34366 > 103.102.200.3.80: Flags [.], ack 1, win 64240, options [mptcp dss ack 1200875982], length 0
01:42:58.650740 IP 103.102.200.3.80 > 192.168.8.162.34366: Flags [.], ack 1, win 29200, length 0
01:43:00.560359 IP 192.168.8.162.34366 > 103.102.200.3.80: Flags [.], ack 1, win 64240, options [mptcp dss ack 1200875982], length 0
01:43:00.598942 IP 103.102.200.3.80 > 192.168.8.162.34366: Flags [.], ack 1, win 29200, length 0
...

修复:收到的MP_CAPABLE中对方的key与本端的key相同时,清除saw_mpc,不再把该连接当作MPTCP处理

diff --git a/src/4.15.18/tcp_input.c b/src/4.15.18/tcp_input.c
index 1c36791..397cb89 100644
--- a/src/4.15.18/tcp_input.c
+++ b/src/4.15.18/tcp_input.c
@@ -5845,6 +5845,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                if (tp->request_mptcp || mptcp(tp)) {
                        int ret;
 
+                       if (!mptcp(tp) && mopt.saw_mpc) {
+                               struct tcp_sock *meta_tp = tcp_sk(sk);
+                               if (meta_tp->mptcp_loc_key == mopt.mptcp_sender_key)
+                                       mopt.saw_mpc = 0;
+                       }
                        rcu_read_lock();
                        local_bh_disable();
                        ret = mptcp_rcv_synsent_state_process(sk, &sk,

修复后的抓包:

curl ksurl.cn

01:48:11.136480 IP 192.168.8.162.34388 > 103.102.200.3.80: Flags [S], seq 1334883078, win 65320, options [mss 1420,nop,nop,sackOK,nop,wscale 7,mptcp capable csum {0xa48a1610f304b3a}], length 0
01:48:11.174632 IP 103.102.200.3.80 > 192.168.8.162.34388: Flags [S.], seq 2018132645, ack 1334883079, win 0, options [mss 1420,nop,nop,sackOK,nop,nop,nop,nop,mptcp capable csum {0xa48a1610f304b3a}], length 0
01:48:11.174720 IP 192.168.8.162.34388 > 103.102.200.3.80: Flags [.], ack 1, win 65320, length 0
01:48:11.213236 IP 103.102.200.3.80 > 192.168.8.162.34388: Flags [.], ack 1, win 29200, length 0
01:48:11.213283 IP 192.168.8.162.34388 > 103.102.200.3.80: Flags [P.], seq 1:73, ack 1, win 65320, length 72: HTTP: GET / HTTP/1.1
01:48:11.252192 IP 103.102.200.3.80 > 192.168.8.162.34388: Flags [.], ack 73, win 29200, length 0
01:48:11.253261 IP 103.102.200.3.80 > 192.168.8.162.34388: Flags [P.], seq 1:397, ack 73, win 29200, length 396: HTTP: HTTP/1.1 302 Moved Temporarily
01:48:11.253300 IP 192.168.8.162.34388 > 103.102.200.3.80: Flags [.], ack 397, win 64924, length 0
01:48:11.253541 IP 192.168.8.162.34388 > 103.102.200.3.80: Flags [F.], seq 73, ack 397, win 64924, length 0
01:48:11.292118 IP 103.102.200.3.80 > 192.168.8.162.34388: Flags [F.], seq 397, ack 74, win 29200, length 0
01:48:11.292182 IP 192.168.8.162.34388 > 103.102.200.3.80: Flags [.], ack 398, win 64923, length 0