kk Blog —— 通用基础


date [-d @int|str] [+%s|"+%F %T"]
netstat -ltunp
sar -n DEV 1

将包减小到 mss 以下:新增 sysctl 项 mss_adjust(默认 0),在计算当前 MSS 以及 SYN、SYN/ACK 中通告的 MSS 时减去该值

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
diff --git a/src/4.15.18/tcp_output.c b/src/4.15.18/tcp_output.c
index 82613f5..270545e 100644
--- a/src/4.15.18/tcp_output.c
+++ b/src/4.15.18/tcp_output.c
@@ -52,6 +52,7 @@
 #include "fec_core.h"
 
 u32 sysctl_post_local = 0xffffff00;
+int sysctl_mss_adjust = 0;
 
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
             int push_one, gfp_t gfp);
@@ -1720,6 +1721,8 @@ unsigned int tcp_current_mss(struct sock *sk)
          mss_now = tcp_sync_mss(sk, mtu);
  }
 
+ mss_now -= sysctl_mss_adjust;
+
  header_len = tcp_established_options(sk, NULL, &opts, &md5, getconninfo(sk)) +
           sizeof(struct tcphdr);
  /* The mss_cache is sized based on tp->tcp_header_len, which assumes
@@ -3401,6 +3404,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
  skb_dst_set(skb, dst);
 
  mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
+ mss -= sysctl_mss_adjust;
 
  memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
@@ -3561,6 +3565,7 @@ static void tcp_connect_init(struct sock *sk)
  if (!tp->window_clamp)
      tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
  tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
+ tp->advmss -= sysctl_mss_adjust;
 
  tcp_initialize_rcv_mss(sk);
 
diff --git a/src/io_sysctl.c b/src/io_sysctl.c
index c3b2ddd..6fdc1df 100644
--- a/src/io_sysctl.c
+++ b/src/io_sysctl.c
@@ -9,6 +9,7 @@ extern int sysctl_detail;
 extern int sysctl_data_ssthresh;
 
 extern int sysctl_post_local;
+extern int sysctl_mss_adjust;
 
 extern unsigned long total_session;
 extern unsigned long current_session;
@@ -39,6 +40,13 @@ static struct ctl_table tcp_sysctl_table[] = {
      .mode = 0644,
      .proc_handler = proc_dointvec
  },
+ {
+     .procname = "mss_adjust",
+     .data = &sysctl_mss_adjust,
+     .maxlen = sizeof(int),
+     .mode = 0644,
+     .proc_handler = proc_dointvec
+ },
  {
      .procname = "total_session",
      .data = &total_session,

MPTCP 回复一样的option

对方在 SYN/ACK 中回复了与我方 SYN 一模一样的 MP_CAPABLE option(key 与我方发送的完全相同)

例如

curl ksurl.cn

1
2
3
4
5
6
7
8
9
10
11
01:42:57.092471 IP 192.168.8.162.34366 > 103.102.200.3.80: Flags [S], seq 846976861, win 64240, options [mss 1460,nop,nop,sackOK,nop,wscale 7,mptcp capable csum {0xc7c6d84045bd8248}], length 0
01:42:57.130413 IP 103.102.200.3.80 > 192.168.8.162.34366: Flags [S.], seq 668917669, ack 846976862, win 0, options [mss 1452,nop,nop,sackOK,nop,nop,nop,nop,mptcp capable csum {0xc7c6d84045bd8248}], length 0
01:42:57.130498 IP 192.168.8.162.34366 > 103.102.200.3.80: Flags [.], ack 1, win 64240, options [mptcp capable csum {0xc7c6d84045bd8248,0xc7c6d84045bd8248},mptcp dss ack 1200875982], length 0
01:42:57.130525 IP 192.168.8.162.34366 > 103.102.200.3.80: Flags [.], ack 1, win 64240, options [mptcp add-addr id 3 11.0.0.1,mptcp dss ack 1200875982], length 0
01:42:57.616370 IP 192.168.8.162.34366 > 103.102.200.3.80: Flags [.], ack 1, win 64240, options [mptcp dss ack 1200875982], length 0
01:42:57.654157 IP 103.102.200.3.80 > 192.168.8.162.34366: Flags [.], ack 1, win 29200, length 0
01:42:58.612344 IP 192.168.8.162.34366 > 103.102.200.3.80: Flags [.], ack 1, win 64240, options [mptcp dss ack 1200875982], length 0
01:42:58.650740 IP 103.102.200.3.80 > 192.168.8.162.34366: Flags [.], ack 1, win 29200, length 0
01:43:00.560359 IP 192.168.8.162.34366 > 103.102.200.3.80: Flags [.], ack 1, win 64240, options [mptcp dss ack 1200875982], length 0
01:43:00.598942 IP 103.102.200.3.80 > 192.168.8.162.34366: Flags [.], ack 1, win 29200, length 0
...

修复

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
diff --git a/src/4.15.18/tcp_input.c b/src/4.15.18/tcp_input.c
index 1c36791..397cb89 100644
--- a/src/4.15.18/tcp_input.c
+++ b/src/4.15.18/tcp_input.c
@@ -5845,6 +5845,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                if (tp->request_mptcp || mptcp(tp)) {
                        int ret;
 
+                       if (!mptcp(tp) && mopt.saw_mpc) {
+                               struct tcp_sock *meta_tp = tcp_sk(sk);
+                               if (meta_tp->mptcp_loc_key == mopt.mptcp_sender_key)
+                                       mopt.saw_mpc = 0;
+                       }
                        rcu_read_lock();
                        local_bh_disable();
                        ret = mptcp_rcv_synsent_state_process(sk, &sk,

修复后

curl ksurl.cn

1
2
3
4
5
6
7
8
9
10
11
01:48:11.136480 IP 192.168.8.162.34388 > 103.102.200.3.80: Flags [S], seq 1334883078, win 65320, options [mss 1420,nop,nop,sackOK,nop,wscale 7,mptcp capable csum {0xa48a1610f304b3a}], length 0
01:48:11.174632 IP 103.102.200.3.80 > 192.168.8.162.34388: Flags [S.], seq 2018132645, ack 1334883079, win 0, options [mss 1420,nop,nop,sackOK,nop,nop,nop,nop,mptcp capable csum {0xa48a1610f304b3a}], length 0
01:48:11.174720 IP 192.168.8.162.34388 > 103.102.200.3.80: Flags [.], ack 1, win 65320, length 0
01:48:11.213236 IP 103.102.200.3.80 > 192.168.8.162.34388: Flags [.], ack 1, win 29200, length 0
01:48:11.213283 IP 192.168.8.162.34388 > 103.102.200.3.80: Flags [P.], seq 1:73, ack 1, win 65320, length 72: HTTP: GET / HTTP/1.1
01:48:11.252192 IP 103.102.200.3.80 > 192.168.8.162.34388: Flags [.], ack 73, win 29200, length 0
01:48:11.253261 IP 103.102.200.3.80 > 192.168.8.162.34388: Flags [P.], seq 1:397, ack 73, win 29200, length 396: HTTP: HTTP/1.1 302 Moved Temporarily
01:48:11.253300 IP 192.168.8.162.34388 > 103.102.200.3.80: Flags [.], ack 397, win 64924, length 0
01:48:11.253541 IP 192.168.8.162.34388 > 103.102.200.3.80: Flags [F.], seq 73, ack 397, win 64924, length 0
01:48:11.292118 IP 103.102.200.3.80 > 192.168.8.162.34388: Flags [F.], seq 397, ack 74, win 29200, length 0
01:48:11.292182 IP 192.168.8.162.34388 > 103.102.200.3.80: Flags [.], ack 398, win 64923, length 0

MPTCP_OPTION

解析见 mptcp_parse_options()

MPTCP_SUB_CAPABLE

1
2
3
4
5
#define MPTCP_SUB_CAPABLE                       0
#define MPTCP_SUB_LEN_CAPABLE_SYN               12
#define MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN         12
#define MPTCP_SUB_LEN_CAPABLE_ACK               20
#define MPTCP_SUB_LEN_CAPABLE_ACK_ALIGN         20

最初的三次握手时用

MPTCP_SUB_JOIN

1
2
3
4
5
6
7
#define MPTCP_SUB_JOIN                  1
#define MPTCP_SUB_LEN_JOIN_SYN          12
#define MPTCP_SUB_LEN_JOIN_SYN_ALIGN    12
#define MPTCP_SUB_LEN_JOIN_SYNACK       16
#define MPTCP_SUB_LEN_JOIN_SYNACK_ALIGN 16
#define MPTCP_SUB_LEN_JOIN_ACK          24
#define MPTCP_SUB_LEN_JOIN_ACK_ALIGN    24

第二次、第三次……握手时用(即向已有 MPTCP 连接加入新的子流时)

MPTCP_SUB_DSS

1
#define MPTCP_SUB_DSS           2

MPTCP_SUB_ADD_ADDR, MPTCP_SUB_REMOVE_ADDR

1
2
3
4
5
6
7
8
9
10
11
12
#define MPTCP_SUB_ADD_ADDR              3
#define MPTCP_SUB_LEN_ADD_ADDR4         8
#define MPTCP_SUB_LEN_ADD_ADDR4_VER1    16
#define MPTCP_SUB_LEN_ADD_ADDR6         20
#define MPTCP_SUB_LEN_ADD_ADDR6_VER1    28
#define MPTCP_SUB_LEN_ADD_ADDR4_ALIGN   8
#define MPTCP_SUB_LEN_ADD_ADDR4_ALIGN_VER1      16
#define MPTCP_SUB_LEN_ADD_ADDR6_ALIGN   20
#define MPTCP_SUB_LEN_ADD_ADDR6_ALIGN_VER1      28

#define MPTCP_SUB_REMOVE_ADDR   4
#define MPTCP_SUB_LEN_REMOVE_ADDR       4

fullmesh 模式下通告 IP:用 ADD_ADDR 向对端通告本端地址,用 REMOVE_ADDR 撤销已通告的地址

MPTCP_SUB_PRIO

1
2
3
4
#define MPTCP_SUB_PRIO          5
#define MPTCP_SUB_LEN_PRIO      3
#define MPTCP_SUB_LEN_PRIO_ADDR 4
#define MPTCP_SUB_LEN_PRIO_ALIGN        4

./ip/ip link set dev enp0s3 multipath off/on/backup

backup 命令会将该接口设置为 backup 模式,并通过 PRIO option 通知对方,两端会分别标记 low_prio、rcv_low_prio。但目前所有 pm(path manager)都没有用到 low_prio。

MPTCP_SUB_FAIL

1
2
3
#define MPTCP_SUB_FAIL          6
#define MPTCP_SUB_LEN_FAIL      12 
#define MPTCP_SUB_LEN_FAIL_ALIGN        12

MPTCP_SUB_FCLOSE

1
2
3
#define MPTCP_SUB_FCLOSE        7
#define MPTCP_SUB_LEN_FCLOSE    12
#define MPTCP_SUB_LEN_FCLOSE_ALIGN      12