kk Blog —— 通用基础

MPTCP pre_established fully_established

2020-07-19 01:37:00

一、pre_established

只在subflow的客户端起作用，在收到synack时置为1，收到第4个ack时置为0，防止在synack到第四个ack期间发送数据包。

因为服务端要用第三个ack建连，客户端收到第四个ack表示建连成功，成功之后才能发数据

mptcp_ack_timer

所以客户端需要 mptcp_ack_timer，不停地重发第三个ack，直到收到第四个ack

二、fully_established

fully_established 和 pre_established 互不相关

mptcp需要四次握手，四次握手完成后 fully_established=1, 再之后才能建立subflow

tcp三次握手后，client和server两边的 fully_established = 0, 进入fully_established的条件如下：

本端没发送数据包，但一直收到对端的mptcp数据包，见 mptcp_prevalidate_skb()

if (!tp->mptcp->fully_established) {
	tp->mptcp->init_rcv_wnd -= skb->len;
	if (tp->mptcp->init_rcv_wnd < 0)
		mptcp_become_fully_estab(sk);
}

本端发出去的mptcp数据包被mptcp_ack了，见 mptcp_process_data_ack

if (unlikely(!tp->mptcp->fully_established) && tp->mptcp->snt_isn + 1 != TCP_SKB_CB(skb)->ack_seq)
	mptcp_become_fully_estab(sk);

如果收到非mptcp数据包，则回退普通tcp，回退也会设置fully_established=1，见mptcp_prevalidate_skb()

if (!tp->mptcp->fully_established && !mptcp_is_data_seq(skb) &&
	!tp->mptcp->mapping_present && !mpcb->infinite_mapping_rcv) {
	...
	if (!is_master_tp(tp)) { // subflow reset，master才回退
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_FBDATASUB);
		mptcp_send_reset(sk);
		return 1;
	}
	...
	tp->mptcp->fully_established = 1;
}

如果本端发出去的数据包被一个不带mptcp option的ack确认了，那么大概率是对端没建立mptcp连接。此时本端回退到普通tcp，回退也会设置fully_established=1。

tcp_ack() {
	mptcp_fallback_infinite() {
		if (likely(tp->mptcp->fully_established))
			return false;

		if (!(flag & MPTCP_FLAG_DATA_ACKED)) // 被ack的包一定是mptcp数据包
			return false;

		if (!is_master_tp(tp)) { // subflow reset，master才回退
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_FBACKSUB);
			return true;
		}
		...
	}
}

因为 fully_established = 0 时刚完成3次握手，所以上面说的数据包基本都是第一个数据包

以上条件对于client和server都适用，因为3次握手后谁先发包都是正常的。

mptcp 连接在 fully_established=1 之后再收到不含mptcp option的包

不会进行mapping，见mptcp_detect_mapping()

if (!mptcp_is_data_seq(skb)) {
	if (!tp->mptcp->mapping_present && tp->rcv_nxt - tp->copied_seq > 65536) {
		mptcp_send_reset(sk);
		return 1;
	}
	return 0;
}

对于一个map_data_len包，可能被拆成了多个包传输：

如果多个包全不是mptcp包，则mapping_present=0，那么mptcp_queue_skb() 会直接return，数据一直留在subflow的接收队列里没有被取走，直到 tp->rcv_nxt - tp->copied_seq > 65536，然后被reset

如果前一部分是mptcp的包，后一部分不是mptcp包，则mapping_present=1，然后会被mptcp_verif_dss_csum() reset

TCP包增一个ICMP头

2020-07-09 02:00:00

发送加头，接收解头。checksum失效，需要额外处理

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/icmp.h>
#include <linux/slab.h>

#include <net/ip.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/icmp.h>                   /* for icmp_send */
#include <net/route.h>
#include <net/ip6_checksum.h>
#include <net/netns/generic.h>      /* net_generic() */

#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
#include <linux/netfilter_ipv6.h>
#include <net/ip6_route.h>
#endif

#include <net/ip_vs.h>
#include <linux/dns_resolver.h>



#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/icmp.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/kallsyms.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/icmp.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/neighbour.h>
#include <net/netevent.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32))
#include <net/net_namespace.h>
#endif
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

#include <linux/inet.h>
#include <linux/skbuff.h>
#include <linux/kthread.h>

/*
 * TCP port that selects which traffic is tunnelled: both hooks below only
 * touch packets whose TCP source or destination port equals this value.
 * Despite the name it is a TCP port, not an ICMP field.
 * Exposed as a module parameter with mode 0644.
 */
int icmp_port = 80;
module_param(icmp_port, int, 0644);

/*
 * Extra header placed between the ICMP header and the original TCP header.
 * local_out() fills it in; pre_route() uses sport/dport/len/magic to decide
 * whether an incoming ICMP echo is one of our encapsulated packets.
 */
struct addhdr {
	u32 saddr;	/* copied from the IP header's source address */
	u32 daddr;	/* copied from the IP header's destination address */
	u16 sport;	/* copied from the TCP source port */
	u16 dport;	/* copied from the TCP destination port */
	u16 len;	/* skb->len of the encapsulated packet, stored as-is */
	u16 magic;	/* also set to skb->len; checked as a second marker */
};

static unsigned int local_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
	struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th;
	struct icmphdr *icmp;
	struct addhdr *add;
	int delta;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);
	if (iph->protocol != IPPROTO_TCP)
		return NF_ACCEPT;

	if (ntohs(th->source) != icmp_port && ntohs(th->dest) != icmp_port)
		return NF_ACCEPT;

	if (skb->len + sizeof(struct icmphdr) + sizeof(struct addhdr) > 1500)
		return NF_ACCEPT;

	delta = sizeof(struct icmphdr) + sizeof(struct addhdr) + sizeof(struct ethhdr) - skb_headroom(skb);
	if (delta > 0 && pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
		return NF_ACCEPT;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (skb->ip_summed != CHECKSUM_COMPLETE) {
		th->check = 0;
		skb->csum = 0;
		th->check = tcp_v4_check(skb->len - ip_hdrlen(skb), iph->saddr, iph->daddr, skb_checksum(skb, ip_hdrlen(skb), skb->len - ip_hdrlen(skb), 0));
		skb->ip_summed = CHECKSUM_COMPLETE;
	}

	skb_push(skb, sizeof(struct icmphdr) + sizeof(struct addhdr));
	memcpy(skb->data, skb->data + sizeof(struct icmphdr) + sizeof(struct addhdr), ip_hdrlen(skb));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->protocol = IPPROTO_ICMP;
	iph->tot_len = htons(skb->len);

	icmp = (struct icmphdr *)(skb->data + ip_hdrlen(skb));
	icmp->type = ICMP_ECHO;
	icmp->code = 0;
	icmp->un.echo.id = 1;
	icmp->un.echo.sequence = 1;

	add = (struct addhdr *)(skb->data + ip_hdrlen(skb) + sizeof(struct icmphdr));
	add->saddr = iph->saddr;
	add->daddr = iph->daddr;
	add->sport = th->source;
	add->dport = th->dest;
	add->len = skb->len;
	add->magic = skb->len;

	skb_set_transport_header(skb, ip_hdrlen(skb));

	icmp->checksum = 0;
	icmp->checksum = csum_fold(csum_partial(skb->data + ip_hdrlen(skb), skb->len - ip_hdrlen(skb), 0));

	ip_send_check(iph);
	skb->ip_summed = CHECKSUM_NONE;

	return NF_ACCEPT;
}

static unsigned int pre_route(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
	struct iphdr *iph;
	struct tcphdr *th;
	struct icmphdr *icmp;
	struct addhdr *add;

	if (!pskb_may_pull(skb, sizeof(struct iphdr) + sizeof(struct tcphdr) + sizeof(struct icmphdr) + sizeof(struct addhdr)))
		return NF_ACCEPT;

	iph = ip_hdr(skb);
	if (iph->protocol != IPPROTO_ICMP)
		return NF_ACCEPT;

	icmp = (struct icmphdr*)(skb->data + ip_hdrlen(skb));
	if (icmp->type != ICMP_ECHO || icmp->code != 0)
		return NF_ACCEPT;

	add = (struct addhdr*)(skb->data + ip_hdrlen(skb) + sizeof(struct icmphdr));
	th = (struct tcphdr*)(skb->data + ip_hdrlen(skb) + sizeof(struct icmphdr) + sizeof(struct addhdr));
	if (ntohs(th->source) != icmp_port && ntohs(th->dest) != icmp_port)
		return NF_ACCEPT;

	//if (add->saddr != iph->saddr || add->daddr != iph->daddr || 
	if (add->sport != th->source || add->dport != th->dest || add->len != skb->len || add->magic != skb->len)
		return NF_ACCEPT;

	skb_pull(skb, sizeof(struct icmphdr) + sizeof(struct addhdr));
	// sizeof(struct icmphdr) + sizeof(struct addhdr) > ip_hdrlen(skb)
	memcpy(skb->data, skb->data - sizeof(struct icmphdr) - sizeof(struct addhdr), ip_hdrlen(skb));
	memcpy(skb->data - sizeof(struct ethhdr), skb->data - sizeof(struct icmphdr) - sizeof(struct addhdr) - sizeof(struct ethhdr), sizeof(struct ethhdr));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->protocol = IPPROTO_TCP;
	iph->tot_len = htons(skb->len);

	ip_send_check(iph);
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb_set_transport_header(skb, ip_hdrlen(skb));
	th = tcp_hdr(skb);
	skb_set_mac_header(skb, -(int)sizeof(struct ethhdr));

	return NF_ACCEPT;
}

/*
 * Netfilter hook table for this module (IPv4 only, priority 0 for both):
 * outgoing local packets go through local_out(), incoming packets go
 * through pre_route() before routing.
 */
static const struct nf_hook_ops ip_vs_ops[] = {
	{
		/* Encapsulation side. */
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_LOCAL_OUT,
		.priority = 0,
		.hook     = local_out,
	},
	{
		/* Decapsulation side. */
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_PRE_ROUTING,
		.priority = 0,
		.hook     = pre_route,
	},
};

/*
 * Module entry point: register both netfilter hooks in the initial network
 * namespace.
 *
 * Returns 0 on success, or the negative error code reported by
 * nf_register_net_hooks() so that the real failure reason reaches the
 * module loader instead of a generic -1.
 */
static int net_init(void)
{
	return nf_register_net_hooks(&init_net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
}

/*
 * Module exit point: unregister the hooks that net_init() registered in the
 * initial network namespace.
 */
static void net_cleanup(void)
{
	nf_unregister_net_hooks(&init_net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
}

/* Wire the load/unload entry points and declare the module license. */
module_init(net_init);
module_exit(net_cleanup);
MODULE_LICENSE("GPL");

将包减小到mss以下

2020-07-09 01:57:00

diff --git a/src/4.15.18/tcp_output.c b/src/4.15.18/tcp_output.c
index 82613f5..270545e 100644
--- a/src/4.15.18/tcp_output.c
+++ b/src/4.15.18/tcp_output.c
@@ -52,6 +52,7 @@
 #include "fec_core.h"
 
 u32 sysctl_post_local = 0xffffff00;
+int sysctl_mss_adjust = 0;
 
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
             int push_one, gfp_t gfp);
@@ -1720,6 +1721,8 @@ unsigned int tcp_current_mss(struct sock *sk)
          mss_now = tcp_sync_mss(sk, mtu);
  }
 
+ mss_now -= sysctl_mss_adjust;
+
  header_len = tcp_established_options(sk, NULL, &opts, &md5, getconninfo(sk)) +
           sizeof(struct tcphdr);
  /* The mss_cache is sized based on tp->tcp_header_len, which assumes
@@ -3401,6 +3404,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
  skb_dst_set(skb, dst);
 
  mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
+ mss -= sysctl_mss_adjust;
 
  memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
@@ -3561,6 +3565,7 @@ static void tcp_connect_init(struct sock *sk)
  if (!tp->window_clamp)
      tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
  tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
+ tp->advmss -= sysctl_mss_adjust;
 
  tcp_initialize_rcv_mss(sk);
 
diff --git a/src/io_sysctl.c b/src/io_sysctl.c
index c3b2ddd..6fdc1df 100644
--- a/src/io_sysctl.c
+++ b/src/io_sysctl.c
@@ -9,6 +9,7 @@ extern int sysctl_detail;
 extern int sysctl_data_ssthresh;
 
 extern int sysctl_post_local;
+extern int sysctl_mss_adjust;
 
 extern unsigned long total_session;
 extern unsigned long current_session;
@@ -39,6 +40,13 @@ static struct ctl_table tcp_sysctl_table[] = {
      .mode = 0644,
      .proc_handler = proc_dointvec
  },
+ {
+     .procname = "mss_adjust",
+     .data = &sysctl_mss_adjust,
+     .maxlen = sizeof(int),
+     .mode = 0644,
+     .proc_handler = proc_dointvec
+ },
  {
      .procname = "total_session",
      .data = &total_session,

← Older Blog Archives Newer →

一、pre_established

mptcp_ack_timer

二、fully_established

mptcp 连接在 fully_established=1 之后再收到 不含mptcp option的包

mptcp 连接在 fully_established=1 之后再收到不含mptcp option的包