diff --git a/.config b/.config
index cf146c1c..f5be3370 100644
--- a/.config
+++ b/.config
@@ -1054,6 +1054,11 @@ CONFIG_TCP_CONG_ILLINOIS=m
CONFIG_TCP_CONG_DCTCP=m
CONFIG_TCP_CONG_CDG=m
CONFIG_TCP_CONG_BBR=m
+CONFIG_TCP_CONG_LIA=m
+CONFIG_TCP_CONG_OLIA=m
+CONFIG_TCP_CONG_WVEGAS=m
+CONFIG_TCP_CONG_BALIA=m
+# CONFIG_TCP_CONG_MCTCPDESYNC is not set
CONFIG_DEFAULT_CUBIC=y
# CONFIG_DEFAULT_RENO is not set
CONFIG_DEFAULT_TCP_CONG="cubic"
@@ -1090,6 +1095,21 @@ CONFIG_IPV6_PIMSM_V2=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPV6_SEG6_HMAC=y
CONFIG_NETLABEL=y
+CONFIG_MPTCP=y
+CONFIG_MPTCP_PM_ADVANCED=y
+CONFIG_MPTCP_FULLMESH=y
+CONFIG_MPTCP_NDIFFPORTS=m
+CONFIG_MPTCP_BINDER=m
+# CONFIG_MPTCP_NETLINK is not set
+CONFIG_DEFAULT_FULLMESH=y
+# CONFIG_DEFAULT_DUMMY is not set
+CONFIG_DEFAULT_MPTCP_PM="fullmesh"
+CONFIG_MPTCP_SCHED_ADVANCED=y
+# CONFIG_MPTCP_BLEST is not set
+CONFIG_MPTCP_ROUNDROBIN=m
+CONFIG_MPTCP_REDUNDANT=m
+CONFIG_DEFAULT_SCHEDULER=y
+CONFIG_DEFAULT_MPTCP_SCHED="default"
CONFIG_NETWORK_SECMARK=y
CONFIG_NET_PTP_CLASSIFY=y
CONFIG_NETWORK_PHY_TIMESTAMPING=y
# Source-based policy routing: one routing table per local address.
# NOTE(review): presumably set up so each interface can carry its own MPTCP
# subflow -- confirm against the surrounding notes.

# Packets sourced from 192.168.2.5 are looked up in table 1.
ip rule add table 1 from 192.168.2.5
# Table 1: directly connected subnet on enp0s9, plus a default route via 192.168.2.4.
ip route add 192.168.2.0/24 dev enp0s9 scope link table 1
ip route add default via 192.168.2.4 dev enp0s9 table 1
# Packets sourced from 192.168.3.5 are looked up in table 2.
ip rule add table 2 from 192.168.3.5
# Table 2: directly connected subnet on enp0s10, plus a default route via 192.168.3.4.
ip route add 192.168.3.0/24 dev enp0s10 scope link table 2
ip route add default via 192.168.3.4 dev enp0s10 table 2
MPTCP 路径管理器(path manager)有四个可选的配置值:default、fullmesh、ndiffports 和 binder。其中 default、ndiffports、fullmesh 分别对应单路、多路和全路传输:单路是指与传统 TCP 一样,只使用单一的 TCP 子流进行传输;多路是指在当前所有 TCP 子流中由用户选择 x 条子流进行传输;全路是指将当前所有可用的 TCP 子流都用于网络传输。binder 则参考了文献《Binder: a system to aggregate multiple internet gateways in community networks》。
Linux 4.5/4.6所谓的对reuseport的优化主要体现在查询速度上,在优化前,在HASH冲突链表上遍历所有的套接字之后才能知道到底取哪个(基于一种冒泡的score打分机制,不完成一轮冒泡遍历,不能确定谁的score最高),之所以如此低效是因为内核将reuseport的所有套接字和其它套接字混合在了一起,查找是平坦的,正常的做法应该是将它们分为一个组,进行分层查找,先找到这个组(这个很容易),然后再在组中找具体的套接字。Linux 4.5针对UDP做了上述优化,而Linux 4.6则将这个优化引入到了TCP。
/*
 * dev_activate - excerpt of the device activation path.
 *
 * Only the start of the function is reproduced here: if the device's root
 * qdisc is still the noop_qdisc placeholder, default qdiscs are attached
 * through attach_default_qdiscs(). The remainder of the body is elided
 * (the "..." line below).
 */
void dev_activate(struct net_device *dev)
{
int need_watchdog; /* not referenced in the visible part of this excerpt */
/*
 * No queueing discipline is attached to the device yet: create the
 * default one, i.e. pfifo_fast for devices which need queueing and
 * noqueue_qdisc for virtual interfaces.
 */
if (dev->qdisc == &noop_qdisc)
attach_default_qdiscs(dev);
...
}
/*
 * attach_default_qdiscs - install the default root qdisc for @dev.
 *
 * Multiqueue device with a non-zero tx_queue_len: create an mq qdisc on
 * tx queue 0 as the root, call its ->attach() hook and store it in
 * dev->qdisc. If the creation fails, dev->qdisc is left unchanged.
 *
 * Any other device: run attach_one_default_qdisc() on every tx queue, use
 * the qdisc_sleeping of tx queue 0 as the root and take a reference on it.
 */
static void attach_default_qdiscs(struct net_device *dev)
{
	struct netdev_queue *first_txq = netdev_get_tx_queue(dev, 0);
	struct Qdisc *root;

	if (netif_is_multiqueue(dev) && dev->tx_queue_len != 0) {
		/* Multiqueue case: a single mq qdisc becomes the root. */
		root = qdisc_create_dflt(dev, first_txq, &mq_qdisc_ops, TC_H_ROOT);
		if (!root)
			return;

		root->ops->attach(root);
		dev->qdisc = root;
		return;
	}

	/* Single queue, or no queueing requested: one default qdisc per tx queue. */
	netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
	dev->qdisc = first_txq->qdisc_sleeping;
	atomic_inc(&dev->qdisc->refcnt);
}
static void attach_one_default_qdisc(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_unused)
{
struct Qdisc *qdisc;
if (dev->tx_queue_len) {
qdisc = qdisc_create_dflt(dev, dev_queue,
&pfifo_fast_ops, TC_H_ROOT);
if (!qdisc) {
printk(KERN_INFO "%s: activation failed\n", dev->name);
return;
}
/* Can by-pass the queue discipline for default qdisc */
qdisc->flags |= TCQ_F_CAN_BYPASS;
} else {
qdisc = &noqueue_qdisc;
}
dev_queue->qdisc_sleeping = qdisc;
}
创建noqueue
开始尝试直接删除设备默认的pfifo_fast队列,发现会出错:
# tc qdisc del dev vethd4ea root
RTNETLINK answers: No such file or directory
# tc -s qdisc ls dev vethd4ea
qdisc pfifo_fast 0: root refcnt 2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
Sent 29705382 bytes 441562 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
后来看到Jesper Brouer给出一个替换默认队列的方式,尝试了一下,成功完成。
替换默认的qdisc队列
# tc qdisc replace dev vethd4ea root pfifo limit 100
# tc -s qdisc ls dev vethd4ea
qdisc pfifo 8001: root refcnt 2 limit 100p
Sent 264 bytes 4 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
# ip link show vethd4ea
9: vethd4ea: <BROADCAST,UP,LOWER_UP> mtu 1500 qdisc pfifo master docker0 state UP mode DEFAULT qlen 1000
link/ether 3a:15:3b:e1:d7:6d brd ff:ff:ff:ff:ff:ff
修改队列长度
# ifconfig vethd4ea txqueuelen 0
删除qdisc
# tc qdisc del dev vethd4ea root
# ip link show vethd4ea
9: vethd4ea: <BROADCAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master docker0 state UP mode DEFAULT
link/ether 3a:15:3b:e1:d7:6d brd ff:ff:ff:ff:ff:ff