Linux 密钥保留服务(Linux key retention service)是在 Linux 2.6 中引入的,它的主要意图是在 Linux 内核中缓存身份验证数据。远程文件系统和其他内核服务可以使用这个服务来管理密码学、身份验证标记、跨域用户映射和其他安全问题。它还使 Linux 内核能够快速访问所需的密钥,并可以用来将密钥操作(比如添加、更新和删除)委托给用户空间。
本文将概述 Linux 密钥保留服务,定义它的术语,帮助您快速掌握 Linux 密钥的使用方法。您将通过示例代码了解如何在内核模块中使用 Linux 密钥保留服务。在编写本文时使用的内核版本是 2.6.20。
什么是密钥?
密钥(key)是一组密码学数据、身份验证标记或某些相似的元素,它在内核中由 struct key 表示。在 Linux 内核源代码中,struct key 是在 include/linux/key.h 下定义的。
[root@phoenix set.5]# insmod ./kernel.land/newkey.ko
Loading the module ...
Registered "learning_key"
接下来,执行用户级程序。
清单 8. 执行用户级程序
123456789
[root@phoenix set.5]# ./user.land/session
In /var/log/messages, you will see similar output
Installing session keyring:
Keyring allocated successfully.
Keyring instantiated and linked successfully.
New session keyring installed successfully.
Key of new type allocated successfully.
New key type linked to current session.
Linux 密钥保留服务是一种新的机制,其用途是保存与安全相关的信息,让 Linux 内核可以快速地访问这些信息。这个服务仍然处于初级阶段,刚刚开始获得认可。OpenAFS 使用 Linux 密钥保留服务来实现进程身份验证组(PAG),NFSv4 和 MIT Kerberos 也使用它。Linux 密钥保留服务仍然在进行开发,以后可能会修改或改进。
下载资源
使用 Linux 密钥保留服务的示例应用程序 (key.retention.services.zip | 4KB)
A -------> Hello (plain text) ---------> B
A <------- Hello (plain text) ---------- B
A <------ B Send Certificate to A --- B
A ------- cipher text ------------------> B
A <------ cipher text ------------------- B
… …
A在接受了B发过来的证书以后,A,B就可以使用密文进行通信了。
如果C想伪装成B,应该怎么做呢?我们想象下面的通话过程:
123
A-------> Hello (plain text) ---------> C
A <------ Hello (plain text) ----------- C
A <------ C Send Certificate to A --- C
此时A没有怀疑C的身份,理所当然的接受了C的证书,并继续进行下面的通信
123
A------- cipher text ------------------> C
A <------ cipher text ------------------- C
… …
(1) On any given CPU, dependent memory accesses will be issued in order, with respect to itself.如Q = P; D = *Q;将保证其顺序执行
(2) Overlapping loads and stores within a particular CPU will appear to be ordered within that CPU. 重叠的 Load 和 Store 操作(目标地址相同的 Load、Store)将保证顺序执行,如:a = *X; *X = b;
(3) It _must_not_ be assumed that independent loads and stores will be issued in the order given.
(4) It _must_ be assumed that overlapping memory accesses may be merged or discarded. 如 *A = X; Y = *A; 可能得到:STORE *A = X; Y = LOAD *A; 或者 STORE *A = Y = X;
/*
 * Excerpt from the TCP retransmission-timer handler; the rest of the
 * function is elided by the "..." at the end of this listing.
 *
 * If the socket still has a Fast Open request attached (tp->fastopen_rsk),
 * the timeout is handed to tcp_fastopen_synack_timer() and nothing else is
 * done in this invocation.
 */
void tcp_retransmit_timer(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
/* icsk is not referenced in the visible excerpt; presumably it is used by
 * the elided remainder of the function.
 */
struct inet_connection_sock *icsk = inet_csk(sk);
if (tp->fastopen_rsk) {
/* Warn once if a pending Fast Open request is seen in any state other
 * than SYN_RECV or FIN_WAIT1.
 */
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
tcp_fastopen_synack_timer(sk);
/* Before we receive ACK to our SYN-ACK don't retransmit
* anything else (e.g., data or FIN segments).
*/
return;
}
...
12345678910111213141516171819202122232425262728
/*
 * SYN-ACK retransmission timer handler for a Fast Open socket.
 *
 * @sk is the child socket created for the Fast Open request, not the
 * listening (parent) socket.
 */
static void tcp_fastopen_synack_timer(struct sock *sk)
{
	struct request_sock *rsk = tcp_sk(sk)->fastopen_rsk;
	struct inet_connection_sock *icsk = inet_csk(sk);
	int retry_limit;

	/* A non-zero icsk_syn_retries overrides the sysctl default; the
	 * default gets one extra retry for Fast Open.
	 */
	if (icsk->icsk_syn_retries)
		retry_limit = icsk->icsk_syn_retries;
	else
		retry_limit = sysctl_tcp_synack_retries + 1;

	rsk->rsk_ops->syn_ack_timeout(sk, rsk);

	if (rsk->num_timeout < retry_limit) {
		/* XXX (TFO) - Unlike a regular SYN-ACK retransmit, the error
		 * returned from rtx_syn_ack() is ignored so that this behaves
		 * as persistently as a regular retransmit: if the child
		 * socket has already been accepted it is not good to give up
		 * too easily.
		 */
		inet_rtx_syn_ack(sk, rsk);
		rsk->num_timeout++;

		/* Re-arm the timer with a timeout shifted by the updated
		 * timeout count, bounded by TCP_RTO_MAX.
		 */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  TCP_TIMEOUT_INIT << rsk->num_timeout,
					  TCP_RTO_MAX);
		return;
	}

	/* Retry budget exhausted: report a write error on the socket. */
	tcp_write_err(sk);
}
539 static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
540 const int max_retries,
541 const u8 rskq_defer_accept,
542 int *expire, int *resend)
543 {
544 if (!rskq_defer_accept) { //不需要等待数据到来再调用accept系统调用
545 *expire = req->num_timeout >= thresh; //超时次数达到限制则超时
546 *resend = 1; //重传SYN|ACK
547 return;
548 }
549 *expire = req->num_timeout >= thresh && //超时次数达到限制
550 (!inet_rsk(req)->acked || req->num_timeout >= max_retries); //ACK没有到来或超时次数达到最高上限
551 /*
552 * Do not resend while waiting for data after ACK,
553 * start to resend on end of deferring period to give
554 * last chance for data or ACK to create established socket.
555 */
556 *resend = !inet_rsk(req)->acked || //ACK没有到来
557 req->num_timeout >= rskq_defer_accept - 1; //超时次数超过或即将达到应用进程的限制,赶快重传SYN|ACK以便给对端最后一个机会建立连接
558 }
/* Bit flags tested against the tcp_fastopen sysctl value. */

/* Enable Fast Open on the client side. */
#define TFO_CLIENT_ENABLE		0x001
/* Enable Fast Open on the server side. */
#define TFO_SERVER_ENABLE		0x002
/* Client: send data in the SYN without a cookie option. */
#define TFO_CLIENT_NO_COOKIE		0x004

/* Server: process SYN data but skip cookie validation, i.e. a received
 * cookie is not checked.
 */
#define TFO_SERVER_COOKIE_NOT_CHKED	0x100
/* Server: accept SYN data without any cookie option, i.e. data alone is
 * enough to create a Fast Open child (by default data in a SYN is ignored).
 */
#define TFO_SERVER_COOKIE_NOT_REQD	0x200

/* Force enable TFO on all listeners, i.e., not requiring the
 * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen.
 *
 * With either flag, Fast Open is on after listen() without a further
 * setsockopt() call.
 * NOTE(review): the original note on SOCKOPT2 gives the queue length as
 * "TFO_SERVER_WO_SOCKOPT2>>16", which would evaluate to 0; presumably the
 * upper 16 bits of the sysctl value are meant - confirm against the
 * listen() implementation.
 */
#define TFO_SERVER_WO_SOCKOPT1		0x400
#define TFO_SERVER_WO_SOCKOPT2		0x800

/* Always create TFO child sockets on a TFO listener even when
 * cookie/data not present. (For testing purpose!)
 * Easy to attack, so leave it disabled.
 */
#define TFO_SERVER_ALWAYS		0x1000
测试
开启
1234
echo 3 > /proc/sys/net/ipv4/tcp_fastopen # 1 开启客户端,2 开启服务端,3 都开启
tc qdisc add dev lo root netem delay 300ms # 设置延迟才能看出效果
ifconfig lo mtu 1500
/*
 * Try to send a SYN that carries user data (client side of TCP Fast Open).
 *
 * @sk:  the connecting socket
 * @syn: the already built plain SYN skb
 *
 * On the data path a copy of @syn is expanded, filled with the user data
 * from tp->fastopen_req and transmitted, and a data-only copy is queued for
 * retransmission. On any failure or when no usable cookie is cached, the
 * function falls back to transmitting the regular @syn.
 *
 * Returns 0 when the data-carrying SYN was transmitted, otherwise the
 * result of transmitting the regular SYN.
 */
static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
struct sk_buff *syn_data = NULL, *data;
unsigned long last_syn_loss = 0;
tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */
tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
&syn_loss, &last_syn_loss);// look up the cached TFO cookie information
/* Recurring FO SYN losses: revert to regular handshake temporarily */
if (syn_loss > 1 &&
time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
fo->cookie.len = -1;
goto fallback;
}
if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)// send a SYN with data whether or not a cookie is available
fo->cookie.len = -1;
else if (fo->cookie.len <= 0) // no cookie: send a SYN carrying the TFO cookie-request option
goto fallback;
/* MSS for SYN-data is based on cached MSS and bounded by PMTU and
* user-MSS. Reserve maximum option space for middleboxes that add
* private TCP options. The cost is reduced data space in SYN :(
*/
if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;// maximum amount of data the SYN can carry
syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
sk->sk_allocation);// copy the SYN and expand the skb to make room for the data
if (syn_data == NULL)
goto fallback;
for (i = 0; i < iovlen && syn_data->len < space; ++i) {// copy the data buffered in user space into the kernel
struct iovec *iov = &fo->data->msg_iov[i];
unsigned char __user *from = iov->iov_base;
int len = iov->iov_len;
if (syn_data->len + len > space)// total data would exceed the space available in the skb
len = space - syn_data->len;
else if (i + 1 == iovlen)
/* No more data pending in inet_wait_for_connect() */
fo->data = NULL;// everything fits in the SYN; nothing left to send while waiting in inet_wait_for_connect
if (skb_add_data(syn_data, from, len))// copy the user data into the skb
goto fallback;
}
/* Queue a data-only packet after the regular SYN for retransmission */
data = pskb_copy(syn_data, sk->sk_allocation);
if (data == NULL)
goto fallback;
TCP_SKB_CB(data)->seq++;
/* NOTE(review): the next store looks redundant - tcp_flags is overwritten
 * unconditionally by the assignment that follows it.
 */
TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
tcp_connect_queue_skb(sk, data);
fo->copied = data->len;
if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {// transmit the SYN that carries data
tp->syn_data = (fo->copied > 0);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
goto done;
}
/* Transmit failed: clear the pointer so the kfree_skb() on the fallback
 * path does not touch syn_data again (presumably tcp_transmit_skb has
 * already consumed it - confirm).
 */
syn_data = NULL;
fallback:
/* Send a regular SYN with Fast Open cookie request option */
if (fo->cookie.len > 0)
fo->cookie.len = 0;
err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
if (err)
tp->syn_fastopen = 0;
kfree_skb(syn_data);
done:
fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */
return err;
}
/*
 * Server-side handling of a Fast Open SYN (excerpt; "..." marks code elided
 * from this listing).
 *
 * @sk:         the listening socket
 * @skb:        the received SYN
 * @skb_synack: the prepared SYN|ACK to send back
 * @req:        the request sock for this connection attempt
 *
 * Creates the child socket, sends the SYN|ACK, accounts the request in the
 * listener's Fast Open queue, arms the child's retransmit timer, adds the
 * child to the accept queue and queues any payload carried by the SYN on the
 * child's receive queue. The visible path returns 0.
 */
static int tcp_v4_conn_req_fastopen(struct sock *sk,
struct sk_buff *skb,
struct sk_buff *skb_synack,
struct request_sock *req)
{
struct tcp_sock *tp = tcp_sk(sk);
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const struct inet_request_sock *ireq = inet_rsk(req);
struct sock *child;
...
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);// create the child socket; per the original note its state is TCP_SYN_RECV
...
err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
ireq->rmt_addr, ireq->opt);// build the IP header for the SYN|ACK and send it
err = net_xmit_eval(err);
if (!err)
tcp_rsk(req)->snt_synack = tcp_time_stamp;
/* XXX (TFO) - is it ok to ignore error and continue? */
spin_lock(&queue->fastopenq->lock);
queue->fastopenq->qlen++;// count this connection in the TFO queue
spin_unlock(&queue->fastopenq->lock);
...
/* From here on tp refers to the child socket, not the listener. */
tp = tcp_sk(child);
tp->fastopen_rsk = req;
/* Do a hold on the listener sk so that if the listener is being
* closed, the child that has been accepted can live on and still
* access listen_lock.
*/
sock_hold(sk);
tcp_rsk(req)->listener = sk;
/* RFC1323: The window in SYN & SYN/ACK segments is never
* scaled. So correct it appropriately.
*/
tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
/* Activate the retrans timer so that SYNACK can be retransmitted.
* The request socket is not added to the SYN table of the parent
* because it's been added to the accept queue directly.
*/
inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
TCP_TIMEOUT_INIT, TCP_RTO_MAX);
/* Add the child socket directly into the accept queue */
inet_csk_reqsk_queue_add(sk, req, child);
/* Now finish processing the fastopen child socket. */
inet_csk(child)->icsk_af_ops->rebuild_header(child);
tcp_init_congestion_control(child);
tcp_mtup_init(child);
tcp_init_buffer_space(child);
tcp_init_metrics(child);
/* Queue the data carried in the SYN packet. We need to first
* bump skb's refcnt because the caller will attempt to free it.
*
* XXX (TFO) - we honor a zero-payload TFO request for now.
* (Any reason not to?)
*/
if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {// the SYN carries no payload
/* Don't queue the skb if there is no payload in SYN.
* XXX (TFO) - How about SYN+FIN?
*/
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
} else {
skb = skb_get(skb);
skb_dst_drop(skb);
/* Strip the TCP header (doff is in 32-bit words) so only payload remains. */
__skb_pull(skb, tcp_hdr(skb)->doff * 4);
skb_set_owner_r(skb, child);
__skb_queue_tail(&child->sk_receive_queue, skb);// put the data on the child's receive queue
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
tp->syn_data_acked = 1;
}
sk->sk_data_ready(sk, 0);// notify the process holding the listening socket so it can call accept() for the new connection
bh_unlock_sock(child);
sock_put(child);
WARN_ON(req->sk == NULL);
return 0;
}