Commit 33cf7c90 authored by Eric Dumazet, committed by David S. Miller

net: add real socket cookies

A long standing problem in netlink socket dumps is the use
of kernel socket addresses as cookies.

1) It is a security concern.

2) Sockets can be reused quite quickly, so there is
   no guarantee that a cookie is used only once and
   identifies a single flow.

3) Request socks, established socks, and timewait socks
   for a given flow have different cookies.

Part of our effort to bring better TCP statistics requires
switching to a different allocator.

In this patch, I chose to use a per-network-namespace 64-bit generator,
and to use it only when a socket needs to be dumped to netlink.
(This might be refined later if needed.)

Note that I tried to carry cookies from the request sock to the established sock,
and then to the timewait sock.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eric Salo <salo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 654eff45
...@@ -19,8 +19,8 @@ void sock_diag_unregister(const struct sock_diag_handler *h); ...@@ -19,8 +19,8 @@ void sock_diag_unregister(const struct sock_diag_handler *h);
void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
int sock_diag_check_cookie(void *sk, const __u32 *cookie); int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie);
void sock_diag_save_cookie(void *sk, __u32 *cookie); void sock_diag_save_cookie(struct sock *sk, __u32 *cookie);
int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
......
...@@ -77,6 +77,8 @@ struct inet_request_sock { ...@@ -77,6 +77,8 @@ struct inet_request_sock {
#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr #define ir_v6_rmt_addr req.__req_common.skc_v6_daddr
#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr #define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr
#define ir_iif req.__req_common.skc_bound_dev_if #define ir_iif req.__req_common.skc_bound_dev_if
#define ir_cookie req.__req_common.skc_cookie
#define ireq_net req.__req_common.skc_net
kmemcheck_bitfield_begin(flags); kmemcheck_bitfield_begin(flags);
u16 snd_wscale : 4, u16 snd_wscale : 4,
......
...@@ -122,6 +122,7 @@ struct inet_timewait_sock { ...@@ -122,6 +122,7 @@ struct inet_timewait_sock {
#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr #define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr
#define tw_dport __tw_common.skc_dport #define tw_dport __tw_common.skc_dport
#define tw_num __tw_common.skc_num #define tw_num __tw_common.skc_num
#define tw_cookie __tw_common.skc_cookie
int tw_timeout; int tw_timeout;
volatile unsigned char tw_substate; volatile unsigned char tw_substate;
......
...@@ -56,6 +56,8 @@ struct net { ...@@ -56,6 +56,8 @@ struct net {
#endif #endif
spinlock_t rules_mod_lock; spinlock_t rules_mod_lock;
atomic64_t cookie_gen;
struct list_head list; /* list of network namespaces */ struct list_head list; /* list of network namespaces */
struct list_head cleanup_list; /* namespaces on death row */ struct list_head cleanup_list; /* namespaces on death row */
struct list_head exit_list; /* Use only net_mutex */ struct list_head exit_list; /* Use only net_mutex */
......
...@@ -199,6 +199,8 @@ struct sock_common { ...@@ -199,6 +199,8 @@ struct sock_common {
struct in6_addr skc_v6_rcv_saddr; struct in6_addr skc_v6_rcv_saddr;
#endif #endif
atomic64_t skc_cookie;
/* /*
* fields between dontcopy_begin/dontcopy_end * fields between dontcopy_begin/dontcopy_end
* are not copied in sock_copy() * are not copied in sock_copy()
...@@ -329,6 +331,7 @@ struct sock { ...@@ -329,6 +331,7 @@ struct sock {
#define sk_net __sk_common.skc_net #define sk_net __sk_common.skc_net
#define sk_v6_daddr __sk_common.skc_v6_daddr #define sk_v6_daddr __sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
#define sk_cookie __sk_common.skc_cookie
socket_lock_t sk_lock; socket_lock_t sk_lock;
struct sk_buff_head sk_receive_queue; struct sk_buff_head sk_receive_queue;
......
...@@ -1538,6 +1538,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) ...@@ -1538,6 +1538,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_err = 0; newsk->sk_err = 0;
newsk->sk_priority = 0; newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id(); newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
/* /*
* Before updating sk_refcnt, we must commit prior changes to memory * Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details) * (Documentation/RCU/rculist_nulls.txt for details)
......
...@@ -13,22 +13,39 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; ...@@ -13,22 +13,39 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex); static DEFINE_MUTEX(sock_diag_table_mutex);
int sock_diag_check_cookie(void *sk, const __u32 *cookie) static u64 sock_gen_cookie(struct sock *sk)
{ {
if ((cookie[0] != INET_DIAG_NOCOOKIE || while (1) {
cookie[1] != INET_DIAG_NOCOOKIE) && u64 res = atomic64_read(&sk->sk_cookie);
((u32)(unsigned long)sk != cookie[0] ||
(u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1])) if (res)
return res;
res = atomic64_inc_return(&sock_net(sk)->cookie_gen);
atomic64_cmpxchg(&sk->sk_cookie, 0, res);
}
}
int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie)
{
u64 res;
if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE)
return 0;
res = sock_gen_cookie(sk);
if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1])
return -ESTALE; return -ESTALE;
else
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(sock_diag_check_cookie); EXPORT_SYMBOL_GPL(sock_diag_check_cookie);
void sock_diag_save_cookie(void *sk, __u32 *cookie) void sock_diag_save_cookie(struct sock *sk, __u32 *cookie)
{ {
cookie[0] = (u32)(unsigned long)sk; u64 res = sock_gen_cookie(sk);
cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
cookie[0] = (u32)res;
cookie[1] = (u32)(res >> 32);
} }
EXPORT_SYMBOL_GPL(sock_diag_save_cookie); EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
......
...@@ -641,6 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ...@@ -641,6 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
ireq = inet_rsk(req); ireq = inet_rsk(req);
ireq->ir_loc_addr = ip_hdr(skb)->daddr; ireq->ir_loc_addr = ip_hdr(skb)->daddr;
ireq->ir_rmt_addr = ip_hdr(skb)->saddr; ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
ireq->ireq_net = sock_net(sk);
atomic64_set(&ireq->ir_cookie, 0);
/* /*
* Step 3: Process LISTEN state * Step 3: Process LISTEN state
......
...@@ -678,6 +678,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, ...@@ -678,6 +678,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
newsk->sk_write_space = sk_stream_write_space; newsk->sk_write_space = sk_stream_write_space;
newsk->sk_mark = inet_rsk(req)->ir_mark; newsk->sk_mark = inet_rsk(req)->ir_mark;
atomic64_set(&newsk->sk_cookie,
atomic64_read(&inet_rsk(req)->ir_cookie));
newicsk->icsk_retransmits = 0; newicsk->icsk_retransmits = 0;
newicsk->icsk_backoff = 0; newicsk->icsk_backoff = 0;
......
...@@ -221,12 +221,13 @@ static int inet_csk_diag_fill(struct sock *sk, ...@@ -221,12 +221,13 @@ static int inet_csk_diag_fill(struct sock *sk,
user_ns, portid, seq, nlmsg_flags, unlh); user_ns, portid, seq, nlmsg_flags, unlh);
} }
static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, static int inet_twsk_diag_fill(struct sock *sk,
struct sk_buff *skb, struct sk_buff *skb,
const struct inet_diag_req_v2 *req, const struct inet_diag_req_v2 *req,
u32 portid, u32 seq, u16 nlmsg_flags, u32 portid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh) const struct nlmsghdr *unlh)
{ {
struct inet_timewait_sock *tw = inet_twsk(sk);
struct inet_diag_msg *r; struct inet_diag_msg *r;
struct nlmsghdr *nlh; struct nlmsghdr *nlh;
s32 tmo; s32 tmo;
...@@ -247,7 +248,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, ...@@ -247,7 +248,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
r->idiag_retrans = 0; r->idiag_retrans = 0;
r->id.idiag_if = tw->tw_bound_dev_if; r->id.idiag_if = tw->tw_bound_dev_if;
sock_diag_save_cookie(tw, r->id.idiag_cookie); sock_diag_save_cookie(sk, r->id.idiag_cookie);
r->id.idiag_sport = tw->tw_sport; r->id.idiag_sport = tw->tw_sport;
r->id.idiag_dport = tw->tw_dport; r->id.idiag_dport = tw->tw_dport;
...@@ -283,7 +284,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, ...@@ -283,7 +284,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
const struct nlmsghdr *unlh) const struct nlmsghdr *unlh)
{ {
if (sk->sk_state == TCP_TIME_WAIT) if (sk->sk_state == TCP_TIME_WAIT)
return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq, return inet_twsk_diag_fill(sk, skb, r, portid, seq,
nlmsg_flags, unlh); nlmsg_flags, unlh);
return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
...@@ -675,7 +676,7 @@ static int inet_twsk_diag_dump(struct sock *sk, ...@@ -675,7 +676,7 @@ static int inet_twsk_diag_dump(struct sock *sk,
if (!inet_diag_bc_sk(bc, sk)) if (!inet_diag_bc_sk(bc, sk))
return 0; return 0;
return inet_twsk_diag_fill(inet_twsk(sk), skb, r, return inet_twsk_diag_fill(sk, skb, r,
NETLINK_CB(cb->skb).portid, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
} }
...@@ -734,7 +735,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, ...@@ -734,7 +735,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
r->idiag_retrans = req->num_retrans; r->idiag_retrans = req->num_retrans;
r->id.idiag_if = sk->sk_bound_dev_if; r->id.idiag_if = sk->sk_bound_dev_if;
sock_diag_save_cookie(req, r->id.idiag_cookie);
BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
offsetof(struct sock, sk_cookie));
sock_diag_save_cookie((struct sock *)ireq, r->id.idiag_cookie);
tmo = req->expires - jiffies; tmo = req->expires - jiffies;
if (tmo < 0) if (tmo < 0)
......
...@@ -195,6 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat ...@@ -195,6 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
tw->tw_ipv6only = 0; tw->tw_ipv6only = 0;
tw->tw_transparent = inet->transparent; tw->tw_transparent = inet->transparent;
tw->tw_prot = sk->sk_prot_creator; tw->tw_prot = sk->sk_prot_creator;
atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, hold_net(sock_net(sk))); twsk_net_set(tw, hold_net(sock_net(sk)));
/* /*
* Because we use RCU lookups, we should not set tw_refcnt * Because we use RCU lookups, we should not set tw_refcnt
......
...@@ -346,6 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ...@@ -346,6 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
treq->listener = NULL; treq->listener = NULL;
ireq->ireq_net = sock_net(sk);
/* We throwed the options of the initial SYN away, so we hope /* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8) * the ACK carries the same options again (see RFC1122 4.2.3.8)
......
...@@ -5965,6 +5965,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, ...@@ -5965,6 +5965,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb, sk); tcp_openreq_init(req, &tmp_opt, skb, sk);
inet_rsk(req)->ireq_net = sock_net(sk);
atomic64_set(&inet_rsk(req)->ir_cookie, 0);
af_ops->init_req(req, sk, skb); af_ops->init_req(req, sk, skb);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment