Commit 52841430 authored by David S. Miller

Merge branch 'listener_refactor_part_12'

Eric Dumazet says:

====================
inet: tcp listener refactoring, part 12

By adding a pointer from each request sock back to its listener, we
prepare SYN-ACK retransmit handling to no longer be governed by the
listener keepalive timer, as that timer is the most problematic source
of contention on the listener spinlock. Note that TCP Fast Open already
kept such a pointer, so we make it generic.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9f2dbdd9 0470c8ca
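
The idea of the series, reduced to a minimal userspace sketch (the names
below are illustrative, not the kernel's structs or API): every request
keeps a counted reference to its listener from allocation to free, so
code holding a request can safely dereference the listener without
taking the listener lock or depending on its timers.

/* Minimal userspace sketch of the counted back-pointer pattern
 * (illustrative names, not the kernel structs): a request pins its
 * listener with a reference taken at allocation and dropped at free.
 */
#include <stdatomic.h>
#include <stdlib.h>

struct listener {
	atomic_int refcnt;
};

struct request {
	struct listener *listener;	/* counted back-pointer */
};

static void listener_hold(struct listener *l)
{
	atomic_fetch_add(&l->refcnt, 1);
}

static void listener_put(struct listener *l)
{
	if (atomic_fetch_sub(&l->refcnt, 1) == 1)
		free(l);		/* last reference gone */
}

static struct request *request_alloc(struct listener *l)
{
	struct request *req = malloc(sizeof(*req));

	if (req) {
		listener_hold(l);	/* like sock_hold() below */
		req->listener = l;
	}
	return req;
}

static void request_free(struct request *req)
{
	if (req->listener)
		listener_put(req->listener);	/* like sock_put() below */
	free(req);
}

int main(void)
{
	struct listener *l = malloc(sizeof(*l));
	struct request *req;

	atomic_init(&l->refcnt, 1);	/* creator's reference */
	req = request_alloc(l);
	listener_put(l);		/* listener closes... */
	request_free(req);		/* ...request is still safe to use */
	return 0;
}

The diffs below apply exactly this pattern: sock_hold() in
reqsk_alloc(), sock_put() in reqsk_free(), and the TFO-specific
listener pointer collapses into a tfo_listener flag.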
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
@@ -111,7 +111,7 @@ struct tcp_request_sock_ops;
 struct tcp_request_sock {
 	struct inet_request_sock	req;
 	const struct tcp_request_sock_ops *af_specific;
-	struct sock			*listener; /* needed for TFO */
+	bool				tfo_listener;
 	u32				rcv_isn;
 	u32				snt_isn;
 	u32				snt_synack; /* synack sent time */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
@@ -275,11 +275,6 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk,
 					    struct request_sock *req,
 					    struct sock *child)
 {
 	reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
-	/* before letting lookups find us, make sure all req fields
-	 * are committed to memory.
-	 */
-	smp_wmb();
-	atomic_set(&req->rsk_refcnt, 1);
 }
 
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
@@ -81,7 +81,6 @@ struct inet_request_sock {
 #define ir_cookie		req.__req_common.skc_cookie
 #define ireq_net		req.__req_common.skc_net
 #define ireq_state		req.__req_common.skc_state
-#define ireq_refcnt		req.__req_common.skc_refcnt
 #define ireq_family		req.__req_common.skc_family
 	kmemcheck_bitfield_begin(flags);
@@ -244,25 +243,8 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr,
 			      initval);
 }
 
-static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops)
-{
-	struct request_sock *req = reqsk_alloc(ops);
-	struct inet_request_sock *ireq = inet_rsk(req);
-
-	if (req != NULL) {
-		kmemcheck_annotate_bitfield(ireq, flags);
-		ireq->opt = NULL;
-		atomic64_set(&ireq->ir_cookie, 0);
-		ireq->ireq_state = TCP_NEW_SYN_RECV;
-
-		/* Following is temporary. It is coupled with debugging
-		 * helpers in reqsk_put() & reqsk_free()
-		 */
-		atomic_set(&ireq->ireq_refcnt, 0);
-	}
-	return req;
-}
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+				      struct sock *sk_listener);
 
 static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
 {
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
@@ -52,6 +52,7 @@ struct request_sock {
 #define rsk_refcnt			__req_common.skc_refcnt
 
 	struct request_sock		*dl_next;
+	struct sock			*rsk_listener;
 	u16				mss;
 	u8				num_retrans; /* number of retransmits */
 	u8				cookie_ts:1; /* syncookie: encode tcpopts in timestamp */
@@ -67,13 +68,21 @@ struct request_sock {
 	u32				peer_secid;
 };
 
-static inline struct request_sock *reqsk_alloc(const struct request_sock_ops *ops)
+static inline struct request_sock *
+reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
 {
 	struct request_sock *req = kmem_cache_alloc(ops->slab, GFP_ATOMIC);
 
-	if (req != NULL)
+	if (req) {
 		req->rsk_ops = ops;
+		sock_hold(sk_listener);
+		req->rsk_listener = sk_listener;
 
+		/* Following is temporary. It is coupled with debugging
+		 * helpers in reqsk_put() & reqsk_free()
+		 */
+		atomic_set(&req->rsk_refcnt, 0);
+	}
 	return req;
 }
@@ -88,6 +97,8 @@ static inline void reqsk_free(struct request_sock *req)
 	WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0);
 
 	req->rsk_ops->destructor(req);
+	if (req->rsk_listener)
+		sock_put(req->rsk_listener);
 	kmem_cache_free(req->rsk_ops->slab, req);
 }
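
One consequence worth spelling out, sketched below with illustrative
names rather than the kernel API: rsk_refcnt now starts at 0 in
reqsk_alloc() and only becomes 1 once the req is published, so error
paths that never published a req must call reqsk_free() directly, while
reqsk_put() remains the way to drop a counted reference. This is why
the syncookie error paths further down switch from reqsk_put() to
reqsk_free().

/* Userspace model of the free-vs-put convention (a sketch, not the
 * kernel API): refcnt is 0 until the object is published, so
 * unpublished error paths free directly.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdlib.h>

struct req {
	atomic_int refcnt;	/* 0 until published */
};

static void req_free(struct req *r)
{
	/* plays the role of the WARN_ON_ONCE() in reqsk_free() */
	assert(atomic_load(&r->refcnt) == 0);
	free(r);
}

static void req_put(struct req *r)
{
	if (atomic_fetch_sub(&r->refcnt, 1) == 1)
		req_free(r);	/* dropped the last reference */
}

int main(void)
{
	/* Error path before publication: refcnt still 0, free directly. */
	struct req *a = calloc(1, sizeof(*a));
	req_free(a);

	/* Published object: refcnt was raised to 1, so use put. */
	struct req *b = calloc(1, sizeof(*b));
	atomic_store(&b->refcnt, 1);
	req_put(b);
	return 0;
}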
@@ -286,6 +297,12 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
 	req->sk = NULL;
 	req->dl_next = lopt->syn_table[hash];
 
+	/* before letting lookups find us, make sure all req fields
+	 * are committed to memory and refcnt initialized.
+	 */
+	smp_wmb();
+	atomic_set(&req->rsk_refcnt, 1);
+
 	write_lock(&queue->syn_wait_lock);
 	lopt->syn_table[hash] = req;
 	write_unlock(&queue->syn_wait_lock);
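
The smp_wmb() added here is the publish half of a classic ordering
pattern: all req fields and the refcount must be committed to memory
before the req can be found by lookups. A rough equivalent in portable
C11 atomics, under the caveat that the kernel relies on smp_wmb() plus
the locked table write rather than an explicit release store:

/* Publish pattern, modeled with C11 release/acquire (a sketch, not
 * the kernel's memory model): initialize everything, then make the
 * object reachable with release ordering.
 */
#include <stdatomic.h>
#include <stddef.h>

struct req {
	int rcv_isn;			/* stand-in for the real fields */
	atomic_int refcnt;
};

static struct req *_Atomic table_slot;	/* stand-in for syn_table[hash] */

static void publish(struct req *r)
{
	r->rcv_isn = 12345;
	atomic_store_explicit(&r->refcnt, 1, memory_order_relaxed);
	/* Release pairs with the acquire in lookup(): fields and the
	 * refcount are visible before the req is reachable.
	 */
	atomic_store_explicit(&table_slot, r, memory_order_release);
}

static struct req *lookup(void)
{
	return atomic_load_explicit(&table_slot, memory_order_acquire);
}

int main(void)
{
	struct req r = { 0 };

	publish(&r);
	return lookup() == &r ? 0 : 1;
}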
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
@@ -153,24 +153,22 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
  * case might also exist in tcp_v4_hnd_req() that will trigger this locking
  * order.
  *
- * When a TFO req is created, it needs to sock_hold its listener to prevent
- * the latter data structure from going away.
- *
- * This function also sets "treq->listener" to NULL and unreference listener
- * socket. treq->listener is used by the listener so it is protected by the
+ * This function also sets "treq->tfo_listener" to false.
+ * treq->tfo_listener is used by the listener so it is protected by the
  * fastopenq->lock in this function.
  */
 void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 			   bool reset)
 {
-	struct sock *lsk = tcp_rsk(req)->listener;
-	struct fastopen_queue *fastopenq =
-	    inet_csk(lsk)->icsk_accept_queue.fastopenq;
+	struct sock *lsk = req->rsk_listener;
+	struct fastopen_queue *fastopenq;
+
+	fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq;
 
 	tcp_sk(sk)->fastopen_rsk = NULL;
 	spin_lock_bh(&fastopenq->lock);
 	fastopenq->qlen--;
-	tcp_rsk(req)->listener = NULL;
+	tcp_rsk(req)->tfo_listener = false;
 	if (req->sk)	/* the child socket hasn't been accepted yet */
 		goto out;
@@ -179,7 +177,6 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 		 * special RST handling below.
 		 */
 		spin_unlock_bh(&fastopenq->lock);
-		sock_put(lsk);
 		reqsk_put(req);
 		return;
 	}
@@ -201,5 +198,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 	fastopenq->qlen++;
 out:
 	spin_unlock_bh(&fastopenq->lock);
-	sock_put(lsk);
 }
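
Since the req no longer owns a reference that this function must drop,
all that remains is the flag, and the comment above gives its locking
rule: tfo_listener is only read and written under fastopenq->lock. A
userspace sketch of that rule (illustrative types, loosely modeled on
this function and the reader in inet_csk_accept()):

/* tfo_listener locking rule modeled in userspace: the flag is only
 * touched under the queue lock, so clearing and testing cannot race.
 */
#include <pthread.h>
#include <stdbool.h>

struct fastopen_queue {
	pthread_mutex_t lock;
	int qlen;
};

struct request {
	bool tfo_listener;	/* protected by fastopen_queue.lock */
};

/* Loosely mirrors reqsk_fastopen_remove(): detach under the lock. */
static void fastopen_remove(struct fastopen_queue *q, struct request *req)
{
	pthread_mutex_lock(&q->lock);
	q->qlen--;
	req->tfo_listener = false;
	pthread_mutex_unlock(&q->lock);
}

/* Loosely mirrors the inet_csk_accept() test: read under the lock. */
static bool fastopen_pending(struct fastopen_queue *q, struct request *req)
{
	bool ret;

	pthread_mutex_lock(&q->lock);
	ret = req->tfo_listener;
	pthread_mutex_unlock(&q->lock);
	return ret;
}

int main(void)
{
	struct fastopen_queue q = { PTHREAD_MUTEX_INITIALIZER, 1 };
	struct request req = { .tfo_listener = true };

	fastopen_remove(&q, &req);
	return fastopen_pending(&q, &req) ? 1 : 0;
}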
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
@@ -624,7 +624,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = inet_reqsk_alloc(&dccp_request_sock_ops);
+	req = inet_reqsk_alloc(&dccp_request_sock_ops, sk);
 	if (req == NULL)
 		goto drop;
@@ -641,7 +641,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
 	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
-	write_pnet(&ireq->ireq_net, sock_net(sk));
 	ireq->ireq_family = AF_INET;
 	ireq->ir_iif = sk->sk_bound_dev_if;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
@@ -386,7 +386,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = inet_reqsk_alloc(&dccp6_request_sock_ops);
+	req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk);
 	if (req == NULL)
 		goto drop;
@@ -403,7 +403,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
-	write_pnet(&ireq->ireq_net, sock_net(sk));
 	ireq->ireq_family = AF_INET6;
 
 	if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
@@ -293,8 +293,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
-	struct sock *newsk;
 	struct request_sock *req;
+	struct sock *newsk;
 	int error;
 
 	lock_sock(sk);
@@ -323,9 +323,11 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 	newsk = req->sk;
 
 	sk_acceptq_removed(sk);
-	if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) {
+	if (sk->sk_protocol == IPPROTO_TCP &&
+	    tcp_rsk(req)->tfo_listener &&
+	    queue->fastopenq) {
 		spin_lock_bh(&queue->fastopenq->lock);
-		if (tcp_rsk(req)->listener) {
+		if (tcp_rsk(req)->tfo_listener) {
 			/* We are still waiting for the final ACK from 3WHS
 			 * so can't free req now. Instead, we set req->sk to
 			 * NULL to signify that the child socket is taken
@@ -817,9 +819,9 @@ void inet_csk_listen_stop(struct sock *sk)
 
 		percpu_counter_inc(sk->sk_prot->orphan_count);
 
-		if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) {
+		if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
 			BUG_ON(tcp_sk(child)->fastopen_rsk != req);
-			BUG_ON(sk != tcp_rsk(req)->listener);
+			BUG_ON(sk != req->rsk_listener);
 
 			/* Paranoid, to prevent race condition if
 			 * an inbound pkt destined for child is
@@ -828,7 +830,6 @@ void inet_csk_listen_stop(struct sock *sk)
 			 * tcp_v4_destroy_sock().
 			 */
 			tcp_sk(child)->fastopen_rsk = NULL;
-			sock_put(sk);
 		}
 		inet_csk_destroy_sock(child);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
@@ -227,11 +227,12 @@ static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sock *child;
 
 	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
-	if (child)
+	if (child) {
+		atomic_set(&req->rsk_refcnt, 1);
 		inet_csk_reqsk_queue_add(sk, req, child);
-	else
+	} else {
 		reqsk_free(req);
+	}
 	return child;
 }
@@ -325,7 +326,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 		goto out;
 
 	ret = NULL;
-	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
+	req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */
 	if (!req)
 		goto out;
@@ -345,8 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->snt_synack	= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
-	treq->listener		= NULL;
-	write_pnet(&ireq->ireq_net, sock_net(sk));
+	treq->tfo_listener	= false;
 
 	ireq->ireq_family = AF_INET;
 	ireq->ir_iif = sk->sk_bound_dev_if;
@@ -357,7 +357,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	ireq->opt = tcp_v4_save_options(skb);
 
 	if (security_inet_conn_request(sk, skb, req)) {
-		reqsk_put(req);
+		reqsk_free(req);
 		goto out;
 	}
@@ -378,7 +378,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	security_req_classify_flow(req, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_key(sock_net(sk), &fl4);
 	if (IS_ERR(rt)) {
-		reqsk_put(req);
+		reqsk_free(req);
 		goto out;
 	}
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
@@ -155,12 +155,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 	tp = tcp_sk(child);
 
 	tp->fastopen_rsk = req;
-	/* Do a hold on the listner sk so that if the listener is being
-	 * closed, the child that has been accepted can live on and still
-	 * access listen_lock.
-	 */
-	sock_hold(sk);
-	tcp_rsk(req)->listener = sk;
+	tcp_rsk(req)->tfo_listener = true;
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never
 	 * scaled. So correct it appropriately.
@@ -174,6 +169,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
 				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
 
+	atomic_set(&req->rsk_refcnt, 1);
 	/* Add the child socket directly into the accept queue */
 	inet_csk_reqsk_queue_add(sk, req, child);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
@@ -5967,6 +5967,26 @@ static void tcp_openreq_init(struct request_sock *req,
 	ireq->ir_mark = inet_request_mark(sk, skb);
 }
 
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+				      struct sock *sk_listener)
+{
+	struct request_sock *req = reqsk_alloc(ops, sk_listener);
+
+	if (req) {
+		struct inet_request_sock *ireq = inet_rsk(req);
+
+		kmemcheck_annotate_bitfield(ireq, flags);
+		ireq->opt = NULL;
+		atomic64_set(&ireq->ir_cookie, 0);
+		ireq->ireq_state = TCP_NEW_SYN_RECV;
+		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
+	}
+
+	return req;
+}
+EXPORT_SYMBOL(inet_reqsk_alloc);
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
@@ -6004,7 +6024,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		goto drop;
 	}
 
-	req = inet_reqsk_alloc(rsk_ops);
+	req = inet_reqsk_alloc(rsk_ops, sk);
 	if (!req)
 		goto drop;
@@ -6020,7 +6040,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb, sk);
-	write_pnet(&inet_rsk(req)->ireq_net, sock_net(sk));
 
 	/* Note: tcp_v6_init_req() might override ir_iif for link locals */
 	inet_rsk(req)->ir_iif = sk->sk_bound_dev_if;
@@ -6097,7 +6116,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		if (err || want_cookie)
 			goto drop_and_free;
 
-		tcp_rsk(req)->listener = NULL;
+		tcp_rsk(req)->tfo_listener = false;
 		af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
@@ -49,11 +49,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sock *child;
 
 	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
-	if (child)
+	if (child) {
+		atomic_set(&req->rsk_refcnt, 1);
 		inet_csk_reqsk_queue_add(sk, req, child);
-	else
+	} else {
 		reqsk_free(req);
+	}
 	return child;
 }
@@ -189,14 +190,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		goto out;
 
 	ret = NULL;
-	req = inet_reqsk_alloc(&tcp6_request_sock_ops);
+	req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk);
 	if (!req)
 		goto out;
 
 	ireq = inet_rsk(req);
 	treq = tcp_rsk(req);
-	treq->listener = NULL;
-	write_pnet(&ireq->ireq_net, sock_net(sk));
+	treq->tfo_listener = false;
 
 	ireq->ireq_family = AF_INET6;
 
 	if (security_inet_conn_request(sk, skb, req))