Commit 138470a9 authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

net/sched: act_ct: fix lockdep splat in tcf_ct_flow_table_get

Convert zones_lock spinlock to zones_mutex mutex,
and struct (tcf_ct_flow_table)->ref to a refcount,
so that control path can use regular GFP_KERNEL allocations
from standard process context. This is more robust
in case of memory pressure.

The refcount is needed because tcf_ct_flow_table_put() can
be called from RCU callback, thus in BH context.

The issue was spotted by syzbot, as rhashtable_init()
was called with a spinlock held, which is bad since GFP_KERNEL
allocations can sleep.

Note to developers : Please make sure your patches are tested
with CONFIG_DEBUG_ATOMIC_SLEEP=y

BUG: sleeping function called from invalid context at mm/slab.h:565
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 9582, name: syz-executor610
2 locks held by syz-executor610/9582:
 #0: ffffffff8a34eb80 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:72 [inline]
 #0: ffffffff8a34eb80 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x3f9/0xad0 net/core/rtnetlink.c:5437
 #1: ffffffff8a3961b8 (zones_lock){+...}, at: spin_lock_bh include/linux/spinlock.h:343 [inline]
 #1: ffffffff8a3961b8 (zones_lock){+...}, at: tcf_ct_flow_table_get+0xa3/0x1700 net/sched/act_ct.c:67
Preemption disabled at:
[<0000000000000000>] 0x0
CPU: 0 PID: 9582 Comm: syz-executor610 Not tainted 5.6.0-rc3-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x188/0x20d lib/dump_stack.c:118
 ___might_sleep.cold+0x1f4/0x23d kernel/sched/core.c:6798
 slab_pre_alloc_hook mm/slab.h:565 [inline]
 slab_alloc_node mm/slab.c:3227 [inline]
 kmem_cache_alloc_node_trace+0x272/0x790 mm/slab.c:3593
 __do_kmalloc_node mm/slab.c:3615 [inline]
 __kmalloc_node+0x38/0x60 mm/slab.c:3623
 kmalloc_node include/linux/slab.h:578 [inline]
 kvmalloc_node+0x61/0xf0 mm/util.c:574
 kvmalloc include/linux/mm.h:645 [inline]
 kvzalloc include/linux/mm.h:653 [inline]
 bucket_table_alloc+0x8b/0x480 lib/rhashtable.c:175
 rhashtable_init+0x3d2/0x750 lib/rhashtable.c:1054
 nf_flow_table_init+0x16d/0x310 net/netfilter/nf_flow_table_core.c:498
 tcf_ct_flow_table_get+0xe33/0x1700 net/sched/act_ct.c:82
 tcf_ct_init+0xba4/0x18a6 net/sched/act_ct.c:1050
 tcf_action_init_1+0x697/0xa20 net/sched/act_api.c:945
 tcf_action_init+0x1e9/0x2f0 net/sched/act_api.c:1001
 tcf_action_add+0xdb/0x370 net/sched/act_api.c:1411
 tc_ctl_action+0x366/0x456 net/sched/act_api.c:1466
 rtnetlink_rcv_msg+0x44e/0xad0 net/core/rtnetlink.c:5440
 netlink_rcv_skb+0x15a/0x410 net/netlink/af_netlink.c:2478
 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
 netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329
 netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918
 sock_sendmsg_nosec net/socket.c:652 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:672
 ____sys_sendmsg+0x6b9/0x7d0 net/socket.c:2343
 ___sys_sendmsg+0x100/0x170 net/socket.c:2397
 __sys_sendmsg+0xec/0x1b0 net/socket.c:2430
 do_syscall_64+0xf6/0x790 arch/x86/entry/common.c:294
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4403d9
Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007ffd719af218 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 00000000004403d9
RDX: 0000000000000000 RSI: 0000000020000300 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 0000000000000005 R09: 00000000004002c8
R10: 0000000000000008 R11: 00000000000

Fixes: c34b961a ("net/sched: act_ct: Create nf flow table per zone")
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Cc: Paul Blakey <paulb@mellanox.com>
Cc: Jiri Pirko <jiri@mellanox.com>
Reported-by: default avatarsyzbot <syzkaller@googlegroups.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 376d5307
......@@ -35,15 +35,15 @@
static struct workqueue_struct *act_ct_wq;
static struct rhashtable zones_ht;
static DEFINE_SPINLOCK(zones_lock);
static DEFINE_MUTEX(zones_mutex);
struct tcf_ct_flow_table {
struct rhash_head node; /* In zones tables */
struct rcu_work rwork;
struct nf_flowtable nf_ft;
refcount_t ref;
u16 zone;
u32 ref;
bool dying;
};
......@@ -64,14 +64,15 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
struct tcf_ct_flow_table *ct_ft;
int err = -ENOMEM;
spin_lock_bh(&zones_lock);
mutex_lock(&zones_mutex);
ct_ft = rhashtable_lookup_fast(&zones_ht, &params->zone, zones_params);
if (ct_ft)
goto take_ref;
if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
goto out_unlock;
ct_ft = kzalloc(sizeof(*ct_ft), GFP_ATOMIC);
ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL);
if (!ct_ft)
goto err_alloc;
refcount_set(&ct_ft->ref, 1);
ct_ft->zone = params->zone;
err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
......@@ -84,10 +85,9 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
goto err_init;
__module_get(THIS_MODULE);
take_ref:
out_unlock:
params->ct_ft = ct_ft;
ct_ft->ref++;
spin_unlock_bh(&zones_lock);
mutex_unlock(&zones_mutex);
return 0;
......@@ -96,7 +96,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
err_insert:
kfree(ct_ft);
err_alloc:
spin_unlock_bh(&zones_lock);
mutex_unlock(&zones_mutex);
return err;
}
......@@ -116,13 +116,11 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
{
struct tcf_ct_flow_table *ct_ft = params->ct_ft;
spin_lock_bh(&zones_lock);
if (--params->ct_ft->ref == 0) {
if (refcount_dec_and_test(&params->ct_ft->ref)) {
rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
queue_rcu_work(act_ct_wq, &ct_ft->rwork);
}
spin_unlock_bh(&zones_lock);
}
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment