• Eric Dumazet's avatar
    net/sched: accept TCA_STAB only for root qdisc · 3cb7cf15
    Eric Dumazet authored
    Most qdiscs maintain their backlog using qdisc_pkt_len(skb)
    on the assumption it is invariant between the enqueue()
    and dequeue() handlers.
    
    Unfortunately syzbot can crash a host rather easily using
    a TBF + SFQ combination, with an STAB on SFQ [1]
    
    We can't support TCA_STAB on arbitrary level, this would
    require to maintain per-qdisc storage.
    
    [1]
    [   88.796496] BUG: kernel NULL pointer dereference, address: 0000000000000000
    [   88.798611] #PF: supervisor read access in kernel mode
    [   88.799014] #PF: error_code(0x0000) - not-present page
    [   88.799506] PGD 0 P4D 0
    [   88.799829] Oops: Oops: 0000 [#1] SMP NOPTI
    [   88.800569] CPU: 14 UID: 0 PID: 2053 Comm: b371744477 Not tainted 6.12.0-rc1-virtme #1117
    [   88.801107] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
    [   88.801779] RIP: 0010:sfq_dequeue (net/sched/sch_sfq.c:272 net/sched/sch_sfq.c:499) sch_sfq
    [ 88.802544] Code: 0f b7 50 12 48 8d 04 d5 00 00 00 00 48 89 d6 48 29 d0 48 8b 91 c0 01 00 00 48 c1 e0 03 48 01 c2 66 83 7a 1a 00 7e c0 48 8b 3a <4c> 8b 07 4c 89 02 49 89 50 08 48 c7 47 08 00 00 00 00 48 c7 07 00
    All code
    ========
       0:	0f b7 50 12          	movzwl 0x12(%rax),%edx
       4:	48 8d 04 d5 00 00 00 	lea    0x0(,%rdx,8),%rax
       b:	00
       c:	48 89 d6             	mov    %rdx,%rsi
       f:	48 29 d0             	sub    %rdx,%rax
      12:	48 8b 91 c0 01 00 00 	mov    0x1c0(%rcx),%rdx
      19:	48 c1 e0 03          	shl    $0x3,%rax
      1d:	48 01 c2             	add    %rax,%rdx
      20:	66 83 7a 1a 00       	cmpw   $0x0,0x1a(%rdx)
      25:	7e c0                	jle    0xffffffffffffffe7
      27:	48 8b 3a             	mov    (%rdx),%rdi
      2a:*	4c 8b 07             	mov    (%rdi),%r8		<-- trapping instruction
      2d:	4c 89 02             	mov    %r8,(%rdx)
      30:	49 89 50 08          	mov    %rdx,0x8(%r8)
      34:	48 c7 47 08 00 00 00 	movq   $0x0,0x8(%rdi)
      3b:	00
      3c:	48                   	rex.W
      3d:	c7                   	.byte 0xc7
      3e:	07                   	(bad)
    	...
    
    Code starting with the faulting instruction
    ===========================================
       0:	4c 8b 07             	mov    (%rdi),%r8
       3:	4c 89 02             	mov    %r8,(%rdx)
       6:	49 89 50 08          	mov    %rdx,0x8(%r8)
       a:	48 c7 47 08 00 00 00 	movq   $0x0,0x8(%rdi)
      11:	00
      12:	48                   	rex.W
      13:	c7                   	.byte 0xc7
      14:	07                   	(bad)
    	...
    [   88.803721] RSP: 0018:ffff9a1f892b7d58 EFLAGS: 00000206
    [   88.804032] RAX: 0000000000000000 RBX: ffff9a1f8420c800 RCX: ffff9a1f8420c800
    [   88.804560] RDX: ffff9a1f81bc1440 RSI: 0000000000000000 RDI: 0000000000000000
    [   88.805056] RBP: ffffffffc04bb0e0 R08: 0000000000000001 R09: 00000000ff7f9a1f
    [   88.805473] R10: 000000000001001b R11: 0000000000009a1f R12: 0000000000000140
    [   88.806194] R13: 0000000000000001 R14: ffff9a1f886df400 R15: ffff9a1f886df4ac
    [   88.806734] FS:  00007f445601a740(0000) GS:ffff9a2e7fd80000(0000) knlGS:0000000000000000
    [   88.807225] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    [   88.807672] CR2: 0000000000000000 CR3: 000000050cc46000 CR4: 00000000000006f0
    [   88.808165] Call Trace:
    [   88.808459]  <TASK>
    [   88.808710] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434)
    [   88.809261] ? page_fault_oops (arch/x86/mm/fault.c:715)
    [   88.809561] ? exc_page_fault (./arch/x86/include/asm/irqflags.h:26 ./arch/x86/include/asm/irqflags.h:87 ./arch/x86/include/asm/irqflags.h:147 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539)
    [   88.809806] ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623)
    [   88.810074] ? sfq_dequeue (net/sched/sch_sfq.c:272 net/sched/sch_sfq.c:499) sch_sfq
    [   88.810411] sfq_reset (net/sched/sch_sfq.c:525) sch_sfq
    [   88.810671] qdisc_reset (./include/linux/skbuff.h:2135 ./include/linux/skbuff.h:2441 ./include/linux/skbuff.h:3304 ./include/linux/skbuff.h:3310 net/sched/sch_generic.c:1036)
    [   88.810950] tbf_reset (./include/linux/timekeeping.h:169 net/sched/sch_tbf.c:334) sch_tbf
    [   88.811208] qdisc_reset (./include/linux/skbuff.h:2135 ./include/linux/skbuff.h:2441 ./include/linux/skbuff.h:3304 ./include/linux/skbuff.h:3310 net/sched/sch_generic.c:1036)
    [   88.811484] netif_set_real_num_tx_queues (./include/linux/spinlock.h:396 ./include/net/sch_generic.h:768 net/core/dev.c:2958)
    [   88.811870] __tun_detach (drivers/net/tun.c:590 drivers/net/tun.c:673)
    [   88.812271] tun_chr_close (drivers/net/tun.c:702 drivers/net/tun.c:3517)
    [   88.812505] __fput (fs/file_table.c:432 (discriminator 1))
    [   88.812735] task_work_run (kernel/task_work.c:230)
    [   88.813016] do_exit (kernel/exit.c:940)
    [   88.813372] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:58 (discriminator 4))
    [   88.813639] ? handle_mm_fault (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/memcontrol.h:1022 ./include/linux/memcontrol.h:1045 ./include/linux/memcontrol.h:1052 mm/memory.c:5928 mm/memory.c:6088)
    [   88.813867] do_group_exit (kernel/exit.c:1070)
    [   88.814138] __x64_sys_exit_group (kernel/exit.c:1099)
    [   88.814490] x64_sys_call (??:?)
    [   88.814791] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1))
    [   88.815012] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
    [   88.815495] RIP: 0033:0x7f44560f1975
    
    Fixes: 175f9c1b ("net_sched: Add size table for qdiscs")
    Reported-by: default avatarsyzbot <syzkaller@googlegroups.com>
    Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
    Cc: Daniel Borkmann <daniel@iogearbox.net>
    Link: https://patch.msgid.link/20241007184130.3960565-1-edumazet@google.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
    3cb7cf15
sch_api.c 58.9 KB