1. 14 Jul, 2020 12 commits
  2. 13 Jul, 2020 5 commits
  3. 12 Jul, 2020 6 commits
    • Christophe JAILLET's avatar
      net: sky2: switch from 'pci_' to 'dma_' API · c86768cf
      Christophe JAILLET authored
      The wrappers in include/linux/pci-dma-compat.h should go away.
      
      The patch has been generated with the coccinelle script below and has been
      hand modified to replace GFP_ with a correct flag.
      It has been compile tested.
      
      When memory is allocated in 'sky2_alloc_buffers()', GFP_KERNEL can be used
      because some other memory allocations in the same function already use this
      flag.
      
      When memory is allocated in 'sky2_probe()', GFP_KERNEL can be used
      because another memory allocation in the same function already uses this
      flag.
      
      @@
      @@
      -    PCI_DMA_BIDIRECTIONAL
      +    DMA_BIDIRECTIONAL
      
      @@
      @@
      -    PCI_DMA_TODEVICE
      +    DMA_TO_DEVICE
      
      @@
      @@
      -    PCI_DMA_FROMDEVICE
      +    DMA_FROM_DEVICE
      
      @@
      @@
      -    PCI_DMA_NONE
      +    DMA_NONE
      
      @@
      expression e1, e2, e3;
      @@
      -    pci_alloc_consistent(e1, e2, e3)
      +    dma_alloc_coherent(&e1->dev, e2, e3, GFP_)
      
      @@
      expression e1, e2, e3;
      @@
      -    pci_zalloc_consistent(e1, e2, e3)
      +    dma_alloc_coherent(&e1->dev, e2, e3, GFP_)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_free_consistent(e1, e2, e3, e4)
      +    dma_free_coherent(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_map_single(e1, e2, e3, e4)
      +    dma_map_single(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_unmap_single(e1, e2, e3, e4)
      +    dma_unmap_single(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4, e5;
      @@
      -    pci_map_page(e1, e2, e3, e4, e5)
      +    dma_map_page(&e1->dev, e2, e3, e4, e5)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_unmap_page(e1, e2, e3, e4)
      +    dma_unmap_page(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_map_sg(e1, e2, e3, e4)
      +    dma_map_sg(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_unmap_sg(e1, e2, e3, e4)
      +    dma_unmap_sg(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_dma_sync_single_for_cpu(e1, e2, e3, e4)
      +    dma_sync_single_for_cpu(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_dma_sync_single_for_device(e1, e2, e3, e4)
      +    dma_sync_single_for_device(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_dma_sync_sg_for_cpu(e1, e2, e3, e4)
      +    dma_sync_sg_for_cpu(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_dma_sync_sg_for_device(e1, e2, e3, e4)
      +    dma_sync_sg_for_device(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2;
      @@
      -    pci_dma_mapping_error(e1, e2)
      +    dma_mapping_error(&e1->dev, e2)
      
      @@
      expression e1, e2;
      @@
      -    pci_set_dma_mask(e1, e2)
      +    dma_set_mask(&e1->dev, e2)
      
      @@
      expression e1, e2;
      @@
      -    pci_set_consistent_dma_mask(e1, e2)
      +    dma_set_coherent_mask(&e1->dev, e2)
      Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      c86768cf
    • Christophe JAILLET's avatar
      net: skge: switch from 'pci_' to 'dma_' API · 6d905436
      Christophe JAILLET authored
      The wrappers in include/linux/pci-dma-compat.h should go away.
      
      The patch has been generated with the coccinelle script below and has been
      hand modified to replace GFP_ with a correct flag.
      It has been compile tested.
      
      When memory is allocated in 'skge_up()', GFP_KERNEL can be used because
      some other memory allocations done a few lines below in 'skge_ring_alloc()'
      already use this flag.
      
      @@
      @@
      -    PCI_DMA_BIDIRECTIONAL
      +    DMA_BIDIRECTIONAL
      
      @@
      @@
      -    PCI_DMA_TODEVICE
      +    DMA_TO_DEVICE
      
      @@
      @@
      -    PCI_DMA_FROMDEVICE
      +    DMA_FROM_DEVICE
      
      @@
      @@
      -    PCI_DMA_NONE
      +    DMA_NONE
      
      @@
      expression e1, e2, e3;
      @@
      -    pci_alloc_consistent(e1, e2, e3)
      +    dma_alloc_coherent(&e1->dev, e2, e3, GFP_)
      
      @@
      expression e1, e2, e3;
      @@
      -    pci_zalloc_consistent(e1, e2, e3)
      +    dma_alloc_coherent(&e1->dev, e2, e3, GFP_)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_free_consistent(e1, e2, e3, e4)
      +    dma_free_coherent(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_map_single(e1, e2, e3, e4)
      +    dma_map_single(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_unmap_single(e1, e2, e3, e4)
      +    dma_unmap_single(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4, e5;
      @@
      -    pci_map_page(e1, e2, e3, e4, e5)
      +    dma_map_page(&e1->dev, e2, e3, e4, e5)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_unmap_page(e1, e2, e3, e4)
      +    dma_unmap_page(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_map_sg(e1, e2, e3, e4)
      +    dma_map_sg(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_unmap_sg(e1, e2, e3, e4)
      +    dma_unmap_sg(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_dma_sync_single_for_cpu(e1, e2, e3, e4)
      +    dma_sync_single_for_cpu(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_dma_sync_single_for_device(e1, e2, e3, e4)
      +    dma_sync_single_for_device(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_dma_sync_sg_for_cpu(e1, e2, e3, e4)
      +    dma_sync_sg_for_cpu(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2, e3, e4;
      @@
      -    pci_dma_sync_sg_for_device(e1, e2, e3, e4)
      +    dma_sync_sg_for_device(&e1->dev, e2, e3, e4)
      
      @@
      expression e1, e2;
      @@
      -    pci_dma_mapping_error(e1, e2)
      +    dma_mapping_error(&e1->dev, e2)
      
      @@
      expression e1, e2;
      @@
      -    pci_set_dma_mask(e1, e2)
      +    dma_set_mask(&e1->dev, e2)
      
      @@
      expression e1, e2;
      @@
      -    pci_set_consistent_dma_mask(e1, e2)
      +    dma_set_coherent_mask(&e1->dev, e2)
      Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      6d905436
    • David S. Miller's avatar
      Merge branch 'Fix-MTU-warnings-for-fec-mv886xxx-combo' · 2faf34ba
      David S. Miller authored
      Andrew Lunn says:
      
      ====================
      Fix MTU warnings for fec/mv88e6xxx combo
      
      Since changing the MTU of dsa slave interfaces was implemented, the
      fec/mv88e6xxx combo has been giving warnings:
      
      [    2.275925] mv88e6085 0.2:00: nonfatal error -95 setting MTU on port 9
      [    2.284306] eth1: mtu greater than device maximum
      [    2.287759] fec 400d1000.ethernet eth1: error -22 setting MTU to include DSA overhead
      
      This patchset adds support for changing the MTU on mv88e6xxx switches,
      which do support jumbo frames. And it modifies the FEC driver to
      support its true MTU range, which is larger than the default Ethernet
      MTU.
      ====================
      Tested-by: Chris Healy <cphealy@gmail.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      2faf34ba
    • Andrew Lunn's avatar
      net: fec: Set max MTU size to allow the MTU to be changed · 59193053
      Andrew Lunn authored
      The FEC allocates 2K buffers, but loses some of it due to
      alignment. It can however support an MTU bigger than the default. This
      is particularly interesting when used in combination with Ethernet
      switches supporting DSA, which have extra headers. The DSA core will
      try to increase the MTU to support these extra headers. If the max
      size defaults to that of standard Ethernet we get a warning. By
      setting the max to what the driver actually supports, we avoid this
      warning.
      Signed-off-by: Andrew Lunn <andrew@lunn.ch>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      59193053
    • Andrew Lunn's avatar
      net: dsa: mv88e6xxx: Implement MTU change · 2a550aec
      Andrew Lunn authored
      The Marvell Switches support jumbo frames. So implement the
      callbacks needed for changing the MTU.
      Signed-off-by: Andrew Lunn <andrew@lunn.ch>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      2a550aec
    • Nikolay Aleksandrov's avatar
      net: bridge: notify on vlan tunnel changes done via the old api · 94339443
      Nikolay Aleksandrov authored
      If someone uses the old vlan API to configure tunnel mappings we'll only
      generate the old-style full port notification. That would be a problem
      if we are monitoring the new vlan notifications for changes. The patch
      resolves the issue by adding vlan notifications to the old tunnel netlink
      code. As usual we try to compress the notifications for as many vlans
      in a range as possible, thus a vlan tunnel change is considered able
      to enter the "current" vlan notification range if:
       1. vlan exists
       2. it has actually changed (curr_change == true)
       3. it passes all standard vlan notification range checks done by
          br_vlan_can_enter_range() such as option equality, id continuity etc
      
      Note that vlan tunnel changes (add/del) are considered a part of vlan
      options so only RTM_NEWVLAN notification is generated with the relevant
      information inside.
      Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      94339443
  4. 11 Jul, 2020 5 commits
    • David S. Miller's avatar
      Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net · 71930d61
      David S. Miller authored
      All conflicts seemed rather trivial, with some guidance from
      Saeed Mahameed on the tc_ct.c one.
      Signed-off-by: David S. Miller <davem@davemloft.net>
      71930d61
    • Linus Torvalds's avatar
      Merge tag 'libnvdimm-fix-v5.8-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm · 1df0d896
      Linus Torvalds authored
      Pull libnvdimm fix from Dan Williams:
       "A one-line Fix for key ring search permissions to address a regression
        from -rc1"
      
      * tag 'libnvdimm-fix-v5.8-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
        libnvdimm/security: Fix key lookup permissions
      1df0d896
    • Linus Torvalds's avatar
      Merge tag '5.8-rc4-smb3-fixes' of git://git.samba.org/sfrench/cifs-2.6 · 5ab39e08
      Linus Torvalds authored
      Pull cifs fixes from Steve French:
       "Four cifs/smb3 fixes: the three for stable fix problems found recently
        with change notification including a reference count leak"
      
      * tag '5.8-rc4-smb3-fixes' of git://git.samba.org/sfrench/cifs-2.6:
        cifs: update internal module version number
        cifs: fix reference leak for tlink
        smb3: fix unneeded error message on change notify
        cifs: remove the retry in cifs_poxis_lock_set
        smb3: fix access denied on change notify request to some servers
      5ab39e08
    • Linus Torvalds's avatar
      Merge tag 'inclusive-terminology' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/linux · 49decddd
      Linus Torvalds authored
      Pull coding style terminology documentation from Dan Williams:
       "The discussion has tapered off as well as the incoming ack, review,
        and sign-off tags. I did not see a reason to wait for the next merge
        window"
      
      * tag 'inclusive-terminology' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/linux:
        CodingStyle: Inclusive Terminology
      49decddd
    • Linus Torvalds's avatar
      Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net · 5a764898
      Linus Torvalds authored
      Pull networking fixes from David Miller:
      
       1) Restore previous behavior of CAP_SYS_ADMIN wrt loading networking
          BPF programs, from Maciej Żenczykowski.
      
       2) Fix dropped broadcasts in mac80211 code, from Seevalamuthu
          Mariappan.
      
       3) Slay memory leak in nl80211 bss color attribute parsing code, from
          Luca Coelho.
      
       4) Get route from skb properly in ip_route_use_hint(), from Miaohe Lin.
      
       5) Don't allow anything other than ARPHRD_ETHER in llc code, from Eric
          Dumazet.
      
       6) xsk code dips too deeply into DMA mapping implementation internals.
          Add dma_need_sync and use it. From Christoph Hellwig.
      
       7) Enforce power-of-2 for BPF ringbuf sizes. From Andrii Nakryiko.
      
       8) Check for disallowed attributes when loading flow dissector BPF
          programs. From Lorenz Bauer.
      
       9) Correct packet injection to L3 tunnel devices via AF_PACKET, from
          Jason A. Donenfeld.
      
      10) Don't advertise checksum offload on ipa devices that don't support
          it. From Alex Elder.
      
      11) Resolve several issues in TCP MD5 signature support. Missing memory
          barriers, bogus options emitted when using syncookies, and failure
          to allow md5 key changes in established states. All from Eric
          Dumazet.
      
      12) Fix interface leak in hsr code, from Taehee Yoo.
      
      13) VF reset fixes in hns3 driver, from Huazhong Tan.
      
      14) Make loopback work again with ipv6 anycast, from David Ahern.
      
      15) Fix TX starvation under high load in fec driver, from Tobias
          Waldekranz.
      
      16) MLD2 payload lengths not checked properly in bridge multicast code,
          from Linus Lüssing.
      
      17) Packet scheduler code that wants to find the inner protocol
          currently only works for one level of VLAN encapsulation. Allow
          Q-in-Q situations to work properly here, from Toke
          Høiland-Jørgensen.
      
      18) Fix route leak in l2tp, from Xin Long.
      
      19) Resolve conflict between the sk->sk_user_data usage of bpf reuseport
          support and various protocols. From Martin KaFai Lau.
      
      20) Fix socket cgroup v2 reference counting in some situations, from
          Cong Wang.
      
      21) Cure memory leak in mlx5 connection tracking offload support, from
          Eli Britstein.
      
      * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (146 commits)
        mlxsw: pci: Fix use-after-free in case of failed devlink reload
        mlxsw: spectrum_router: Remove inappropriate usage of WARN_ON()
        net: macb: fix call to pm_runtime in the suspend/resume functions
        net: macb: fix macb_suspend() by removing call to netif_carrier_off()
        net: macb: fix macb_get/set_wol() when moving to phylink
        net: macb: mark device wake capable when "magic-packet" property present
        net: macb: fix wakeup test in runtime suspend/resume routines
        bnxt_en: fix NULL dereference in case SR-IOV configuration fails
        libbpf: Fix libbpf hashmap on (I)LP32 architectures
        net/mlx5e: CT: Fix memory leak in cleanup
        net/mlx5e: Fix port buffers cell size value
        net/mlx5e: Fix 50G per lane indication
        net/mlx5e: Fix CPU mapping after function reload to avoid aRFS RX crash
        net/mlx5e: Fix VXLAN configuration restore after function reload
        net/mlx5e: Fix usage of rcu-protected pointer
        net/mxl5e: Verify that rpriv is not NULL
        net/mlx5: E-Switch, Fix vlan or qos setting in legacy mode
        net/mlx5: Fix eeprom support for SFP module
        cgroup: Fix sock_cgroup_data on big-endian.
        selftests: bpf: Fix detach from sockmap tests
        ...
      5a764898
  5. 10 Jul, 2020 12 commits
    • Nathan Chancellor's avatar
      mips: Remove compiler check in unroll macro · 9321f1aa
      Nathan Chancellor authored
      CONFIG_CC_IS_GCC is undefined when Clang is used, which breaks the build
      (see our Travis link below).
      
      Clang 8 was chosen as a minimum version for this check because there
      were some improvements around __builtin_constant_p in that release. In
      reality, MIPS was not even buildable until clang 9 so that check was not
      technically necessary. Just remove all compiler checks and just assume
      that we have a working compiler.
      
      Fixes: d4e60453 ("Restore gcc check in mips asm/unroll.h")
      Link: https://travis-ci.com/github/ClangBuiltLinux/continuous-integration/jobs/359642821
      Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
      Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
      9321f1aa
    • Kuniyuki Iwashima's avatar
      inet: Remove an unnecessary argument of syn_ack_recalc(). · a594920f
      Kuniyuki Iwashima authored
      Commit 0c3d79bc ("tcp: reduce SYN-ACK
      retrans for TCP_DEFER_ACCEPT") introduces syn_ack_recalc() which decides
      if a minisock is held and a SYN+ACK is retransmitted or not.
      
      If rskq_defer_accept is not zero in syn_ack_recalc(), max_retries always
      has the same value because max_retries is overwritten by rskq_defer_accept
      in reqsk_timer_handler().
      
      This commit adds three changes:
      - remove redundant non-zero check for rskq_defer_accept in
         reqsk_timer_handler().
      - remove max_retries from the arguments of syn_ack_recalc() and use
         rskq_defer_accept instead.
      - rename thresh to max_syn_ack_retries for readability.
      Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
      Reviewed-by: Benjamin Herrenschmidt <benh@amazon.com>
      CC: Julian Anastasov <ja@ssi.bg>
      Signed-off-by: Eric Dumazet <edumazet@google.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      a594920f
    • David S. Miller's avatar
      Merge branch 'mlxsw-Various-fixes' · 1195c7ce
      David S. Miller authored
      Ido Schimmel says:
      
      ====================
      mlxsw: Various fixes
      
      Fix two issues found by syzkaller.
      
      Patch #1 removes inappropriate usage of WARN_ON() following memory
      allocation failure. Constantly triggered when syzkaller injects faults.
      
      Patch #2 fixes a use-after-free that can be triggered by 'devlink dev
      info' following a failed devlink reload.
      ====================
      Signed-off-by: David S. Miller <davem@davemloft.net>
      1195c7ce
    • Ido Schimmel's avatar
      mlxsw: pci: Fix use-after-free in case of failed devlink reload · c4317b11
      Ido Schimmel authored
      In case devlink reload failed, it is possible to trigger a
      use-after-free when querying the kernel for device info via 'devlink dev
      info' [1].
      
      This happens because as part of the reload error path the PCI command
      interface is de-initialized and its mailboxes are freed. When the
      devlink '->info_get()' callback is invoked the device is queried via the
      command interface and the freed mailboxes are accessed.
      
      Fix this by initializing the command interface once during probe and not
      during every reload.
      
      This is consistent with the other bus used by mlxsw (i.e., 'mlxsw_i2c')
      and also allows user space to query the running firmware version (for
      example) from the device after a failed reload.
      
      [1]
      BUG: KASAN: use-after-free in memcpy include/linux/string.h:406 [inline]
      BUG: KASAN: use-after-free in mlxsw_pci_cmd_exec+0x177/0xa60 drivers/net/ethernet/mellanox/mlxsw/pci.c:1675
      Write of size 4096 at addr ffff88810ae32000 by task syz-executor.1/2355
      
      CPU: 1 PID: 2355 Comm: syz-executor.1 Not tainted 5.8.0-rc2+ #29
      Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
      Call Trace:
       __dump_stack lib/dump_stack.c:77 [inline]
       dump_stack+0xf6/0x16e lib/dump_stack.c:118
       print_address_description.constprop.0+0x1c/0x250 mm/kasan/report.c:383
       __kasan_report mm/kasan/report.c:513 [inline]
       kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530
       check_memory_region_inline mm/kasan/generic.c:186 [inline]
       check_memory_region+0x14e/0x1b0 mm/kasan/generic.c:192
       memcpy+0x39/0x60 mm/kasan/common.c:106
       memcpy include/linux/string.h:406 [inline]
       mlxsw_pci_cmd_exec+0x177/0xa60 drivers/net/ethernet/mellanox/mlxsw/pci.c:1675
       mlxsw_cmd_exec+0x249/0x550 drivers/net/ethernet/mellanox/mlxsw/core.c:2335
       mlxsw_cmd_access_reg drivers/net/ethernet/mellanox/mlxsw/cmd.h:859 [inline]
       mlxsw_core_reg_access_cmd drivers/net/ethernet/mellanox/mlxsw/core.c:1938 [inline]
       mlxsw_core_reg_access+0x2f6/0x540 drivers/net/ethernet/mellanox/mlxsw/core.c:1985
       mlxsw_reg_query drivers/net/ethernet/mellanox/mlxsw/core.c:2000 [inline]
       mlxsw_devlink_info_get+0x17f/0x6e0 drivers/net/ethernet/mellanox/mlxsw/core.c:1090
       devlink_nl_info_fill.constprop.0+0x13c/0x2d0 net/core/devlink.c:4588
       devlink_nl_cmd_info_get_dumpit+0x246/0x460 net/core/devlink.c:4648
       genl_lock_dumpit+0x85/0xc0 net/netlink/genetlink.c:575
       netlink_dump+0x515/0xe50 net/netlink/af_netlink.c:2245
       __netlink_dump_start+0x53d/0x830 net/netlink/af_netlink.c:2353
       genl_family_rcv_msg_dumpit.isra.0+0x296/0x300 net/netlink/genetlink.c:638
       genl_family_rcv_msg net/netlink/genetlink.c:733 [inline]
       genl_rcv_msg+0x78d/0x9d0 net/netlink/genetlink.c:753
       netlink_rcv_skb+0x152/0x440 net/netlink/af_netlink.c:2469
       genl_rcv+0x24/0x40 net/netlink/genetlink.c:764
       netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
       netlink_unicast+0x53a/0x750 net/netlink/af_netlink.c:1329
       netlink_sendmsg+0x850/0xd90 net/netlink/af_netlink.c:1918
       sock_sendmsg_nosec net/socket.c:652 [inline]
       sock_sendmsg+0x150/0x190 net/socket.c:672
       ____sys_sendmsg+0x6d8/0x840 net/socket.c:2363
       ___sys_sendmsg+0xff/0x170 net/socket.c:2417
       __sys_sendmsg+0xe5/0x1b0 net/socket.c:2450
       do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:359
       entry_SYSCALL_64_after_hwframe+0x44/0xa9
      
      Fixes: a9c8336f ("mlxsw: core: Add support for devlink info command")
      Signed-off-by: Ido Schimmel <idosch@mellanox.com>
      Reviewed-by: Jiri Pirko <jiri@mellanox.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      c4317b11
    • Ido Schimmel's avatar
      mlxsw: spectrum_router: Remove inappropriate usage of WARN_ON() · d9d54202
      Ido Schimmel authored
      We should not trigger a warning when a memory allocation fails. Remove
      the WARN_ON().
      
      The warning is constantly triggered by syzkaller when it is injecting
      faults:
      
      [ 2230.758664] FAULT_INJECTION: forcing a failure.
      [ 2230.758664] name failslab, interval 1, probability 0, space 0, times 0
      [ 2230.762329] CPU: 3 PID: 1407 Comm: syz-executor.0 Not tainted 5.8.0-rc2+ #28
      ...
      [ 2230.898175] WARNING: CPU: 3 PID: 1407 at drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:6265 mlxsw_sp_router_fib_event+0xfad/0x13e0
      [ 2230.898179] Kernel panic - not syncing: panic_on_warn set ...
      [ 2230.898183] CPU: 3 PID: 1407 Comm: syz-executor.0 Not tainted 5.8.0-rc2+ #28
      [ 2230.898190] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
      
      Fixes: 3057224e ("mlxsw: spectrum_router: Implement FIB offload in deferred work")
      Signed-off-by: Ido Schimmel <idosch@mellanox.com>
      Reviewed-by: Jiri Pirko <jiri@mellanox.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      d9d54202
    • David S. Miller's avatar
      Merge branch 'devlink-health' · ac4cd478
      David S. Miller authored
      Moshe Shemesh says:
      
      ====================
      Add devlink-health support for devlink ports
      
      Implement support for devlink health reporters on per-port basis.
      
      This patchset comes to fix a design issue as some health reporters report
      on errors and run recovery on device level while the actual functionality
      is on port level. As for the current implemented devlink health reporters
      it is relevant only to Tx and Rx reporters of mlx5, which has only one
      port, so no real effect on functionality, but this should be fixed before
      more drivers will use devlink health reporters.
      
      First part in the series prepares common functions parts for health
      reporter implementation. Second introduces required API to devlink-health
      and mlx5e ones demonstrate its usage and implement the feature for mlx5
      driver.
      
      The per-port reporter functionality is achieved by adding a list of
      devlink_health_reporters to devlink_port struct in a manner similar to
      existing device infrastructure. This is the only major difference and
      it makes it possible to fully reuse device reporters operations.
      The effect will be seen in conjunction with iproute2 additions and
      will affect all devlink health commands. User can distinguish between
      device and port reporters by looking at a devlink handle. Port reporters
      have a port index at the end of the address and such addresses can be
      provided as a parameter in every place where devlink-health accepted it.
      These can be obtained from devlink port show command.
      For example:
      $ devlink health show
      pci/0000:00:0a.0:
        reporter fw
          state healthy error 0 recover 0 auto_dump true
      pci/0000:00:0a.0/1:
        reporter tx
          state healthy error 0 recover 0 grace_period 500 auto_recover true auto_dump true
      $ devlink health set pci/0000:00:0a.0/1 reporter tx grace_period 1000 \
      auto_recover false auto_dump false
      $ devlink health show pci/0000:00:0a.0/1 reporter tx
      pci/0000:00:0a.0/1:
        reporter tx
          state healthy error 0 recover 0 grace_period 1000 auto_recover false auto_dump false
      
      Note: The same devlink health uAPI commands can now address either a
      port health reporter or a device health reporter.
      For example, the recover command:
      Before this patchset: devlink health recover DEV reporter REPORTER_NAME
      After this patchset: devlink health recover { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME
      
      Changes v1 -> v2:
      Fixed functions comment to match parameters list.
      
      Changes v2 -> v3:
      Added motivation to cover letter and note on uAPI.
      ====================
      Signed-off-by: David S. Miller <davem@davemloft.net>
      ac4cd478
    • Vladyslav Tarasiuk's avatar
      net/mlx5e: Move devlink-health rx and tx reporters to devlink port · b7e93bb6
      Vladyslav Tarasiuk authored
      Utilize new devlink-health port reporters API to move rx and tx
      reporters from device to port.
      Signed-off-by: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
      Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
      Reviewed-by: Jiri Pirko <jiri@mellanox.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      b7e93bb6
    • Vladyslav Tarasiuk's avatar
      net/mlx5e: Move devlink port register and unregister calls · 4d54d325
      Vladyslav Tarasiuk authored
      Register devlink ports upon NIC init. TX and RX health reporters handle
      errors which may occur early on at driver initialization. And because
      these reporters are to be moved to port context, they require devlink
      ports to be already registered.
      Signed-off-by: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
      Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
      Reviewed-by: Jiri Pirko <jiri@mellanox.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      4d54d325
    • Vladyslav Tarasiuk's avatar
      devlink: Add devlink health port reporters API · 15c724b9
      Vladyslav Tarasiuk authored
      In order to use new devlink port health reporters infrastructure, add
      corresponding constructor and destructor functions.
      Signed-off-by: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
      Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
      Reviewed-by: Jiri Pirko <jiri@mellanox.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      15c724b9
    • Vladyslav Tarasiuk's avatar
      devlink: Implement devlink health reporters on per-port basis · f4f54166
      Vladyslav Tarasiuk authored
      Add devlink-health reporter support on per-port basis.
      The main difference from existing devlink-health is that port reporters are
      stored in per-devlink_port lists. Upon creation of such health reporter the
      reference to a port it belongs to is stored in reporter struct.
      
      Fill the port index attribute in devlink-health response to
      allow devlink userspace utility to distinguish between device and port
      reporters.
      Signed-off-by: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
      Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
      Reviewed-by: Jiri Pirko <jiri@mellanox.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      f4f54166
    • Vladyslav Tarasiuk's avatar
      devlink: Create generic devlink health reporter search function · bd821005
      Vladyslav Tarasiuk authored
      Add a generic __devlink_health_reporter_find_by_name() that can be used
      with arbitrary devlink health reporter list.
      Signed-off-by: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
      Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
      Reviewed-by: Jiri Pirko <jiri@mellanox.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      bd821005
    • Vladyslav Tarasiuk's avatar
      devlink: Rework devlink health reporter destructor · 3c5584bf
      Vladyslav Tarasiuk authored
      Devlink keeps its own reference to every reporter in a list and inits
      refcount to 1 upon reporter's creation. Existing destructor waits to
      free the memory indefinitely using msleep() until all references except
      devlink's own are put.
      
      Rework this mechanism by moving memory free routine to a separate
      function, which is called when the last reporter reference is put.
      
      Besides, it allows calling __devlink_health_reporter_destroy() while
      locked on a reporters list mutex in symmetry to
      __devlink_health_reporter_create(), which is required in follow-up
      patch.
      Signed-off-by: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
      Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
      Reviewed-by: Jiri Pirko <jiri@mellanox.com>
      Signed-off-by: David S. Miller <davem@davemloft.net>
      3c5584bf