Commit 528269fe authored by Paolo Abeni

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2024-07-09

The following pull-request contains BPF updates for your *net* tree.

We've added 3 non-merge commits during the last 1 day(s) which contain
a total of 5 files changed, 81 insertions(+), 11 deletions(-).

The main changes are:

1) Fix a use-after-free in a corner case where tcx_entry got released too
   early. Also add BPF test coverage along with the fix, from Daniel Borkmann.

2) Fix a kernel panic on Loongarch in sk_msg_recvmsg() which got triggered
   by running BPF sockmap selftests, from Geliang Tang.

bpf-for-netdev

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  skmsg: Skip zero length skb in sk_msg_recvmsg
  selftests/bpf: Extend tcx tests to cover late tcx_entry release
  bpf: Fix too early release of tcx_entry
====================

Link: https://patch.msgid.link/20240709091452.27840-1-daniel@iogearbox.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 0913ec33 f0c18025
......@@ -13,7 +13,7 @@ struct mini_Qdisc;
/* State kept for one tcx entry: a mini-Qdisc pointer, a BPF mprog bundle,
 * the miniq_active field and an RCU head.
 *
 * NOTE(review): this text is a rendered diff without +/- markers. The two
 * consecutive miniq_active lines look like the removed (bool) and the added
 * (u32) declaration of the same member - confirm against the real header.
 */
struct tcx_entry {
struct mini_Qdisc __rcu *miniq; /* handed to mini_qdisc_pair_init() by the qdisc init paths */
struct bpf_mprog_bundle bundle;
bool miniq_active; /* presumably the pre-change declaration (plain on/off flag) */
u32 miniq_active; /* incremented by tcx_miniq_inc(), decremented by tcx_miniq_dec() */
struct rcu_head rcu; /* presumably used for RCU-deferred freeing - TODO confirm */
};
......@@ -125,11 +125,16 @@ static inline void tcx_skeys_dec(bool ingress)
tcx_dec();
}
/* tcx_miniq_inc(): bump the miniq_active field of the tcx entry behind
 * @entry by one. The visible callers are the ingress and clsact qdisc init
 * paths, which call it right after fetching or creating the entry.
 *
 * ASSERT_RTNL() presumably checks that the caller holds the RTNL lock -
 * confirm against its definition.
 *
 * NOTE(review): this hunk interleaves diff lines without +/- markers. The
 * tcx_miniq_set_active() signature and the "= active" assignment look like
 * the removed version; tcx_miniq_inc() and the "++" line look like the
 * replacement - confirm against the real header.
 */
static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry,
const bool active)
static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry)
{
ASSERT_RTNL();
tcx_entry(entry)->miniq_active = active;
tcx_entry(entry)->miniq_active++;
}
static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry)
{
ASSERT_RTNL();
tcx_entry(entry)->miniq_active--;
}
static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry)
......
......@@ -434,7 +434,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
page = sg_page(sge);
if (copied + copy > len)
copy = len - copied;
copy = copy_page_to_iter(page, sge->offset, copy, iter);
if (copy)
copy = copy_page_to_iter(page, sge->offset, copy, iter);
if (!copy) {
copied = copied ? copied : -EFAULT;
goto out;
......
......@@ -91,7 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, true, &created);
if (!entry)
return -ENOMEM;
tcx_miniq_set_active(entry, true);
tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, true);
......@@ -121,7 +121,7 @@ static void ingress_destroy(struct Qdisc *sch)
tcf_block_put_ext(q->block, sch, &q->block_info);
if (entry) {
tcx_miniq_set_active(entry, false);
tcx_miniq_dec(entry);
if (!tcx_entry_is_active(entry)) {
tcx_entry_update(dev, NULL, true);
tcx_entry_free(entry);
......@@ -257,7 +257,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, true, &created);
if (!entry)
return -ENOMEM;
tcx_miniq_set_active(entry, true);
tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, true);
......@@ -276,7 +276,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
entry = tcx_entry_fetch_or_create(dev, false, &created);
if (!entry)
return -ENOMEM;
tcx_miniq_set_active(entry, true);
tcx_miniq_inc(entry);
mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq);
if (created)
tcx_entry_update(dev, entry, false);
......@@ -302,7 +302,7 @@ static void clsact_destroy(struct Qdisc *sch)
tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
if (ingress_entry) {
tcx_miniq_set_active(ingress_entry, false);
tcx_miniq_dec(ingress_entry);
if (!tcx_entry_is_active(ingress_entry)) {
tcx_entry_update(dev, NULL, true);
tcx_entry_free(ingress_entry);
......@@ -310,7 +310,7 @@ static void clsact_destroy(struct Qdisc *sch)
}
if (egress_entry) {
tcx_miniq_set_active(egress_entry, false);
tcx_miniq_dec(egress_entry);
if (!tcx_entry_is_active(egress_entry)) {
tcx_entry_update(dev, NULL, false);
tcx_entry_free(egress_entry);
......
......@@ -58,9 +58,12 @@ CONFIG_MPLS=y
CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
CONFIG_NET_ACT_SKBMOD=y
CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y
CONFIG_NET_CLS_BPF=y
CONFIG_NET_CLS_FLOWER=y
CONFIG_NET_CLS_MATCHALL=y
CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NET_IPGRE=y
......
......@@ -9,6 +9,8 @@
#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null"
#include "test_tc_link.skel.h"
#include "netlink_helpers.h"
#include "tc_helpers.h"
void serial_test_tc_links_basic(void)
......@@ -1787,6 +1789,65 @@ void serial_test_tc_links_ingress(void)
test_tc_links_ingress(BPF_TCX_INGRESS, false, false);
}
/* Request buffer that qdisc_replace() fills in and hands to rtnl_talk():
 * a netlink message header, a traffic-control message header and trailing
 * space.
 */
struct qdisc_req {
struct nlmsghdr n; /* netlink header; length, flags and type are set by qdisc_replace() */
struct tcmsg t; /* tc header; family, ifindex and parent are set by qdisc_replace() */
char buf[1024]; /* presumably where addattr_l()/addattr32() append attributes - confirm */
};
static int qdisc_replace(int ifindex, const char *kind, bool block)
{
struct rtnl_handle rth = { .fd = -1 };
struct qdisc_req req;
int err;
err = rtnl_open(&rth, 0);
if (!ASSERT_OK(err, "open_rtnetlink"))
return err;
memset(&req, 0, sizeof(req));
req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST;
req.n.nlmsg_type = RTM_NEWQDISC;
req.t.tcm_family = AF_UNSPEC;
req.t.tcm_ifindex = ifindex;
req.t.tcm_parent = 0xfffffff1;
addattr_l(&req.n, sizeof(req), TCA_KIND, kind, strlen(kind) + 1);
if (block)
addattr32(&req.n, sizeof(req), TCA_INGRESS_BLOCK, 1);
err = rtnl_talk(&rth, &req.n, NULL);
ASSERT_OK(err, "talk_rtnetlink");
rtnl_close(&rth);
return err;
}
/* Test that swaps qdiscs on a veth device and then adds an ingress filter.
 *
 * Steps, in order:
 *   1. create the veth pair foo/bar and look up foo's ifindex;
 *   2. install an "ingress" qdisc on foo bound to shared block 1;
 *   3. add a matchall/skbmod filter to block 1;
 *   4. replace the qdisc with "clsact" (no block);
 *   5. wait, then add a matchall/skbmod filter on foo's ingress.
 *
 * The device is deleted at the end on every path, and both interface names
 * are checked to be gone.
 */
void serial_test_tc_links_dev_chain0(void)
{
	int ifindex;

	ASSERT_OK(system("ip link add dev foo type veth peer name bar"), "add veth");
	ifindex = if_nametoindex("foo");
	ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");

	if (!ASSERT_OK(qdisc_replace(ifindex, "ingress", true), "attaching ingress"))
		goto cleanup;
	ASSERT_OK(system("tc filter add block 1 matchall action skbmod swap mac"), "add block");

	if (!ASSERT_OK(qdisc_replace(ifindex, "clsact", false), "attaching clsact"))
		goto cleanup;

	/* Heuristic: kern_sync_rcu() on its own is not enough here; waiting
	 * roughly 5s reproduced the issue reliably on a kernel without the fix.
	 */
	sleep(5);
	ASSERT_OK(system("tc filter add dev foo ingress matchall action skbmod swap mac"), "add filter");

cleanup:
	ASSERT_OK(system("ip link del dev foo"), "del veth");
	ASSERT_EQ(if_nametoindex("foo"), 0, "foo removed");
	ASSERT_EQ(if_nametoindex("bar"), 0, "bar removed");
}
static void test_tc_links_dev_mixed(int target)
{
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment