Commit 72ccd920 authored by Alexei Starovoitov

Merge branch 'enforce-global-flow-dissector'

Stanislav Fomichev says:

====================
While having per-net-ns flow dissector programs is convenient for
testing, security-wise it's better to have only one vetted global
flow dissector implementation.

Let's have a convention that when BPF flow dissector is installed
in the root namespace, child namespaces can't override it.

The intended use-case is to attach the global BPF flow dissector
early from the init scripts/systemd. Attaching the global dissector
is prohibited if some non-root namespace already has a flow dissector
attached. Also, attaching to a non-root namespace is prohibited
when there is a flow dissector attached to the root namespace.

v3:
* drop extra check and empty line (Andrii Nakryiko)

v2:
* EPERM -> EEXIST (Song Liu)
* Make sure we don't have dissector attached to non-root namespaces
  when attaching the global one (Andrii Nakryiko)
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 4564a8bb 1d9626dc
......@@ -142,3 +142,6 @@ BPF flow dissector doesn't support exporting all the metadata that in-kernel
C-based implementation can export. Notable example is single VLAN (802.1Q)
and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys``
for the set of information that can currently be exported from the BPF context.
When BPF flow dissector is attached to the root network namespace (machine-wide
policy), users can't override it in their child network namespaces.
......@@ -114,19 +114,46 @@ int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
{
struct bpf_prog *attached;
struct net *net;
int ret = 0;
net = current->nsproxy->net_ns;
mutex_lock(&flow_dissector_mutex);
if (net == &init_net) {
/* BPF flow dissector in the root namespace overrides
* any per-net-namespace one. When attaching to root,
* make sure we don't have any BPF program attached
* to the non-root namespaces.
*/
struct net *ns;
for_each_net(ns) {
if (rcu_access_pointer(ns->flow_dissector_prog)) {
ret = -EEXIST;
goto out;
}
}
} else {
/* Make sure root flow dissector is not attached
* when attaching to the non-root namespace.
*/
if (rcu_access_pointer(init_net.flow_dissector_prog)) {
ret = -EEXIST;
goto out;
}
}
attached = rcu_dereference_protected(net->flow_dissector_prog,
lockdep_is_held(&flow_dissector_mutex));
if (attached) {
/* Only one BPF program can be attached at a time */
mutex_unlock(&flow_dissector_mutex);
return -EEXIST;
ret = -EEXIST;
goto out;
}
rcu_assign_pointer(net->flow_dissector_prog, prog);
out:
mutex_unlock(&flow_dissector_mutex);
return 0;
return ret;
}
int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
......@@ -910,7 +937,10 @@ bool __skb_flow_dissect(const struct net *net,
WARN_ON_ONCE(!net);
if (net) {
rcu_read_lock();
attached = rcu_dereference(net->flow_dissector_prog);
attached = rcu_dereference(init_net.flow_dissector_prog);
if (!attached)
attached = rcu_dereference(net->flow_dissector_prog);
if (attached) {
struct bpf_flow_keys flow_keys;
......
......@@ -18,19 +18,55 @@ fi
# this is the case and run it with in_netns.sh if it is being run in the root
# namespace.
if [[ -z $(ip netns identify $$) ]]; then
err=0
if bpftool="$(which bpftool)"; then
echo "Testing global flow dissector..."
$bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \
type flow_dissector
if ! unshare --net $bpftool prog attach pinned \
/sys/fs/bpf/flow/flow_dissector flow_dissector; then
echo "Unexpected unsuccessful attach in namespace" >&2
err=1
fi
$bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \
flow_dissector
if unshare --net $bpftool prog attach pinned \
/sys/fs/bpf/flow/flow_dissector flow_dissector; then
echo "Unexpected successful attach in namespace" >&2
err=1
fi
if ! $bpftool prog detach pinned \
/sys/fs/bpf/flow/flow_dissector flow_dissector; then
echo "Failed to detach flow dissector" >&2
err=1
fi
rm -rf /sys/fs/bpf/flow
else
echo "Skipping root flow dissector test, bpftool not found" >&2
fi
# Run the rest of the tests in a net namespace.
../net/in_netns.sh "$0" "$@"
exit $?
fi
err=$(( $err + $? ))
# Determine selftest success via shell exit code
exit_handler()
{
if (( $? == 0 )); then
if (( $err == 0 )); then
echo "selftests: $TESTNAME [PASS]";
else
echo "selftests: $TESTNAME [FAILED]";
fi
exit $err
fi
# Determine selftest success via shell exit code
exit_handler()
{
set +e
# Cleanup
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment