Commit fcb905d8 authored by Martin KaFai Lau

Merge branch 'bpf_redirect_peer fixes'

Daniel Borkmann says:

====================
This fixes bpf_redirect_peer stats accounting for veth and netkit,
and adds tstats in the first place for the latter. Utilise indirect
call wrapper for bpf_redirect_peer, and improve test coverage of the
latter also for netkit devices. Details in the patches, thanks!

The series was targeted at bpf originally, and is done here as well,
so it can trigger BPF CI. Jakub, if you think directly going via net
is better since the majority of the diff touches net anyway, that is
fine, too.

Thanks!

v2 -> v3:
  - Add kdoc for pcpu_stat_type (Simon)
  - Reject invalid type value in netdev_do_alloc_pcpu_stats (Simon)
  - Add Reviewed-by tags from list
v1 -> v2:
  - Move stats allocation/freeing into net core (Jakub)
  - As prepwork for the above, move vrf's dstats over into the core
  - Add a check into stats alloc to enforce tstats upon
    implementing ndo_get_peer_dev
  - Add Acked-by tags from list

Daniel Borkmann (6):
  net, vrf: Move dstats structure to core
  net: Move {l,t,d}stats allocation to core and convert veth & vrf
  netkit: Add tstats per-CPU traffic counters
  bpf, netkit: Add indirect call wrapper for fetching peer dev
  selftests/bpf: De-veth-ize the tc_redirect test case
  selftests/bpf: Add netkit to tc_redirect selftest
====================
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents 76df934c adfeae2d
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <linux/filter.h> #include <linux/filter.h>
#include <linux/netfilter_netdev.h> #include <linux/netfilter_netdev.h>
#include <linux/bpf_mprog.h> #include <linux/bpf_mprog.h>
#include <linux/indirect_call_wrapper.h>
#include <net/netkit.h> #include <net/netkit.h>
#include <net/dst.h> #include <net/dst.h>
...@@ -68,6 +69,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -68,6 +69,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
netdev_tx_t ret_dev = NET_XMIT_SUCCESS; netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
const struct bpf_mprog_entry *entry; const struct bpf_mprog_entry *entry;
struct net_device *peer; struct net_device *peer;
int len = skb->len;
rcu_read_lock(); rcu_read_lock();
peer = rcu_dereference(nk->peer); peer = rcu_dereference(nk->peer);
...@@ -85,15 +87,22 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -85,15 +87,22 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
case NETKIT_PASS: case NETKIT_PASS:
skb->protocol = eth_type_trans(skb, skb->dev); skb->protocol = eth_type_trans(skb, skb->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
__netif_rx(skb); if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
dev_sw_netstats_tx_add(dev, 1, len);
dev_sw_netstats_rx_add(peer, len);
} else {
goto drop_stats;
}
break; break;
case NETKIT_REDIRECT: case NETKIT_REDIRECT:
dev_sw_netstats_tx_add(dev, 1, len);
skb_do_redirect(skb); skb_do_redirect(skb);
break; break;
case NETKIT_DROP: case NETKIT_DROP:
default: default:
drop: drop:
kfree_skb(skb); kfree_skb(skb);
drop_stats:
dev_core_stats_tx_dropped_inc(dev); dev_core_stats_tx_dropped_inc(dev);
ret_dev = NET_XMIT_DROP; ret_dev = NET_XMIT_DROP;
break; break;
...@@ -169,11 +178,18 @@ static void netkit_set_headroom(struct net_device *dev, int headroom) ...@@ -169,11 +178,18 @@ static void netkit_set_headroom(struct net_device *dev, int headroom)
rcu_read_unlock(); rcu_read_unlock();
} }
static struct net_device *netkit_peer_dev(struct net_device *dev) INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev)
{ {
return rcu_dereference(netkit_priv(dev)->peer); return rcu_dereference(netkit_priv(dev)->peer);
} }
/* ndo_get_stats64 handler for netkit devices.
 *
 * Fills @stats from the device's per-CPU tstats counters through
 * dev_fetch_sw_netstats(), then sets tx_dropped from the value read
 * with DEV_STATS_READ().
 */
static void netkit_get_stats(struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
dev_fetch_sw_netstats(stats, dev->tstats);
stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
}
static void netkit_uninit(struct net_device *dev); static void netkit_uninit(struct net_device *dev);
static const struct net_device_ops netkit_netdev_ops = { static const struct net_device_ops netkit_netdev_ops = {
...@@ -184,6 +200,7 @@ static const struct net_device_ops netkit_netdev_ops = { ...@@ -184,6 +200,7 @@ static const struct net_device_ops netkit_netdev_ops = {
.ndo_set_rx_headroom = netkit_set_headroom, .ndo_set_rx_headroom = netkit_set_headroom,
.ndo_get_iflink = netkit_get_iflink, .ndo_get_iflink = netkit_get_iflink,
.ndo_get_peer_dev = netkit_peer_dev, .ndo_get_peer_dev = netkit_peer_dev,
.ndo_get_stats64 = netkit_get_stats,
.ndo_uninit = netkit_uninit, .ndo_uninit = netkit_uninit,
.ndo_features_check = passthru_features_check, .ndo_features_check = passthru_features_check,
}; };
...@@ -218,6 +235,7 @@ static void netkit_setup(struct net_device *dev) ...@@ -218,6 +235,7 @@ static void netkit_setup(struct net_device *dev)
ether_setup(dev); ether_setup(dev);
dev->max_mtu = ETH_MAX_MTU; dev->max_mtu = ETH_MAX_MTU;
dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->flags |= IFF_NOARP; dev->flags |= IFF_NOARP;
dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags &= ~IFF_TX_SKB_SHARING;
......
...@@ -373,7 +373,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -373,7 +373,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
skb_tx_timestamp(skb); skb_tx_timestamp(skb);
if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
if (!use_napi) if (!use_napi)
dev_lstats_add(dev, length); dev_sw_netstats_tx_add(dev, 1, length);
else else
__veth_xdp_flush(rq); __veth_xdp_flush(rq);
} else { } else {
...@@ -387,14 +387,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -387,14 +387,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
return ret; return ret;
} }
static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
{
struct veth_priv *priv = netdev_priv(dev);
dev_lstats_read(dev, packets, bytes);
return atomic64_read(&priv->dropped);
}
static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
{ {
struct veth_priv *priv = netdev_priv(dev); struct veth_priv *priv = netdev_priv(dev);
...@@ -432,24 +424,24 @@ static void veth_get_stats64(struct net_device *dev, ...@@ -432,24 +424,24 @@ static void veth_get_stats64(struct net_device *dev,
struct veth_priv *priv = netdev_priv(dev); struct veth_priv *priv = netdev_priv(dev);
struct net_device *peer; struct net_device *peer;
struct veth_stats rx; struct veth_stats rx;
u64 packets, bytes;
tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes); tot->tx_dropped = atomic64_read(&priv->dropped);
tot->tx_bytes = bytes; dev_fetch_sw_netstats(tot, dev->tstats);
tot->tx_packets = packets;
veth_stats_rx(&rx, dev); veth_stats_rx(&rx, dev);
tot->tx_dropped += rx.xdp_tx_err; tot->tx_dropped += rx.xdp_tx_err;
tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err; tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
tot->rx_bytes = rx.xdp_bytes; tot->rx_bytes += rx.xdp_bytes;
tot->rx_packets = rx.xdp_packets; tot->rx_packets += rx.xdp_packets;
rcu_read_lock(); rcu_read_lock();
peer = rcu_dereference(priv->peer); peer = rcu_dereference(priv->peer);
if (peer) { if (peer) {
veth_stats_tx(peer, &packets, &bytes); struct rtnl_link_stats64 tot_peer = {};
tot->rx_bytes += bytes;
tot->rx_packets += packets; dev_fetch_sw_netstats(&tot_peer, peer->tstats);
tot->rx_bytes += tot_peer.tx_bytes;
tot->rx_packets += tot_peer.tx_packets;
veth_stats_rx(&rx, peer); veth_stats_rx(&rx, peer);
tot->tx_dropped += rx.peer_tq_xdp_xmit_err; tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
...@@ -1506,25 +1498,12 @@ static void veth_free_queues(struct net_device *dev) ...@@ -1506,25 +1498,12 @@ static void veth_free_queues(struct net_device *dev)
static int veth_dev_init(struct net_device *dev) static int veth_dev_init(struct net_device *dev)
{ {
int err; return veth_alloc_queues(dev);
dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
if (!dev->lstats)
return -ENOMEM;
err = veth_alloc_queues(dev);
if (err) {
free_percpu(dev->lstats);
return err;
}
return 0;
} }
static void veth_dev_free(struct net_device *dev) static void veth_dev_free(struct net_device *dev)
{ {
veth_free_queues(dev); veth_free_queues(dev);
free_percpu(dev->lstats);
} }
#ifdef CONFIG_NET_POLL_CONTROLLER #ifdef CONFIG_NET_POLL_CONTROLLER
...@@ -1796,6 +1775,7 @@ static void veth_setup(struct net_device *dev) ...@@ -1796,6 +1775,7 @@ static void veth_setup(struct net_device *dev)
NETIF_F_HW_VLAN_STAG_RX); NETIF_F_HW_VLAN_STAG_RX);
dev->needs_free_netdev = true; dev->needs_free_netdev = true;
dev->priv_destructor = veth_dev_free; dev->priv_destructor = veth_dev_free;
dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->max_mtu = ETH_MAX_MTU; dev->max_mtu = ETH_MAX_MTU;
dev->hw_features = VETH_FEATURES; dev->hw_features = VETH_FEATURES;
......
...@@ -121,22 +121,12 @@ struct net_vrf { ...@@ -121,22 +121,12 @@ struct net_vrf {
int ifindex; int ifindex;
}; };
struct pcpu_dstats {
u64 tx_pkts;
u64 tx_bytes;
u64 tx_drps;
u64 rx_pkts;
u64 rx_bytes;
u64 rx_drps;
struct u64_stats_sync syncp;
};
static void vrf_rx_stats(struct net_device *dev, int len) static void vrf_rx_stats(struct net_device *dev, int len)
{ {
struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
u64_stats_update_begin(&dstats->syncp); u64_stats_update_begin(&dstats->syncp);
dstats->rx_pkts++; dstats->rx_packets++;
dstats->rx_bytes += len; dstats->rx_bytes += len;
u64_stats_update_end(&dstats->syncp); u64_stats_update_end(&dstats->syncp);
} }
...@@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_device *dev, ...@@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_device *dev,
do { do {
start = u64_stats_fetch_begin(&dstats->syncp); start = u64_stats_fetch_begin(&dstats->syncp);
tbytes = dstats->tx_bytes; tbytes = dstats->tx_bytes;
tpkts = dstats->tx_pkts; tpkts = dstats->tx_packets;
tdrops = dstats->tx_drps; tdrops = dstats->tx_drops;
rbytes = dstats->rx_bytes; rbytes = dstats->rx_bytes;
rpkts = dstats->rx_pkts; rpkts = dstats->rx_packets;
} while (u64_stats_fetch_retry(&dstats->syncp, start)); } while (u64_stats_fetch_retry(&dstats->syncp, start));
stats->tx_bytes += tbytes; stats->tx_bytes += tbytes;
stats->tx_packets += tpkts; stats->tx_packets += tpkts;
...@@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, ...@@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) if (likely(__netif_rx(skb) == NET_RX_SUCCESS))
vrf_rx_stats(dev, len); vrf_rx_stats(dev, len);
else else
this_cpu_inc(dev->dstats->rx_drps); this_cpu_inc(dev->dstats->rx_drops);
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
...@@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
u64_stats_update_begin(&dstats->syncp); u64_stats_update_begin(&dstats->syncp);
dstats->tx_pkts++; dstats->tx_packets++;
dstats->tx_bytes += len; dstats->tx_bytes += len;
u64_stats_update_end(&dstats->syncp); u64_stats_update_end(&dstats->syncp);
} else { } else {
this_cpu_inc(dev->dstats->tx_drps); this_cpu_inc(dev->dstats->tx_drops);
} }
return ret; return ret;
...@@ -1174,22 +1164,15 @@ static void vrf_dev_uninit(struct net_device *dev) ...@@ -1174,22 +1164,15 @@ static void vrf_dev_uninit(struct net_device *dev)
vrf_rtable_release(dev, vrf); vrf_rtable_release(dev, vrf);
vrf_rt6_release(dev, vrf); vrf_rt6_release(dev, vrf);
free_percpu(dev->dstats);
dev->dstats = NULL;
} }
static int vrf_dev_init(struct net_device *dev) static int vrf_dev_init(struct net_device *dev)
{ {
struct net_vrf *vrf = netdev_priv(dev); struct net_vrf *vrf = netdev_priv(dev);
dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
if (!dev->dstats)
goto out_nomem;
/* create the default dst which points back to us */ /* create the default dst which points back to us */
if (vrf_rtable_create(dev) != 0) if (vrf_rtable_create(dev) != 0)
goto out_stats; goto out_nomem;
if (vrf_rt6_create(dev) != 0) if (vrf_rt6_create(dev) != 0)
goto out_rth; goto out_rth;
...@@ -1203,9 +1186,6 @@ static int vrf_dev_init(struct net_device *dev) ...@@ -1203,9 +1186,6 @@ static int vrf_dev_init(struct net_device *dev)
out_rth: out_rth:
vrf_rtable_release(dev, vrf); vrf_rtable_release(dev, vrf);
out_stats:
free_percpu(dev->dstats);
dev->dstats = NULL;
out_nomem: out_nomem:
return -ENOMEM; return -ENOMEM;
} }
...@@ -1704,6 +1684,8 @@ static void vrf_setup(struct net_device *dev) ...@@ -1704,6 +1684,8 @@ static void vrf_setup(struct net_device *dev)
dev->min_mtu = IPV6_MIN_MTU; dev->min_mtu = IPV6_MIN_MTU;
dev->max_mtu = IP6_MAX_MTU; dev->max_mtu = IP6_MAX_MTU;
dev->mtu = dev->max_mtu; dev->mtu = dev->max_mtu;
dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
} }
static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
......
...@@ -1797,6 +1797,13 @@ enum netdev_ml_priv_type { ...@@ -1797,6 +1797,13 @@ enum netdev_ml_priv_type {
ML_PRIV_CAN, ML_PRIV_CAN,
}; };
/* Selects which per-CPU statistics structure, if any, the networking
 * core allocates and frees on behalf of a device. The value is stored
 * in the pcpu_stat_type member of struct net_device.
 */
enum netdev_stat_type {
NETDEV_PCPU_STAT_NONE,
NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */
NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */
NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
};
/** /**
* struct net_device - The DEVICE structure. * struct net_device - The DEVICE structure.
* *
...@@ -1991,10 +1998,14 @@ enum netdev_ml_priv_type { ...@@ -1991,10 +1998,14 @@ enum netdev_ml_priv_type {
* *
* @ml_priv: Mid-layer private * @ml_priv: Mid-layer private
* @ml_priv_type: Mid-layer private type * @ml_priv_type: Mid-layer private type
* @lstats: Loopback statistics *
* @tstats: Tunnel statistics * @pcpu_stat_type: Type of device statistics which the core should
* @dstats: Dummy statistics * allocate/free: none, lstats, tstats, dstats. none
* @vstats: Virtual ethernet statistics * means the driver is handling statistics allocation/
* freeing internally.
* @lstats: Loopback statistics: packets, bytes
* @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes
* @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes
* *
* @garp_port: GARP * @garp_port: GARP
* @mrp_port: MRP * @mrp_port: MRP
...@@ -2354,6 +2365,7 @@ struct net_device { ...@@ -2354,6 +2365,7 @@ struct net_device {
void *ml_priv; void *ml_priv;
enum netdev_ml_priv_type ml_priv_type; enum netdev_ml_priv_type ml_priv_type;
enum netdev_stat_type pcpu_stat_type:8;
union { union {
struct pcpu_lstats __percpu *lstats; struct pcpu_lstats __percpu *lstats;
struct pcpu_sw_netstats __percpu *tstats; struct pcpu_sw_netstats __percpu *tstats;
...@@ -2755,6 +2767,16 @@ struct pcpu_sw_netstats { ...@@ -2755,6 +2767,16 @@ struct pcpu_sw_netstats {
struct u64_stats_sync syncp; struct u64_stats_sync syncp;
} __aligned(4 * sizeof(u64)); } __aligned(4 * sizeof(u64));
/* Per-CPU device statistics with RX/TX packet, byte and drop counters.
 * @syncp is the u64_stats_sync that writers and readers of these
 * counters bracket their accesses with.
 */
struct pcpu_dstats {
u64 rx_packets;
u64 rx_bytes;
u64 rx_drops;
u64 tx_packets;
u64 tx_bytes;
u64 tx_drops;
struct u64_stats_sync syncp;
} __aligned(8 * sizeof(u64));
struct pcpu_lstats { struct pcpu_lstats {
u64_stats_t packets; u64_stats_t packets;
u64_stats_t bytes; u64_stats_t bytes;
......
...@@ -10,6 +10,7 @@ int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); ...@@ -10,6 +10,7 @@ int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
INDIRECT_CALLABLE_DECLARE(struct net_device *netkit_peer_dev(struct net_device *dev));
#else #else
static inline int netkit_prog_attach(const union bpf_attr *attr, static inline int netkit_prog_attach(const union bpf_attr *attr,
struct bpf_prog *prog) struct bpf_prog *prog)
...@@ -34,5 +35,10 @@ static inline int netkit_prog_query(const union bpf_attr *attr, ...@@ -34,5 +35,10 @@ static inline int netkit_prog_query(const union bpf_attr *attr,
{ {
return -EINVAL; return -EINVAL;
} }
/* Stub for builds without CONFIG_NETKIT: no netkit peer exists, so
 * this always returns NULL.
 */
static inline struct net_device *netkit_peer_dev(struct net_device *dev)
{
return NULL;
}
#endif /* CONFIG_NETKIT */ #endif /* CONFIG_NETKIT */
#endif /* __NET_NETKIT_H */ #endif /* __NET_NETKIT_H */
...@@ -10051,6 +10051,54 @@ void netif_tx_stop_all_queues(struct net_device *dev) ...@@ -10051,6 +10051,54 @@ void netif_tx_stop_all_queues(struct net_device *dev)
} }
EXPORT_SYMBOL(netif_tx_stop_all_queues); EXPORT_SYMBOL(netif_tx_stop_all_queues);
static int netdev_do_alloc_pcpu_stats(struct net_device *dev)
{
void __percpu *v;
/* Drivers implementing ndo_get_peer_dev must support tstat
* accounting, so that skb_do_redirect() can bump the dev's
* RX stats upon network namespace switch.
*/
if (dev->netdev_ops->ndo_get_peer_dev &&
dev->pcpu_stat_type != NETDEV_PCPU_STAT_TSTATS)
return -EOPNOTSUPP;
switch (dev->pcpu_stat_type) {
case NETDEV_PCPU_STAT_NONE:
return 0;
case NETDEV_PCPU_STAT_LSTATS:
v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
break;
case NETDEV_PCPU_STAT_TSTATS:
v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
break;
case NETDEV_PCPU_STAT_DSTATS:
v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
break;
default:
return -EINVAL;
}
return v ? 0 : -ENOMEM;
}
static void netdev_do_free_pcpu_stats(struct net_device *dev)
{
switch (dev->pcpu_stat_type) {
case NETDEV_PCPU_STAT_NONE:
return;
case NETDEV_PCPU_STAT_LSTATS:
free_percpu(dev->lstats);
break;
case NETDEV_PCPU_STAT_TSTATS:
free_percpu(dev->tstats);
break;
case NETDEV_PCPU_STAT_DSTATS:
free_percpu(dev->dstats);
break;
}
}
/** /**
* register_netdevice() - register a network device * register_netdevice() - register a network device
* @dev: device to register * @dev: device to register
...@@ -10111,9 +10159,13 @@ int register_netdevice(struct net_device *dev) ...@@ -10111,9 +10159,13 @@ int register_netdevice(struct net_device *dev)
goto err_uninit; goto err_uninit;
} }
ret = netdev_do_alloc_pcpu_stats(dev);
if (ret)
goto err_uninit;
ret = dev_index_reserve(net, dev->ifindex); ret = dev_index_reserve(net, dev->ifindex);
if (ret < 0) if (ret < 0)
goto err_uninit; goto err_free_pcpu;
dev->ifindex = ret; dev->ifindex = ret;
/* Transfer changeable features to wanted_features and enable /* Transfer changeable features to wanted_features and enable
...@@ -10219,6 +10271,8 @@ int register_netdevice(struct net_device *dev) ...@@ -10219,6 +10271,8 @@ int register_netdevice(struct net_device *dev)
call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
err_ifindex_release: err_ifindex_release:
dev_index_release(net, dev->ifindex); dev_index_release(net, dev->ifindex);
err_free_pcpu:
netdev_do_free_pcpu_stats(dev);
err_uninit: err_uninit:
if (dev->netdev_ops->ndo_uninit) if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev); dev->netdev_ops->ndo_uninit(dev);
...@@ -10471,6 +10525,7 @@ void netdev_run_todo(void) ...@@ -10471,6 +10525,7 @@ void netdev_run_todo(void)
WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr));
netdev_do_free_pcpu_stats(dev);
if (dev->priv_destructor) if (dev->priv_destructor)
dev->priv_destructor(dev); dev->priv_destructor(dev);
if (dev->needs_free_netdev) if (dev->needs_free_netdev)
......
...@@ -81,6 +81,7 @@ ...@@ -81,6 +81,7 @@
#include <net/xdp.h> #include <net/xdp.h>
#include <net/mptcp.h> #include <net/mptcp.h>
#include <net/netfilter/nf_conntrack_bpf.h> #include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netkit.h>
#include <linux/un.h> #include <linux/un.h>
#include "dev.h" #include "dev.h"
...@@ -2468,6 +2469,16 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { ...@@ -2468,6 +2469,16 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
/* Look up the peer of @dev through the driver's ndo_get_peer_dev hook.
 *
 * Returns NULL when the driver does not implement the hook. Otherwise
 * the hook is invoked through INDIRECT_CALL_1() with netkit_peer_dev
 * named as the candidate target.
 */
static struct net_device *skb_get_peer_dev(struct net_device *dev)
{
	const struct net_device_ops *netdev_ops = dev->netdev_ops;

	if (unlikely(!netdev_ops->ndo_get_peer_dev))
		return NULL;

	return INDIRECT_CALL_1(netdev_ops->ndo_get_peer_dev,
			       netkit_peer_dev, dev);
}
int skb_do_redirect(struct sk_buff *skb) int skb_do_redirect(struct sk_buff *skb)
{ {
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
...@@ -2481,17 +2492,15 @@ int skb_do_redirect(struct sk_buff *skb) ...@@ -2481,17 +2492,15 @@ int skb_do_redirect(struct sk_buff *skb)
if (unlikely(!dev)) if (unlikely(!dev))
goto out_drop; goto out_drop;
if (flags & BPF_F_PEER) { if (flags & BPF_F_PEER) {
const struct net_device_ops *ops = dev->netdev_ops; if (unlikely(!skb_at_tc_ingress(skb)))
if (unlikely(!ops->ndo_get_peer_dev ||
!skb_at_tc_ingress(skb)))
goto out_drop; goto out_drop;
dev = ops->ndo_get_peer_dev(dev); dev = skb_get_peer_dev(dev);
if (unlikely(!dev || if (unlikely(!dev ||
!(dev->flags & IFF_UP) || !(dev->flags & IFF_UP) ||
net_eq(net, dev_net(dev)))) net_eq(net, dev_net(dev))))
goto out_drop; goto out_drop;
skb->dev = dev; skb->dev = dev;
dev_sw_netstats_rx_add(dev, skb->len);
return -EAGAIN; return -EAGAIN;
} }
return flags & BPF_F_NEIGH ? return flags & BPF_F_NEIGH ?
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "test_progs.h" #include "test_progs.h"
#include "network_helpers.h" #include "network_helpers.h"
#include "netlink_helpers.h"
#include "test_tc_neigh_fib.skel.h" #include "test_tc_neigh_fib.skel.h"
#include "test_tc_neigh.skel.h" #include "test_tc_neigh.skel.h"
#include "test_tc_peer.skel.h" #include "test_tc_peer.skel.h"
...@@ -110,11 +111,17 @@ static void netns_setup_namespaces_nofail(const char *verb) ...@@ -110,11 +111,17 @@ static void netns_setup_namespaces_nofail(const char *verb)
} }
} }
/* Kind of device pair the test topology is built from. */
enum dev_mode {
MODE_VETH,
MODE_NETKIT,
};
struct netns_setup_result { struct netns_setup_result {
int ifindex_veth_src; enum dev_mode dev_mode;
int ifindex_veth_src_fwd; int ifindex_src;
int ifindex_veth_dst; int ifindex_src_fwd;
int ifindex_veth_dst_fwd; int ifindex_dst;
int ifindex_dst_fwd;
}; };
static int get_ifaddr(const char *name, char *ifaddr) static int get_ifaddr(const char *name, char *ifaddr)
...@@ -137,58 +144,110 @@ static int get_ifaddr(const char *name, char *ifaddr) ...@@ -137,58 +144,110 @@ static int get_ifaddr(const char *name, char *ifaddr)
return 0; return 0;
} }
/* Create a netkit device pair with an RTM_NEWLINK rtnetlink request.
 *
 * @mode: value sent as the IFLA_NETKIT_MODE attribute
 * @prim: interface name of the primary device
 * @peer: interface name of the peer device
 *
 * Returns the rtnl_open() error if the netlink socket cannot be opened,
 * otherwise the result of rtnl_talk().
 */
static int create_netkit(int mode, char *prim, char *peer)
{
struct rtattr *linkinfo, *data, *peer_info;
struct rtnl_handle rth = { .fd = -1 };
const char *type = "netkit";
struct {
struct nlmsghdr n;
struct ifinfomsg i;
char buf[1024];
} req = {};
int err;
err = rtnl_open(&rth, 0);
if (!ASSERT_OK(err, "open_rtnetlink"))
return err;
/* req is already zero-initialized at its declaration; this clears it again. */
memset(&req, 0, sizeof(req));
req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
req.n.nlmsg_type = RTM_NEWLINK;
req.i.ifi_family = AF_UNSPEC;
/* NOTE(review): the return values of the addattr helpers below are not checked. */
addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
/* Nesting: IFLA_LINKINFO { IFLA_INFO_KIND, IFLA_INFO_DATA { mode, PEER_INFO { ... } } } */
linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
/* Skips sizeof(struct ifinfomsg) zeroed bytes inside the peer nest before
 * the peer's attributes; presumably the peer's own ifinfomsg header that
 * the kernel expects there (TODO confirm).
 */
req.n.nlmsg_len += sizeof(struct ifinfomsg);
addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
addattr_nest_end(&req.n, peer_info);
addattr_nest_end(&req.n, data);
addattr_nest_end(&req.n, linkinfo);
err = rtnl_talk(&rth, &req.n, NULL);
ASSERT_OK(err, "talk_rtnetlink");
rtnl_close(&rth);
return err;
}
static int netns_setup_links_and_routes(struct netns_setup_result *result) static int netns_setup_links_and_routes(struct netns_setup_result *result)
{ {
struct nstoken *nstoken = NULL; struct nstoken *nstoken = NULL;
char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; char src_fwd_addr[IFADDR_STR_LEN+1] = {};
int err;
SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd");
SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd");
SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD); if (result->dev_mode == MODE_VETH) {
SYS(fail, "ip link set veth_dst address " MAC_DST); SYS(fail, "ip link add src type veth peer name src_fwd");
SYS(fail, "ip link add dst type veth peer name dst_fwd");
SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
SYS(fail, "ip link set dst address " MAC_DST);
} else if (result->dev_mode == MODE_NETKIT) {
err = create_netkit(NETKIT_L3, "src", "src_fwd");
if (!ASSERT_OK(err, "create_ifindex_src"))
goto fail;
err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
if (!ASSERT_OK(err, "create_ifindex_dst"))
goto fail;
}
if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) if (get_ifaddr("src_fwd", src_fwd_addr))
goto fail; goto fail;
result->ifindex_veth_src = if_nametoindex("veth_src"); result->ifindex_src = if_nametoindex("src");
if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src")) if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
goto fail; goto fail;
result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd"); result->ifindex_src_fwd = if_nametoindex("src_fwd");
if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd")) if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
goto fail; goto fail;
result->ifindex_veth_dst = if_nametoindex("veth_dst"); result->ifindex_dst = if_nametoindex("dst");
if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst")) if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
goto fail; goto fail;
result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd"); result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd")) if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
goto fail; goto fail;
SYS(fail, "ip link set veth_src netns " NS_SRC); SYS(fail, "ip link set src netns " NS_SRC);
SYS(fail, "ip link set veth_src_fwd netns " NS_FWD); SYS(fail, "ip link set src_fwd netns " NS_FWD);
SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD); SYS(fail, "ip link set dst_fwd netns " NS_FWD);
SYS(fail, "ip link set veth_dst netns " NS_DST); SYS(fail, "ip link set dst netns " NS_DST);
/** setup in 'src' namespace */ /** setup in 'src' namespace */
nstoken = open_netns(NS_SRC); nstoken = open_netns(NS_SRC);
if (!ASSERT_OK_PTR(nstoken, "setns src")) if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto fail; goto fail;
SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src"); SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad"); SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
SYS(fail, "ip link set dev veth_src up"); SYS(fail, "ip link set dev src up");
SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global"); SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global"); SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global"); SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");
SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s", if (result->dev_mode == MODE_VETH) {
veth_src_fwd_addr); SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s", src_fwd_addr);
veth_src_fwd_addr); SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
src_fwd_addr);
}
close_netns(nstoken); close_netns(nstoken);
...@@ -201,15 +260,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) ...@@ -201,15 +260,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
* needs v4 one in order to start ARP probing. IP4_NET route is added * needs v4 one in order to start ARP probing. IP4_NET route is added
* to the endpoints so that the ARP processing will reply. * to the endpoints so that the ARP processing will reply.
*/ */
SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd"); SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
SYS(fail, "ip link set dev veth_src_fwd up"); SYS(fail, "ip link set dev src_fwd up");
SYS(fail, "ip link set dev veth_dst_fwd up"); SYS(fail, "ip link set dev dst_fwd up");
SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
close_netns(nstoken); close_netns(nstoken);
...@@ -218,16 +277,18 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) ...@@ -218,16 +277,18 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
if (!ASSERT_OK_PTR(nstoken, "setns dst")) if (!ASSERT_OK_PTR(nstoken, "setns dst"))
goto fail; goto fail;
SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst"); SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad"); SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
SYS(fail, "ip link set dev veth_dst up"); SYS(fail, "ip link set dev dst up");
SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global"); SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global"); SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global"); SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");
SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); if (result->dev_mode == MODE_VETH) {
SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
}
close_netns(nstoken); close_netns(nstoken);
...@@ -293,23 +354,23 @@ static int netns_load_bpf(const struct bpf_program *src_prog, ...@@ -293,23 +354,23 @@ static int netns_load_bpf(const struct bpf_program *src_prog,
const struct bpf_program *chk_prog, const struct bpf_program *chk_prog,
const struct netns_setup_result *setup_result) const struct netns_setup_result *setup_result)
{ {
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
int err; int err;
/* tc qdisc add dev veth_src_fwd clsact */ /* tc qdisc add dev src_fwd clsact */
QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
/* tc filter add dev veth_src_fwd ingress bpf da src_prog */ /* tc filter add dev src_fwd ingress bpf da src_prog */
XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0); XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
/* tc filter add dev veth_src_fwd egress bpf da chk_prog */ /* tc filter add dev src_fwd egress bpf da chk_prog */
XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0); XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
/* tc qdisc add dev veth_dst_fwd clsact */ /* tc qdisc add dev dst_fwd clsact */
QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
/* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */ /* tc filter add dev dst_fwd ingress bpf da dst_prog */
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
/* tc filter add dev veth_dst_fwd egress bpf da chk_prog */ /* tc filter add dev dst_fwd egress bpf da chk_prog */
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
return 0; return 0;
fail: fail:
...@@ -539,10 +600,10 @@ static void test_inet_dtime(int family, int type, const char *addr, __u16 port) ...@@ -539,10 +600,10 @@ static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
static int netns_load_dtime_bpf(struct test_tc_dtime *skel, static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
const struct netns_setup_result *setup_result) const struct netns_setup_result *setup_result)
{ {
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src); LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst); LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
struct nstoken *nstoken; struct nstoken *nstoken;
int err; int err;
...@@ -550,58 +611,58 @@ static int netns_load_dtime_bpf(struct test_tc_dtime *skel, ...@@ -550,58 +611,58 @@ static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
nstoken = open_netns(NS_SRC); nstoken = open_netns(NS_SRC);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
return -1; return -1;
/* tc qdisc add dev veth_src clsact */ /* tc qdisc add dev src clsact */
QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src); QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
/* tc filter add dev veth_src ingress bpf da ingress_host */ /* tc filter add dev src ingress bpf da ingress_host */
XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
/* tc filter add dev veth_src egress bpf da egress_host */ /* tc filter add dev src egress bpf da egress_host */
XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
close_netns(nstoken); close_netns(nstoken);
/* setup ns_dst tc progs */ /* setup ns_dst tc progs */
nstoken = open_netns(NS_DST); nstoken = open_netns(NS_DST);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST)) if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
return -1; return -1;
/* tc qdisc add dev veth_dst clsact */ /* tc qdisc add dev dst clsact */
QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst); QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
/* tc filter add dev veth_dst ingress bpf da ingress_host */ /* tc filter add dev dst ingress bpf da ingress_host */
XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
/* tc filter add dev veth_dst egress bpf da egress_host */ /* tc filter add dev dst egress bpf da egress_host */
XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
close_netns(nstoken); close_netns(nstoken);
/* setup ns_fwd tc progs */ /* setup ns_fwd tc progs */
nstoken = open_netns(NS_FWD); nstoken = open_netns(NS_FWD);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
return -1; return -1;
/* tc qdisc add dev veth_dst_fwd clsact */ /* tc qdisc add dev dst_fwd clsact */
QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
/* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
skel->progs.ingress_fwdns_prio100, 100); skel->progs.ingress_fwdns_prio100, 100);
/* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
skel->progs.ingress_fwdns_prio101, 101); skel->progs.ingress_fwdns_prio101, 101);
/* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
skel->progs.egress_fwdns_prio100, 100); skel->progs.egress_fwdns_prio100, 100);
/* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
skel->progs.egress_fwdns_prio101, 101); skel->progs.egress_fwdns_prio101, 101);
/* tc qdisc add dev veth_src_fwd clsact */ /* tc qdisc add dev src_fwd clsact */
QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
/* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
skel->progs.ingress_fwdns_prio100, 100); skel->progs.ingress_fwdns_prio100, 100);
/* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
skel->progs.ingress_fwdns_prio101, 101); skel->progs.ingress_fwdns_prio101, 101);
/* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
skel->progs.egress_fwdns_prio100, 100); skel->progs.egress_fwdns_prio100, 100);
/* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
skel->progs.egress_fwdns_prio101, 101); skel->progs.egress_fwdns_prio101, 101);
close_netns(nstoken); close_netns(nstoken);
return 0; return 0;
...@@ -777,8 +838,8 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) ...@@ -777,8 +838,8 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open")) if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
return; return;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
err = test_tc_dtime__load(skel); err = test_tc_dtime__load(skel);
if (!ASSERT_OK(err, "test_tc_dtime__load")) if (!ASSERT_OK(err, "test_tc_dtime__load"))
...@@ -868,8 +929,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) ...@@ -868,8 +929,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
goto done; goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
err = test_tc_neigh__load(skel); err = test_tc_neigh__load(skel);
if (!ASSERT_OK(err, "test_tc_neigh__load")) if (!ASSERT_OK(err, "test_tc_neigh__load"))
...@@ -904,8 +965,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result) ...@@ -904,8 +965,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
goto done; goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
err = test_tc_peer__load(skel); err = test_tc_peer__load(skel);
if (!ASSERT_OK(err, "test_tc_peer__load")) if (!ASSERT_OK(err, "test_tc_peer__load"))
...@@ -996,7 +1057,7 @@ static int tun_relay_loop(int src_fd, int target_fd) ...@@ -996,7 +1057,7 @@ static int tun_relay_loop(int src_fd, int target_fd)
static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
{ {
LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd); LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
struct test_tc_peer *skel = NULL; struct test_tc_peer *skel = NULL;
struct nstoken *nstoken = NULL; struct nstoken *nstoken = NULL;
int err; int err;
...@@ -1045,7 +1106,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) ...@@ -1045,7 +1106,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
goto fail; goto fail;
skel->rodata->IFINDEX_SRC = ifindex; skel->rodata->IFINDEX_SRC = ifindex;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
err = test_tc_peer__load(skel); err = test_tc_peer__load(skel);
if (!ASSERT_OK(err, "test_tc_peer__load")) if (!ASSERT_OK(err, "test_tc_peer__load"))
...@@ -1053,19 +1114,19 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) ...@@ -1053,19 +1114,19 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
/* Load "tc_src_l3" to the tun_fwd interface to redirect packets /* Load "tc_src_l3" to the tun_fwd interface to redirect packets
* towards dst, and "tc_dst" to redirect packets * towards dst, and "tc_dst" to redirect packets
* and "tc_chk" on veth_dst_fwd to drop non-redirected packets. * and "tc_chk" on dst_fwd to drop non-redirected packets.
*/ */
/* tc qdisc add dev tun_fwd clsact */ /* tc qdisc add dev tun_fwd clsact */
QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex); QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
/* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */ /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0); XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
/* tc qdisc add dev veth_dst_fwd clsact */ /* tc qdisc add dev dst_fwd clsact */
QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
/* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */ /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
/* tc filter add dev veth_dst_fwd egress bpf da tc_chk */ /* tc filter add dev dst_fwd egress bpf da tc_chk */
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
/* Setup route and neigh tables */ /* Setup route and neigh tables */
SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
...@@ -1074,17 +1135,17 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) ...@@ -1074,17 +1135,17 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
" dev tun_src scope global"); " dev tun_src scope global");
SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
" dev tun_src scope global"); " dev tun_src scope global");
SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
goto fail; goto fail;
...@@ -1106,9 +1167,9 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) ...@@ -1106,9 +1167,9 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
close_netns(nstoken); close_netns(nstoken);
} }
#define RUN_TEST(name) \ #define RUN_TEST(name, mode) \
({ \ ({ \
struct netns_setup_result setup_result; \ struct netns_setup_result setup_result = { .dev_mode = mode, }; \
if (test__start_subtest(#name)) \ if (test__start_subtest(#name)) \
if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
...@@ -1122,11 +1183,13 @@ static void *test_tc_redirect_run_tests(void *arg) ...@@ -1122,11 +1183,13 @@ static void *test_tc_redirect_run_tests(void *arg)
{ {
netns_setup_namespaces_nofail("delete"); netns_setup_namespaces_nofail("delete");
RUN_TEST(tc_redirect_peer); RUN_TEST(tc_redirect_peer, MODE_VETH);
RUN_TEST(tc_redirect_peer_l3); RUN_TEST(tc_redirect_peer, MODE_NETKIT);
RUN_TEST(tc_redirect_neigh); RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
RUN_TEST(tc_redirect_neigh_fib); RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
RUN_TEST(tc_redirect_dtime); RUN_TEST(tc_redirect_neigh, MODE_VETH);
RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
RUN_TEST(tc_redirect_dtime, MODE_VETH);
return NULL; return NULL;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment