Commit 44a949ad authored by David S. Miller's avatar David S. Miller

Merge branch 'net-sched-tc-block-ports-tracking'

Victor Nogueira says:

====================
net/sched: Introduce tc block ports tracking and use

__context__
The "tc block" is a collection of netdevs/ports which allow qdiscs to share
match-action block instances (as opposed to the traditional tc filter per
netdev/port)[1].

Up to this point in the implementation, the block is unaware of its ports.
This patch makes the tc block ports available to the datapath.

For the datapath we provide a use case of the tc block in a mirred
action in patch 3. For users can levarage mirred to do something like
the following:

$ tc qdisc add dev ens7 ingress_block 22 clsact
$ tc qdisc add dev ens8 ingress_block 22 clsact
$ tc qdisc add dev ens9 ingress_block 22 clsact
$ tc filter add block 22 protocol ip pref 25 \
  flower dst_ip 192.168.0.0/16 action mirred egress mirror blockid 22

In this case, if the packet arrives on ens8, it will be copied and sent to
all ports in the block excluding ens8. Note that the packet is still in
the pipeline at this point - meaning other actions could be added after the
mirror because mirred copies/clones the skb. Example the following is
valid:

$ tc filter add block 22 protocol ip pref 25 flower dst_ip 192.168.0.0/16 \
action mirred egress mirror blockid 22 \
action vlan push id 123 \
action mirred egress redirect dev dummy0

redirect behavior always steals the packet from the pipeline and therefore
the skb is no longer available for a subsequent action as illustrated above
(in redirecting to dummy0).

The behavior of redirecting to a tc block is therefore adapted to work in
the same manner. So a setup as such:
$ tc qdisc add dev ens7 ingress_block 22
$ tc qdisc add dev ens8 ingress_block 22
$ tc qdisc add dev ens9 ingress_block 22
$ tc filter add block 22 protocol ip pref 25 \
  flower dst_ip 192.168.0.0/16 action mirred egress redirect blockid 22

for a matching packet arriving on ens7 will first send a copy/clone to ens8
(as in the "mirror" behavior) then to ens9 as in the redirect behavior
above. Once this processing is done - no other actions are able to process
this skb. i.e it is removed from the "pipeline".

In this case, if the packet arrives on ens8, it will be copied and sent to
all ports in the block excluding ens8.

Patch 1 separates/exports mirror and redirect functions from act_mirred
Patch 2 introduces the required infra.
Patch 3 Allows mirred to blocks

Subsequent patches will come with tdc test cases.

__Acknowledgements__
Suggestions from Vlad Buslov and Marcelo Ricardo Leitner made this patchset
better. The idea of integrating the ports into the tc block was suggested
by Jiri Pirko.

[1] See commit ca46abd6 ("Merge branch'net-sched-allow-qdiscs-to-share-filter-block-instances'")

Changes in v2:
  - Remove RFC tag
  - Add more details in patch 0(Jiri)
  - When CONFIG_NET_TC_SKB_EXT is selected we have unused qdisc_cb
    Reported-by: kernel test robot <lkp@intel.com> (and
horms@kernel.org)
  - Fix bad dev dereference in printk of blockcast action (Simon)

Changes in v3:
  - Add missing xa_destroy (pointed out by Vlad)
  - Remove bugfix pointed by Vlad (will send in separate patch)
  - Removed ports from subject in patch #2 and typos (suggested by
    Marcelo)
  - Remove net_notice_ratelimited debug messages in error
    cases (suggested by Marcelo)
  - Minor changes to appease sparse's lock context warning

Changes in v4:
  - Avoid code repetition using gotos in cast_one (suggested by Paolo)
  - Fix typo in cover letter (pointed out by Paolo)
  - Create a module description for act_blockcast
    (reported by Paolo and CI)

Changes in v5:
  - Add new patch which separated mirred into mirror and redirect
    functions (suggested by Jiri)
  - Instead of repeating the code to mirror in blockcast use mirror
    exported function by patch1 (tcf_mirror_act)
  - Make Block ID into act_blockcast's parameter passed by user space
    instead of always getting it from SKB (suggested by Jiri)
  - Add tx_type parameter which will specify what transmission behaviour
    we want (as described earlier)

Changes in v6:
  - Remove blockcast and make it a part of mirred (suggestd by Jiri)
  - Block ID is now a mirred parameter
  - We now allow redirecting and mirroring to either ingress or egress

Changes in v7:
  - Remove set but not used variable in tcf_mirred_act (pointed out by
    Jakub)

Changes in v8:
  - Fix uapi issues (pointed out by Jiri)
  - Separate last patch into 3 - two as preparations for adding
    block ID to mirred and one allowing mirred to block (suggested by Jiri)
  - Remove declaration initialisation of eg_block and in_block in
    qdisc_block_add_dev (suggested by Jiri)
  - Avoid unnecessary if guards in qdisc_block_add_dev (suggested by Jiri)
  - Remove unncessary block_index retrieval in __qdisc_destroy
    (suggested by Jiri)
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents b1dffcf0 42f39036
......@@ -19,6 +19,7 @@
#include <net/gen_stats.h>
#include <net/rtnetlink.h>
#include <net/flow_offload.h>
#include <linux/xarray.h>
struct Qdisc_ops;
struct qdisc_walker;
......@@ -456,6 +457,7 @@ struct tcf_chain {
};
struct tcf_block {
struct xarray ports; /* datapath accessible */
/* Lock protects tcf_block and lifetime-management data of chains
* attached to the block (refcnt, action_refcnt, explicitly_created).
*/
......@@ -482,6 +484,8 @@ struct tcf_block {
struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
};
struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index);
static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
{
return lockdep_is_held(&chain->filter_chain_lock);
......
......@@ -8,6 +8,7 @@
struct tcf_mirred {
struct tc_action common;
int tcfm_eaction;
u32 tcfm_blockid;
bool tcfm_mac_header_xmit;
struct net_device __rcu *tcfm_dev;
netdevice_tracker tcfm_dev_tracker;
......
......@@ -21,6 +21,7 @@ enum {
TCA_MIRRED_TM,
TCA_MIRRED_PARMS,
TCA_MIRRED_PAD,
TCA_MIRRED_BLOCKID,
__TCA_MIRRED_MAX
};
#define TCA_MIRRED_MAX (__TCA_MIRRED_MAX - 1)
......
This diff is collapsed.
......@@ -531,6 +531,7 @@ static void tcf_block_destroy(struct tcf_block *block)
{
mutex_destroy(&block->lock);
mutex_destroy(&block->proto_destroy_lock);
xa_destroy(&block->ports);
kfree_rcu(block, rcu);
}
......@@ -1002,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
refcount_set(&block->refcnt, 1);
block->net = net;
block->index = block_index;
xa_init(&block->ports);
/* Don't store q pointer for blocks which are shared */
if (!tcf_block_shared(block))
......@@ -1009,12 +1011,13 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
return block;
}
static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
return idr_find(&tn->idr, block_index);
}
EXPORT_SYMBOL(tcf_block_lookup);
static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
......
......@@ -1180,6 +1180,43 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
return 0;
}
static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev,
struct netlink_ext_ack *extack)
{
const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops;
struct tcf_block *block;
int err;
block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
if (block) {
err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
if (err) {
NL_SET_ERR_MSG(extack,
"ingress block dev insert failed");
return err;
}
}
block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL);
if (block) {
err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
if (err) {
NL_SET_ERR_MSG(extack,
"Egress block dev insert failed");
goto err_out;
}
}
return 0;
err_out:
block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL);
if (block)
xa_erase(&block->ports, dev->ifindex);
return err;
}
static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
struct netlink_ext_ack *extack)
{
......@@ -1350,6 +1387,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
qdisc_hash_add(sch, false);
trace_qdisc_create(ops, dev, parent);
err = qdisc_block_add_dev(sch, dev, extack);
if (err)
goto err_out4;
return sch;
err_out4:
......
......@@ -1051,6 +1051,9 @@ static void qdisc_free_cb(struct rcu_head *head)
static void __qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
struct net_device *dev = qdisc_dev(qdisc);
const struct Qdisc_class_ops *cops;
struct tcf_block *block;
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
......@@ -1061,11 +1064,24 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
qdisc_reset(qdisc);
cops = ops->cl_ops;
if (ops->ingress_block_get) {
block = cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
if (block)
xa_erase(&block->ports, dev->ifindex);
}
if (ops->egress_block_get) {
block = cops->tcf_block(qdisc, TC_H_MIN_EGRESS, NULL);
if (block)
xa_erase(&block->ports, dev->ifindex);
}
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
netdev_put(dev, &qdisc->dev_tracker);
trace_qdisc_destroy(qdisc);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment