Commit 7a3f5b0d authored by Ryoga Saito's avatar Ryoga Saito Committed by Pablo Neira Ayuso

netfilter: add netfilter hooks to SRv6 data plane

This patch introduces netfilter hooks for solving the problem that
conntrack couldn't record both inner flows and outer flows.

This patch also introduces a new sysctl toggle for enabling lightweight
tunnel netfilter hooks.
Signed-off-by: default avatarRyoga Saito <contact@proelbtn.com>
Signed-off-by: default avatarPablo Neira Ayuso <pablo@netfilter.org>
parent 7bc416f1
...@@ -184,6 +184,13 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds) ...@@ -184,6 +184,13 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds)
This extended timeout will be used in case there is an GRE stream This extended timeout will be used in case there is an GRE stream
detected. detected.
nf_hooks_lwtunnel - BOOLEAN
- 0 - disabled (default)
- not 0 - enabled
If this option is enabled, the lightweight tunnel netfilter hooks are
enabled. This option cannot be disabled once it is enabled.
nf_flowtable_tcp_timeout - INTEGER (seconds) nf_flowtable_tcp_timeout - INTEGER (seconds)
default 30 default 30
......
...@@ -51,6 +51,9 @@ struct lwtunnel_encap_ops { ...@@ -51,6 +51,9 @@ struct lwtunnel_encap_ops {
}; };
#ifdef CONFIG_LWTUNNEL #ifdef CONFIG_LWTUNNEL
DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
void lwtstate_free(struct lwtunnel_state *lws); void lwtstate_free(struct lwtunnel_state *lws);
static inline struct lwtunnel_state * static inline struct lwtunnel_state *
......
#include <linux/sysctl.h>
#include <linux/types.h>
#ifdef CONFIG_SYSCTL
int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
#endif
...@@ -23,6 +23,9 @@ ...@@ -23,6 +23,9 @@
#include <net/ip6_fib.h> #include <net/ip6_fib.h>
#include <net/rtnh.h> #include <net/rtnh.h>
DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
#ifdef CONFIG_MODULES #ifdef CONFIG_MODULES
static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
......
...@@ -26,6 +26,8 @@ ...@@ -26,6 +26,8 @@
#ifdef CONFIG_IPV6_SEG6_HMAC #ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h> #include <net/seg6_hmac.h>
#endif #endif
#include <net/lwtunnel.h>
#include <linux/netfilter.h>
static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
{ {
...@@ -295,11 +297,19 @@ static int seg6_do_srh(struct sk_buff *skb) ...@@ -295,11 +297,19 @@ static int seg6_do_srh(struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr));
nf_reset_ct(skb);
return 0; return 0;
} }
static int seg6_input(struct sk_buff *skb) static int seg6_input_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
return dst_input(skb);
}
static int seg6_input_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{ {
struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL; struct dst_entry *dst = NULL;
...@@ -337,10 +347,41 @@ static int seg6_input(struct sk_buff *skb) ...@@ -337,10 +347,41 @@ static int seg6_input(struct sk_buff *skb)
if (unlikely(err)) if (unlikely(err))
return err; return err;
return dst_input(skb); if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, seg6_input_finish);
return seg6_input_finish(dev_net(skb->dev), NULL, skb);
} }
static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) static int seg6_input_nf(struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct net *net = dev_net(skb->dev);
switch (skb->protocol) {
case htons(ETH_P_IP):
return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
skb, NULL, dev, seg6_input_core);
case htons(ETH_P_IPV6):
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
skb, NULL, dev, seg6_input_core);
}
return -EINVAL;
}
static int seg6_input(struct sk_buff *skb)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return seg6_input_nf(skb);
return seg6_input_core(dev_net(skb->dev), NULL, skb);
}
static int seg6_output_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{ {
struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL; struct dst_entry *dst = NULL;
...@@ -387,12 +428,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) ...@@ -387,12 +428,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(err)) if (unlikely(err))
goto drop; goto drop;
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, skb_dst(skb)->dev, dst_output);
return dst_output(net, sk, skb); return dst_output(net, sk, skb);
drop: drop:
kfree_skb(skb); kfree_skb(skb);
return err; return err;
} }
static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
switch (skb->protocol) {
case htons(ETH_P_IP):
return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
NULL, dev, seg6_output_core);
case htons(ETH_P_IPV6):
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
NULL, dev, seg6_output_core);
}
return -EINVAL;
}
static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return seg6_output_nf(net, sk, skb);
return seg6_output_core(net, sk, skb);
}
static int seg6_build_state(struct net *net, struct nlattr *nla, static int seg6_build_state(struct net *net, struct nlattr *nla,
unsigned int family, const void *cfg, unsigned int family, const void *cfg,
struct lwtunnel_state **ts, struct lwtunnel_state **ts,
......
...@@ -30,6 +30,8 @@ ...@@ -30,6 +30,8 @@
#include <net/seg6_local.h> #include <net/seg6_local.h>
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <net/lwtunnel.h>
#include <linux/netfilter.h>
#define SEG6_F_ATTR(i) BIT(i) #define SEG6_F_ATTR(i) BIT(i)
...@@ -413,12 +415,33 @@ static int input_action_end_dx2(struct sk_buff *skb, ...@@ -413,12 +415,33 @@ static int input_action_end_dx2(struct sk_buff *skb,
return -EINVAL; return -EINVAL;
} }
static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct in6_addr *nhaddr = NULL;
struct seg6_local_lwt *slwt;
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
/* The inner packet is not associated to any local interface,
* so we do not call netif_rx().
*
* If slwt->nh6 is set to ::, then lookup the nexthop for the
* inner packet's DA. Otherwise, use the specified nexthop.
*/
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;
seg6_lookup_nexthop(skb, nhaddr, 0);
return dst_input(skb);
}
/* decapsulate and forward to specified nexthop */ /* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb, static int input_action_end_dx6(struct sk_buff *skb,
struct seg6_local_lwt *slwt) struct seg6_local_lwt *slwt)
{ {
struct in6_addr *nhaddr = NULL;
/* this function accepts IPv6 encapsulated packets, with either /* this function accepts IPv6 encapsulated packets, with either
* an SRH with SL=0, or no SRH. * an SRH with SL=0, or no SRH.
*/ */
...@@ -429,40 +452,30 @@ static int input_action_end_dx6(struct sk_buff *skb, ...@@ -429,40 +452,30 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop; goto drop;
/* The inner packet is not associated to any local interface,
* so we do not call netif_rx().
*
* If slwt->nh6 is set to ::, then lookup the nexthop for the
* inner packet's DA. Otherwise, use the specified nexthop.
*/
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;
skb_set_transport_header(skb, sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr));
nf_reset_ct(skb);
seg6_lookup_nexthop(skb, nhaddr, 0); if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, input_action_end_dx6_finish);
return dst_input(skb); return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop: drop:
kfree_skb(skb); kfree_skb(skb);
return -EINVAL; return -EINVAL;
} }
static int input_action_end_dx4(struct sk_buff *skb, static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
struct seg6_local_lwt *slwt) struct sk_buff *skb)
{ {
struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_local_lwt *slwt;
struct iphdr *iph; struct iphdr *iph;
__be32 nhaddr; __be32 nhaddr;
int err; int err;
if (!decap_and_validate(skb, IPPROTO_IPIP)) slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
goto drop;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;
skb->protocol = htons(ETH_P_IP);
iph = ip_hdr(skb); iph = ip_hdr(skb);
...@@ -470,14 +483,34 @@ static int input_action_end_dx4(struct sk_buff *skb, ...@@ -470,14 +483,34 @@ static int input_action_end_dx4(struct sk_buff *skb,
skb_dst_drop(skb); skb_dst_drop(skb);
skb_set_transport_header(skb, sizeof(struct iphdr));
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
if (err) if (err) {
goto drop; kfree_skb(skb);
return -EINVAL;
}
return dst_input(skb); return dst_input(skb);
}
static int input_action_end_dx4(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
if (!decap_and_validate(skb, IPPROTO_IPIP))
goto drop;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;
skb->protocol = htons(ETH_P_IP);
skb_set_transport_header(skb, sizeof(struct iphdr));
nf_reset_ct(skb);
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, input_action_end_dx4_finish);
return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop: drop:
kfree_skb(skb); kfree_skb(skb);
return -EINVAL; return -EINVAL;
...@@ -645,6 +678,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, ...@@ -645,6 +678,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
skb_dst_drop(skb); skb_dst_drop(skb);
skb_set_transport_header(skb, hdrlen); skb_set_transport_header(skb, hdrlen);
nf_reset_ct(skb);
return end_dt_vrf_rcv(skb, family, vrf); return end_dt_vrf_rcv(skb, family, vrf);
...@@ -1078,7 +1112,8 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt, ...@@ -1078,7 +1112,8 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
u64_stats_update_end(&pcounters->syncp); u64_stats_update_end(&pcounters->syncp);
} }
static int seg6_local_input(struct sk_buff *skb) static int seg6_local_input_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{ {
struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_action_desc *desc; struct seg6_action_desc *desc;
...@@ -1086,11 +1121,6 @@ static int seg6_local_input(struct sk_buff *skb) ...@@ -1086,11 +1121,6 @@ static int seg6_local_input(struct sk_buff *skb)
unsigned int len = skb->len; unsigned int len = skb->len;
int rc; int rc;
if (skb->protocol != htons(ETH_P_IPV6)) {
kfree_skb(skb);
return -EINVAL;
}
slwt = seg6_local_lwtunnel(orig_dst->lwtstate); slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
desc = slwt->desc; desc = slwt->desc;
...@@ -1104,6 +1134,21 @@ static int seg6_local_input(struct sk_buff *skb) ...@@ -1104,6 +1134,21 @@ static int seg6_local_input(struct sk_buff *skb)
return rc; return rc;
} }
static int seg6_local_input(struct sk_buff *skb)
{
if (skb->protocol != htons(ETH_P_IPV6)) {
kfree_skb(skb);
return -EINVAL;
}
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
seg6_local_input_core);
return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
}
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 }, [SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, [SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
......
...@@ -212,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/ ...@@ -212,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/
# IPVS # IPVS
obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_IP_VS) += ipvs/
# lwtunnel
obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o
...@@ -22,6 +22,9 @@ ...@@ -22,6 +22,9 @@
#include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h> #include <net/netfilter/nf_conntrack_timestamp.h>
#ifdef CONFIG_LWTUNNEL
#include <net/netfilter/nf_hooks_lwtunnel.h>
#endif
#include <linux/rculist_nulls.h> #include <linux/rculist_nulls.h>
static bool enable_hooks __read_mostly; static bool enable_hooks __read_mostly;
...@@ -612,6 +615,9 @@ enum nf_ct_sysctl_index { ...@@ -612,6 +615,9 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
#endif #endif
#ifdef CONFIG_LWTUNNEL
NF_SYSCTL_CT_LWTUNNEL,
#endif
__NF_SYSCTL_CT_LAST_SYSCTL, __NF_SYSCTL_CT_LAST_SYSCTL,
}; };
...@@ -958,6 +964,15 @@ static struct ctl_table nf_ct_sysctl_table[] = { ...@@ -958,6 +964,15 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec_jiffies, .proc_handler = proc_dointvec_jiffies,
}, },
#endif
#ifdef CONFIG_LWTUNNEL
[NF_SYSCTL_CT_LWTUNNEL] = {
.procname = "nf_hooks_lwtunnel",
.data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = nf_hooks_lwtunnel_sysctl_handler,
},
#endif #endif
{} {}
}; };
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/sysctl.h>
#include <net/lwtunnel.h>
#include <net/netfilter/nf_hooks_lwtunnel.h>
static inline int nf_hooks_lwtunnel_get(void)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return 1;
else
return 0;
}
static inline int nf_hooks_lwtunnel_set(int enable)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) {
if (!enable)
return -EBUSY;
} else if (enable) {
static_branch_enable(&nf_hooks_lwtunnel_enabled);
}
return 0;
}
#ifdef CONFIG_SYSCTL
int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int proc_nf_hooks_lwtunnel_enabled = 0;
struct ctl_table tmp = {
.procname = table->procname,
.data = &proc_nf_hooks_lwtunnel_enabled,
.maxlen = sizeof(int),
.mode = table->mode,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
};
int ret;
if (!write)
proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get();
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0)
ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled);
return ret;
}
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler);
#endif /* CONFIG_SYSCTL */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment