Commit 9dfa859d authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Clean up and consolidate ct ecache infrastructure by merging ct and
   expect notifiers, from Florian Westphal.

2) Add missing counters and timestamp to the conntrack information
   provided via nfnetlink_queue and nfnetlink_log.

3) Add a missing error check for xt_register_template() in iptables
   mangle, an incremental fix for the previous pull request, also from
   Florian Westphal.

4) Add netfilter hooks for the SRv6 lightweight tunnel driver, from
   Ryoga Sato. The hooks have to be enabled via the nf_hooks_lwtunnel
   sysctl to make sure existing netfilter rulesets do not break; a
   static key keeps the hooks disabled by default (a short sketch of
   this gating pattern follows the commit metadata below).

   The pktgen_bench_xmit_mode_netif_receive.sh benchmark shows no
   noticeable impact on the seg6_input path for non-netfilter users:
   the numbers are similar with and without this patch.

   This is a sample of the perf report output:

    11.67%  kpktgend_0       [ipv6]                    [k] ipv6_get_saddr_eval
     7.89%  kpktgend_0       [ipv6]                    [k] __ipv6_addr_label
     7.52%  kpktgend_0       [ipv6]                    [k] __ipv6_dev_get_saddr
     6.63%  kpktgend_0       [kernel.vmlinux]          [k] asm_exc_nmi
     4.74%  kpktgend_0       [ipv6]                    [k] fib6_node_lookup_1
     3.48%  kpktgend_0       [kernel.vmlinux]          [k] pskb_expand_head
     3.33%  kpktgend_0       [ipv6]                    [k] ip6_rcv_core.isra.29
     3.33%  kpktgend_0       [ipv6]                    [k] seg6_do_srh_encap
     2.53%  kpktgend_0       [ipv6]                    [k] ipv6_dev_get_saddr
     2.45%  kpktgend_0       [ipv6]                    [k] fib6_table_lookup
     2.24%  kpktgend_0       [kernel.vmlinux]          [k] ___cache_free
     2.16%  kpktgend_0       [ipv6]                    [k] ip6_pol_route
     2.11%  kpktgend_0       [kernel.vmlinux]          [k] __ipv6_addr_type
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 724812d8 7a3f5b0d
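As a quick illustration of item 4, the sketch below shows the gating pattern in isolation: a lightweight tunnel input path wraps its existing receive logic in NF_HOOK() only when the nf_hooks_lwtunnel_enabled static key has been switched on. The helper names my_lwt_input()/my_lwt_input_finish() are hypothetical; the real call sites are in the seg6 hunks further down.

/* Minimal sketch, not part of the patch: static-key-gated netfilter
 * traversal for a lightweight tunnel input path.
 */
#include <linux/netfilter.h>
#include <net/dst.h>
#include <net/lwtunnel.h>

static int my_lwt_input_finish(struct net *net, struct sock *sk,
			       struct sk_buff *skb)
{
	/* the pre-existing behaviour: hand the packet to the dst input path */
	return dst_input(skb);
}

static int my_lwt_input(struct sk_buff *skb)
{
	/* The key is off by default, so non-netfilter users only pay for a
	 * patched-out branch here.
	 */
	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       dev_net(skb->dev), NULL, skb, NULL,
			       skb_dst(skb)->dev, my_lwt_input_finish);

	return my_lwt_input_finish(dev_net(skb->dev), NULL, skb);
}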
@@ -184,6 +184,13 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds)
This extended timeout will be used in case there is an GRE stream
detected.
nf_hooks_lwtunnel - BOOLEAN
- 0 - disabled (default)
- not 0 - enabled
If this option is enabled, the lightweight tunnel netfilter hooks are
enabled. This option cannot be disabled once it is enabled.
nf_flowtable_tcp_timeout - INTEGER (seconds)
default 30
...
@@ -51,6 +51,9 @@ struct lwtunnel_encap_ops {
};
#ifdef CONFIG_LWTUNNEL
DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
void lwtstate_free(struct lwtunnel_state *lws);
static inline struct lwtunnel_state *
...
@@ -72,14 +72,20 @@ struct nf_ct_event {
int report;
};
struct nf_exp_event {
struct nf_conntrack_expect *exp;
u32 portid;
int report;
};
struct nf_ct_event_notifier {
int (*fcn)(unsigned int events, struct nf_ct_event *item);
int (*ct_event)(unsigned int events, const struct nf_ct_event *item);
int (*exp_event)(unsigned int events, const struct nf_exp_event *item);
};
int nf_conntrack_register_notifier(struct net *net,
struct nf_ct_event_notifier *nb);
void nf_conntrack_register_notifier(struct net *net,
const struct nf_ct_event_notifier *nb);
void nf_conntrack_unregister_notifier(struct net *net,
struct nf_ct_event_notifier *nb);
void nf_conntrack_unregister_notifier(struct net *net);
void nf_ct_deliver_cached_events(struct nf_conn *ct);
int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
@@ -151,22 +157,6 @@ nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_exp_event {
struct nf_conntrack_expect *exp;
u32 portid;
int report;
};
struct nf_exp_event_notifier {
int (*fcn)(unsigned int events, struct nf_exp_event *item);
};
int nf_ct_expect_register_notifier(struct net *net,
struct nf_exp_event_notifier *nb);
void nf_ct_expect_unregister_notifier(struct net *net,
struct nf_exp_event_notifier *nb);
void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp,
u32 portid, int report);
......
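The merged notifier from item 1 means a subscriber now provides both callbacks through one struct and a single registration call; registration can no longer fail with -EBUSY, it only WARNs if another notifier is already present. A minimal consumer sketch under assumed names (my_ct_event, my_exp_event, my_nf_notifier; ctnetlink is the only real in-tree user, shown further down):

#include <net/net_namespace.h>
#include <net/netfilter/nf_conntrack_ecache.h>

static int my_ct_event(unsigned int events, const struct nf_ct_event *item)
{
	/* item->ct, item->portid and item->report describe the event */
	return 0;
}

static int my_exp_event(unsigned int events, const struct nf_exp_event *item)
{
	/* item->exp is the expectation the event refers to */
	return 0;
}

static const struct nf_ct_event_notifier my_nf_notifier = {
	.ct_event  = my_ct_event,
	.exp_event = my_exp_event,
};

static int __net_init my_net_init(struct net *net)
{
	/* one notifier per netns; the call no longer returns an error */
	nf_conntrack_register_notifier(net, &my_nf_notifier);
	return 0;
}

static void __net_exit my_net_pre_exit(struct net *net)
{
	nf_conntrack_unregister_notifier(net);
}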
#include <linux/sysctl.h>
#include <linux/types.h>
#ifdef CONFIG_SYSCTL
int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
#endif
@@ -113,7 +113,6 @@ struct netns_ct {
struct ct_pcpu __percpu *pcpu_lists;
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
struct nf_ip_net nf_ct_proto;
#if defined(CONFIG_NF_CONNTRACK_LABELS)
unsigned int labels_used;
...
@@ -23,6 +23,9 @@
#include <net/ip6_fib.h>
#include <net/rtnh.h>
DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
#ifdef CONFIG_MODULES
static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
...
@@ -112,6 +112,8 @@ static int __init iptable_mangle_init(void)
{
int ret = xt_register_template(&packet_mangler,
iptable_mangle_table_init);
if (ret < 0)
return ret;
mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook);
if (IS_ERR(mangle_ops)) {
...
@@ -26,6 +26,8 @@
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
#include <net/lwtunnel.h>
#include <linux/netfilter.h>
static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
{
@@ -295,11 +297,19 @@ static int seg6_do_srh(struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
nf_reset_ct(skb);
return 0;
}
static int seg6_input(struct sk_buff *skb)
static int seg6_input_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
return dst_input(skb);
}
static int seg6_input_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
@@ -337,10 +347,41 @@ static int seg6_input(struct sk_buff *skb)
if (unlikely(err))
return err;
return dst_input(skb);
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, seg6_input_finish);
return seg6_input_finish(dev_net(skb->dev), NULL, skb);
}
static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
static int seg6_input_nf(struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct net *net = dev_net(skb->dev);
switch (skb->protocol) {
case htons(ETH_P_IP):
return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
skb, NULL, dev, seg6_input_core);
case htons(ETH_P_IPV6):
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
skb, NULL, dev, seg6_input_core);
}
return -EINVAL;
}
static int seg6_input(struct sk_buff *skb)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return seg6_input_nf(skb);
return seg6_input_core(dev_net(skb->dev), NULL, skb);
}
static int seg6_output_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
@@ -387,12 +428,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(err))
goto drop;
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, skb_dst(skb)->dev, dst_output);
return dst_output(net, sk, skb);
drop:
kfree_skb(skb);
return err;
}
static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
switch (skb->protocol) {
case htons(ETH_P_IP):
return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
NULL, dev, seg6_output_core);
case htons(ETH_P_IPV6):
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
NULL, dev, seg6_output_core);
}
return -EINVAL;
}
static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return seg6_output_nf(net, sk, skb);
return seg6_output_core(net, sk, skb);
}
static int seg6_build_state(struct net *net, struct nlattr *nla,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts,
...
@@ -30,6 +30,8 @@
#include <net/seg6_local.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>
#include <net/lwtunnel.h>
#include <linux/netfilter.h>
#define SEG6_F_ATTR(i) BIT(i)
@@ -413,12 +415,33 @@ static int input_action_end_dx2(struct sk_buff *skb,
return -EINVAL;
}
static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct in6_addr *nhaddr = NULL;
struct seg6_local_lwt *slwt;
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
/* The inner packet is not associated to any local interface,
* so we do not call netif_rx().
*
* If slwt->nh6 is set to ::, then lookup the nexthop for the
* inner packet's DA. Otherwise, use the specified nexthop.
*/
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;
seg6_lookup_nexthop(skb, nhaddr, 0);
return dst_input(skb);
}
/* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct in6_addr *nhaddr = NULL;
/* this function accepts IPv6 encapsulated packets, with either
* an SRH with SL=0, or no SRH.
*/
@@ -429,40 +452,30 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
/* The inner packet is not associated to any local interface,
* so we do not call netif_rx().
*
* If slwt->nh6 is set to ::, then lookup the nexthop for the
* inner packet's DA. Otherwise, use the specified nexthop.
*/
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
nf_reset_ct(skb);
seg6_lookup_nexthop(skb, nhaddr, 0);
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, input_action_end_dx6_finish);
return dst_input(skb);
return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
}
static int input_action_end_dx4(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_local_lwt *slwt;
struct iphdr *iph;
__be32 nhaddr;
int err;
if (!decap_and_validate(skb, IPPROTO_IPIP))
goto drop;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;
skb->protocol = htons(ETH_P_IP);
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
iph = ip_hdr(skb);
@@ -470,14 +483,34 @@ static int input_action_end_dx4(struct sk_buff *skb,
skb_dst_drop(skb);
skb_set_transport_header(skb, sizeof(struct iphdr));
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
if (err)
goto drop;
if (err) {
kfree_skb(skb);
return -EINVAL;
}
return dst_input(skb);
}
static int input_action_end_dx4(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
if (!decap_and_validate(skb, IPPROTO_IPIP))
goto drop;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;
skb->protocol = htons(ETH_P_IP);
skb_set_transport_header(skb, sizeof(struct iphdr));
nf_reset_ct(skb);
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, input_action_end_dx4_finish);
return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
@@ -645,6 +678,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
skb_dst_drop(skb);
skb_set_transport_header(skb, hdrlen);
nf_reset_ct(skb);
return end_dt_vrf_rcv(skb, family, vrf);
@@ -1078,7 +1112,8 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
u64_stats_update_end(&pcounters->syncp);
}
static int seg6_local_input(struct sk_buff *skb)
static int seg6_local_input_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_action_desc *desc;
@@ -1086,11 +1121,6 @@ static int seg6_local_input(struct sk_buff *skb)
unsigned int len = skb->len;
int rc;
if (skb->protocol != htons(ETH_P_IPV6)) {
kfree_skb(skb);
return -EINVAL;
}
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
desc = slwt->desc;
@@ -1104,6 +1134,21 @@ static int seg6_local_input(struct sk_buff *skb)
return rc;
}
static int seg6_local_input(struct sk_buff *skb)
{
if (skb->protocol != htons(ETH_P_IPV6)) {
kfree_skb(skb);
return -EINVAL;
}
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
seg6_local_input_core);
return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
}
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
...
@@ -212,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/
# IPVS
obj-$(CONFIG_IP_VS) += ipvs/
# lwtunnel
obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o
@@ -130,58 +130,77 @@ static void ecache_work(struct work_struct *work)
schedule_delayed_work(&cnet->ecache_dwork, delay);
}
int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
u32 portid, int report)
static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
const unsigned int events,
const unsigned long missed,
const struct nf_ct_event *item)
{
int ret = 0;
struct net *net = nf_ct_net(ct);
struct nf_conn *ct = item->ct;
struct net *net = nf_ct_net(item->ct);
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
int ret;
if (!((events | missed) & e->ctmask))
return 0;
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify)
goto out_unlock;
if (!notify) {
rcu_read_unlock();
return 0;
}
ret = notify->ct_event(events | missed, item);
rcu_read_unlock();
if (likely(ret >= 0 && missed == 0))
return 0;
spin_lock_bh(&ct->lock);
if (ret < 0)
e->missed |= events;
else
e->missed &= ~missed;
spin_unlock_bh(&ct->lock);
return ret;
}
int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
u32 portid, int report)
{
struct nf_conntrack_ecache *e;
struct nf_ct_event item;
unsigned long missed;
int ret;
if (!nf_ct_is_confirmed(ct))
return 0;
e = nf_ct_ecache_find(ct);
if (!e)
goto out_unlock;
return 0;
if (nf_ct_is_confirmed(ct)) {
struct nf_ct_event item = {
.ct = ct,
.portid = e->portid ? e->portid : portid,
.report = report
};
/* This is a resent of a destroy event? If so, skip missed */
unsigned long missed = e->portid ? 0 : e->missed;
if (!((eventmask | missed) & e->ctmask))
goto out_unlock;
ret = notify->fcn(eventmask | missed, &item);
if (unlikely(ret < 0 || missed)) {
spin_lock_bh(&ct->lock);
if (ret < 0) {
/* This is a destroy event that has been
* triggered by a process, we store the PORTID
* to include it in the retransmission.
*/
if (eventmask & (1 << IPCT_DESTROY)) {
if (e->portid == 0 && portid != 0)
e->portid = portid;
e->state = NFCT_ECACHE_DESTROY_FAIL;
} else {
e->missed |= eventmask;
}
} else {
e->missed &= ~missed;
}
spin_unlock_bh(&ct->lock);
}
}
out_unlock:
rcu_read_unlock();
memset(&item, 0, sizeof(item));
item.ct = ct;
item.portid = e->portid ? e->portid : portid;
item.report = report;
/* This is a resent of a destroy event? If so, skip missed */
missed = e->portid ? 0 : e->missed;
ret = __nf_conntrack_eventmask_report(e, events, missed, &item);
if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) {
/* This is a destroy event that has been triggered by a process,
* we store the PORTID to include it in the retransmission.
*/
if (e->portid == 0 && portid != 0)
e->portid = portid;
e->state = NFCT_ECACHE_DESTROY_FAIL;
}
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
@@ -190,53 +209,28 @@ EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
* disabled softirqs */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
unsigned long events, missed;
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
struct nf_ct_event item;
int ret;
unsigned long events;
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (notify == NULL)
goto out_unlock;
if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
goto out_unlock;
return;
e = nf_ct_ecache_find(ct);
if (e == NULL)
goto out_unlock;
return;
events = xchg(&e->cache, 0);
/* We make a copy of the missed event cache without taking
* the lock, thus we may send missed events twice. However,
* this does not harm and it happens very rarely. */
missed = e->missed;
if (!((events | missed) & e->ctmask))
goto out_unlock;
item.ct = ct;
item.portid = 0;
item.report = 0;
ret = notify->fcn(events | missed, &item);
if (likely(ret == 0 && !missed))
goto out_unlock;
/* We make a copy of the missed event cache without taking
* the lock, thus we may send missed events twice. However,
* this does not harm and it happens very rarely.
*/
__nf_conntrack_eventmask_report(e, events, e->missed, &item);
spin_lock_bh(&ct->lock);
if (ret < 0)
e->missed |= events;
else
e->missed &= ~missed;
spin_unlock_bh(&ct->lock);
out_unlock:
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
@@ -246,11 +240,11 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
{
struct net *net = nf_ct_exp_net(exp);
struct nf_exp_event_notifier *notify;
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_expect_event_cb);
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify)
goto out_unlock;
@@ -264,86 +258,35 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
.portid = portid,
.report = report
};
notify->fcn(1 << event, &item);
notify->exp_event(1 << event, &item);
}
out_unlock:
rcu_read_unlock();
}
int nf_conntrack_register_notifier(struct net *net,
struct nf_ct_event_notifier *new)
void nf_conntrack_register_notifier(struct net *net,
const struct nf_ct_event_notifier *new)
{
int ret;
struct nf_ct_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
if (notify != NULL) {
ret = -EBUSY;
goto out_unlock;
}
WARN_ON_ONCE(notify);
rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
ret = 0;
out_unlock:
mutex_unlock(&nf_ct_ecache_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
void nf_conntrack_unregister_notifier(struct net *net,
struct nf_ct_event_notifier *new)
void nf_conntrack_unregister_notifier(struct net *net)
{
struct nf_ct_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
BUG_ON(notify != new);
RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
/* synchronize_rcu() is called from ctnetlink_exit. */
/* synchronize_rcu() is called after netns pre_exit */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
int nf_ct_expect_register_notifier(struct net *net,
struct nf_exp_event_notifier *new)
{
int ret;
struct nf_exp_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
if (notify != NULL) {
ret = -EBUSY;
goto out_unlock;
}
rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
ret = 0;
out_unlock:
mutex_unlock(&nf_ct_ecache_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
void nf_ct_expect_unregister_notifier(struct net *net,
struct nf_exp_event_notifier *new)
{
struct nf_exp_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
BUG_ON(notify != new);
RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
/* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
...
@@ -706,7 +706,7 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
}
static int
ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
{
const struct nf_conntrack_zone *zone;
struct net *net;
@@ -2669,6 +2669,8 @@ ctnetlink_glue_build_size(const struct nf_conn *ct)
+ nla_total_size(0) /* CTA_HELP */
+ nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ ctnetlink_secctx_size(ct)
+ ctnetlink_acct_size(ct)
+ ctnetlink_timestamp_size(ct)
#if IS_ENABLED(CONFIG_NF_NAT)
+ 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
@@ -2726,6 +2728,10 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
if (ctnetlink_dump_protoinfo(skb, ct, false) < 0)
goto nla_put_failure;
if (ctnetlink_dump_acct(skb, ct, IPCTNL_MSG_CT_GET) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0)
goto nla_put_failure;
if (ctnetlink_dump_helpinfo(skb, ct) < 0)
goto nla_put_failure;
@@ -3104,7 +3110,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static int
ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item)
{
struct nf_conntrack_expect *exp = item->exp;
struct net *net = nf_ct_exp_net(exp);
@@ -3755,11 +3761,8 @@ static int ctnetlink_stat_exp_cpu(struct sk_buff *skb,
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static struct nf_ct_event_notifier ctnl_notifier = {
.fcn = ctnetlink_conntrack_event,
.ct_event = ctnetlink_conntrack_event,
.exp_event = ctnetlink_expect_event,
};
static struct nf_exp_event_notifier ctnl_notifier_exp = {
.fcn = ctnetlink_expect_event,
};
#endif
@@ -3852,52 +3855,21 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
static int __net_init ctnetlink_net_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
int ret;
ret = nf_conntrack_register_notifier(net, &ctnl_notifier);
if (ret < 0) {
pr_err("ctnetlink_init: cannot register notifier.\n");
goto err_out;
}
ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp);
if (ret < 0) {
pr_err("ctnetlink_init: cannot expect register notifier.\n");
goto err_unreg_notifier;
}
nf_conntrack_register_notifier(net, &ctnl_notifier);
#endif
return 0;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
err_unreg_notifier:
nf_conntrack_unregister_notifier(net, &ctnl_notifier);
err_out:
return ret;
#endif
}
static void ctnetlink_net_exit(struct net *net)
static void ctnetlink_net_pre_exit(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp);
nf_conntrack_unregister_notifier(net, &ctnl_notifier);
nf_conntrack_unregister_notifier(net);
#endif
}
static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
list_for_each_entry(net, net_exit_list, exit_list)
ctnetlink_net_exit(net);
/* wait for other cpus until they are done with ctnl_notifiers */
synchronize_rcu();
}
static struct pernet_operations ctnetlink_net_ops = {
.init = ctnetlink_net_init,
.exit_batch = ctnetlink_net_exit_batch,
.pre_exit = ctnetlink_net_pre_exit,
};
static int __init ctnetlink_init(void)
...
@@ -22,6 +22,9 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
#ifdef CONFIG_LWTUNNEL
#include <net/netfilter/nf_hooks_lwtunnel.h>
#endif
#include <linux/rculist_nulls.h>
static bool enable_hooks __read_mostly;
@@ -612,6 +615,9 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
#endif
#ifdef CONFIG_LWTUNNEL
NF_SYSCTL_CT_LWTUNNEL,
#endif
__NF_SYSCTL_CT_LAST_SYSCTL,
};
@@ -958,6 +964,15 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
#endif
#ifdef CONFIG_LWTUNNEL
[NF_SYSCTL_CT_LWTUNNEL] = {
.procname = "nf_hooks_lwtunnel",
.data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = nf_hooks_lwtunnel_sysctl_handler,
},
#endif
{}
};
...
// SPDX-License-Identifier: GPL-2.0
#include <linux/sysctl.h>
#include <net/lwtunnel.h>
#include <net/netfilter/nf_hooks_lwtunnel.h>
static inline int nf_hooks_lwtunnel_get(void)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return 1;
else
return 0;
}
static inline int nf_hooks_lwtunnel_set(int enable)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) {
if (!enable)
return -EBUSY;
} else if (enable) {
static_branch_enable(&nf_hooks_lwtunnel_enabled);
}
return 0;
}
#ifdef CONFIG_SYSCTL
int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int proc_nf_hooks_lwtunnel_enabled = 0;
struct ctl_table tmp = {
.procname = table->procname,
.data = &proc_nf_hooks_lwtunnel_enabled,
.maxlen = sizeof(int),
.mode = table->mode,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
};
int ret;
if (!write)
proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get();
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0)
ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled);
return ret;
}
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler);
#endif /* CONFIG_SYSCTL */
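The handler above is wired into the nf_hooks_lwtunnel entry added to nf_ct_sysctl_table earlier in this series, which is registered under net.netfilter, so from userspace the knob should appear as /proc/sys/net/netfilter/nf_hooks_lwtunnel. A small standalone userspace sketch (not part of the patch) that enables the hooks and reads the value back; note that, as enforced by nf_hooks_lwtunnel_set(), writing 0 afterwards fails with EBUSY because the static key cannot be switched off again:

#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/net/netfilter/nf_hooks_lwtunnel";
	char buf[16] = "";
	FILE *f;

	f = fopen(path, "w");	/* needs root / CAP_NET_ADMIN */
	if (!f)
		return 1;
	fputs("1\n", f);	/* enable the lwtunnel netfilter hooks */
	fclose(f);

	f = fopen(path, "r");	/* read the value back to confirm it latched */
	if (f && fgets(buf, sizeof(buf), f))
		printf("nf_hooks_lwtunnel = %s", buf);
	if (f)
		fclose(f);
	return 0;
}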