Commit c3e53369 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Support for destination MAC in ipset, from Stefano Brivio.

2) Disallow all-zeroes MAC address in ipset, also from Stefano.

3) Add IPSET_CMD_GET_BYNAME and IPSET_CMD_GET_BYINDEX commands,
   introduce protocol version number 7, from Jozsef Kadlecsik.
   A follow up patch to fix ip_set_byindex() is also included
   in this batch.

4) Honor CTA_MARK_MASK from ctnetlink, from Andreas Jaggi.

5) Statify nf_flow_table_iterate(), from Taehee Yoo.

6) Use nf_flow_table_iterate() to simplify garbage collection in
   nf_flow_table logic, also from Taehee Yoo.

7) Don't use _bh variants of call_rcu(), rcu_barrier() and
   synchronize_rcu_bh() in Netfilter, from Paul E. McKenney.

8) Remove NFC_* cache definition from the old caching
   infrastructure.

9) Remove layer 4 port rover in NAT helpers, use random port
   instead, from Florian Westphal.

10) Use strscpy() in ipset, from Qian Cai.

11) Remove NF_NAT_RANGE_PROTO_RANDOM_FULLY branch now that
    random port is allocated by default, from Xiaozhou Liu.

12) Ignore NF_NAT_RANGE_PROTO_RANDOM too, from Florian Westphal.

13) Limit port allocation selection routine in NAT to avoid
    softlockup splats when most ports are in use, from Florian.

14) Remove unused parameters in nf_ct_l4proto_unregister_sysctl()
    from Yafang Shao.

15) Direct call to nf_nat_l4proto_unique_tuple() instead of
    indirection, from Florian Westphal.

16) Several patches to remove all layer 4 NAT indirections,
    remove nf_nat_l4proto struct, from Florian Westphal.

17) Fix RTP/RTCP source port translation when SNAT is in place,
    from Alin Nastac.

18) Selective rule dump per chain, from Phil Sutter.

19) Revisit CLUSTERIP target, this includes a deadlock fix from
    netns path, sleep in atomic, remove bogus WARN_ON_ONCE()
    and disallow mismatching IP address and MAC address.
    Patchset from Taehee Yoo.

20) Update UDP timeout to stream after 2 seconds, from Florian.

21) Shrink UDP established timeout to 120 seconds like TCP timewait.

22) Sysctl knobs to set GRE timeouts, from Yafang Shao.

23) Move seq_print_acct() to conntrack core file, from Florian.

24) Add enum for conntrack sysctl knobs, also from Florian.

25) Place nf_conntrack_acct, nf_conntrack_helper, nf_conntrack_events
    and nf_conntrack_timestamp knobs in the core, from Florian Westphal.
    As a side effect, shrink netns_ct structure by removing obsolete
    sysctl anchors, also from Florian.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 339bbff2 8527f9df
......@@ -157,7 +157,16 @@ nf_conntrack_udp_timeout - INTEGER (seconds)
default 30
nf_conntrack_udp_timeout_stream - INTEGER (seconds)
default 180
default 120
This extended timeout will be used in case there is an UDP stream
detected.
nf_conntrack_gre_timeout - INTEGER (seconds)
default 30
nf_conntrack_gre_timeout_stream - INTEGER (seconds)
default 180
This extended timeout will be used in case there is an GRE stream
detected.
......@@ -303,11 +303,11 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
/* Netlink CB args */
enum {
IPSET_CB_NET = 0, /* net namespace */
IPSET_CB_PROTO, /* ipset protocol */
IPSET_CB_DUMP, /* dump single set/all sets */
IPSET_CB_INDEX, /* set index */
IPSET_CB_PRIVATE, /* set private data */
IPSET_CB_ARG0, /* type specific */
IPSET_CB_ARG1,
};
/* register and unregister set references */
......
......@@ -41,7 +41,5 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
/* delete keymap entries */
void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
void nf_nat_need_gre(void);
#endif /* __KERNEL__ */
#endif /* _CONNTRACK_PROTO_GRE_H */
......@@ -27,12 +27,17 @@
#include <net/netfilter/nf_conntrack_tuple.h>
struct nf_ct_udp {
unsigned long stream_ts;
};
/* per conntrack: protocol private data */
union nf_conntrack_proto {
/* insert conntrack proto private data here */
struct nf_ct_dccp dccp;
struct ip_ct_sctp sctp;
struct ip_ct_tcp tcp;
struct nf_ct_udp udp;
struct nf_ct_gre gre;
unsigned int tmpl_padto;
};
......
......@@ -46,9 +46,6 @@ struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
return acct;
};
unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct,
int dir);
/* Check if connection tracking accounting is enabled */
static inline bool nf_ct_acct_enabled(struct net *net)
{
......@@ -61,8 +58,7 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
net->ct.sysctl_acct = enable;
}
int nf_conntrack_acct_pernet_init(struct net *net);
void nf_conntrack_acct_pernet_fini(struct net *net);
void nf_conntrack_acct_pernet_init(struct net *net);
int nf_conntrack_acct_init(void);
void nf_conntrack_acct_fini(void);
......
......@@ -142,7 +142,7 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp,
u32 portid, int report);
int nf_conntrack_ecache_pernet_init(struct net *net);
void nf_conntrack_ecache_pernet_init(struct net *net);
void nf_conntrack_ecache_pernet_fini(struct net *net);
int nf_conntrack_ecache_init(void);
......@@ -182,10 +182,7 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
u32 portid,
int report) {}
static inline int nf_conntrack_ecache_pernet_init(struct net *net)
{
return 0;
}
static inline void nf_conntrack_ecache_pernet_init(struct net *net) {}
static inline void nf_conntrack_ecache_pernet_fini(struct net *net)
{
......
......@@ -124,8 +124,7 @@ static inline void *nfct_help_data(const struct nf_conn *ct)
return (void *)help->data;
}
int nf_conntrack_helper_pernet_init(struct net *net);
void nf_conntrack_helper_pernet_fini(struct net *net);
void nf_conntrack_helper_pernet_init(struct net *net);
int nf_conntrack_helper_init(void);
void nf_conntrack_helper_fini(void);
......
......@@ -49,21 +49,12 @@ static inline void nf_ct_set_tstamp(struct net *net, bool enable)
}
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
int nf_conntrack_tstamp_pernet_init(struct net *net);
void nf_conntrack_tstamp_pernet_fini(struct net *net);
void nf_conntrack_tstamp_pernet_init(struct net *net);
int nf_conntrack_tstamp_init(void);
void nf_conntrack_tstamp_fini(void);
#else
static inline int nf_conntrack_tstamp_pernet_init(struct net *net)
{
return 0;
}
static inline void nf_conntrack_tstamp_pernet_fini(struct net *net)
{
return;
}
static inline void nf_conntrack_tstamp_pernet_init(struct net *net) {}
static inline int nf_conntrack_tstamp_init(void)
{
......
......@@ -95,10 +95,6 @@ void flow_offload_free(struct flow_offload *flow);
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple);
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
void (*iter)(struct flow_offload *flow, void *data),
void *data);
void nf_flow_table_cleanup(struct net_device *dev);
int nf_flow_table_init(struct nf_flowtable *flow_table);
......
......@@ -2,18 +2,11 @@
#ifndef _NF_NAT_L3PROTO_H
#define _NF_NAT_L3PROTO_H
struct nf_nat_l4proto;
struct nf_nat_l3proto {
u8 l3proto;
bool (*in_range)(const struct nf_conntrack_tuple *t,
const struct nf_nat_range2 *range);
u32 (*secure_port)(const struct nf_conntrack_tuple *t, __be16);
bool (*manip_pkt)(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *target,
enum nf_nat_manip_type maniptype);
......
......@@ -5,78 +5,12 @@
#include <net/netfilter/nf_nat.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
struct nf_nat_range;
struct nf_nat_l3proto;
struct nf_nat_l4proto {
/* Protocol number. */
u8 l4proto;
/* Translate a packet to the target according to manip type.
* Return true if succeeded.
*/
bool (*manip_pkt)(struct sk_buff *skb,
/* Translate a packet to the target according to manip type. Return on success. */
bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype);
/* Is the manipable part of the tuple between min and max incl? */
bool (*in_range)(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
const union nf_conntrack_man_proto *min,
const union nf_conntrack_man_proto *max);
/* Alter the per-proto part of the tuple (depending on
* maniptype), to give a unique tuple in the given range if
* possible. Per-protocol part of tuple is initialized to the
* incoming packet.
*/
void (*unique_tuple)(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct);
int (*nlattr_to_range)(struct nlattr *tb[],
struct nf_nat_range2 *range);
};
/* Protocol registration. */
int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto);
void nf_nat_l4proto_unregister(u8 l3proto,
const struct nf_nat_l4proto *l4proto);
const struct nf_nat_l4proto *__nf_nat_l4proto_find(u8 l3proto, u8 l4proto);
/* Built-in protocols. */
extern const struct nf_nat_l4proto nf_nat_l4proto_tcp;
extern const struct nf_nat_l4proto nf_nat_l4proto_udp;
extern const struct nf_nat_l4proto nf_nat_l4proto_icmp;
extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6;
extern const struct nf_nat_l4proto nf_nat_l4proto_unknown;
#ifdef CONFIG_NF_NAT_PROTO_DCCP
extern const struct nf_nat_l4proto nf_nat_l4proto_dccp;
#endif
#ifdef CONFIG_NF_NAT_PROTO_SCTP
extern const struct nf_nat_l4proto nf_nat_l4proto_sctp;
#endif
#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
extern const struct nf_nat_l4proto nf_nat_l4proto_udplite;
#endif
bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
const union nf_conntrack_man_proto *min,
const union nf_conntrack_man_proto *max);
void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct, u16 *rover);
int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range2 *range);
#endif /*_NF_NAT_L4PROTO_H*/
......@@ -97,18 +97,14 @@ struct netns_ct {
struct delayed_work ecache_dwork;
bool ecache_dwork_pending;
#endif
bool auto_assign_helper_warned;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
struct ctl_table_header *acct_sysctl_header;
struct ctl_table_header *tstamp_sysctl_header;
struct ctl_table_header *event_sysctl_header;
struct ctl_table_header *helper_sysctl_header;
#endif
unsigned int sysctl_log_invalid; /* Log invalid packets */
int sysctl_events;
int sysctl_acct;
int sysctl_auto_assign_helper;
bool auto_assign_helper_warned;
int sysctl_tstamp;
int sysctl_checksum;
......
......@@ -34,10 +34,6 @@
/* only for userspace compatibility */
#ifndef __KERNEL__
/* Generic cache responses from hook functions.
<= 0x2000 is used for protocol-flags. */
#define NFC_UNKNOWN 0x4000
#define NFC_ALTERED 0x8000
/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
#define NF_VERDICT_BITS 16
......
......@@ -13,8 +13,9 @@
#include <linux/types.h>
/* The protocol version */
#define IPSET_PROTOCOL 6
/* The protocol versions */
#define IPSET_PROTOCOL 7
#define IPSET_PROTOCOL_MIN 6
/* The max length of strings including NUL: set and type identifiers */
#define IPSET_MAXNAMELEN 32
......@@ -38,17 +39,19 @@ enum ipset_cmd {
IPSET_CMD_TEST, /* 11: Test an element in a set */
IPSET_CMD_HEADER, /* 12: Get set header data only */
IPSET_CMD_TYPE, /* 13: Get set type */
IPSET_CMD_GET_BYNAME, /* 14: Get set index by name */
IPSET_CMD_GET_BYINDEX, /* 15: Get set name by index */
IPSET_MSG_MAX, /* Netlink message commands */
/* Commands in userspace: */
IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */
IPSET_CMD_HELP, /* 15: Get help */
IPSET_CMD_VERSION, /* 16: Get program version */
IPSET_CMD_QUIT, /* 17: Quit from interactive mode */
IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 16: Enter restore mode */
IPSET_CMD_HELP, /* 17: Get help */
IPSET_CMD_VERSION, /* 18: Get program version */
IPSET_CMD_QUIT, /* 19: Quit from interactive mode */
IPSET_CMD_MAX,
IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */
IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 20: Commit buffered commands */
};
/* Attributes at command level */
......@@ -66,6 +69,7 @@ enum {
IPSET_ATTR_LINENO, /* 9: Restore lineno */
IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */
IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */
IPSET_ATTR_INDEX, /* 11: Kernel index of set */
__IPSET_ATTR_CMD_MAX,
};
#define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1)
......@@ -223,6 +227,7 @@ enum ipset_adt {
/* Sets are identified by an index in kernel space. Tweak with ip_set_id_t
* and IPSET_INVALID_ID if you want to increase the max number of sets.
* Also, IPSET_ATTR_INDEX must be changed.
*/
typedef __u16 ip_set_id_t;
......
......@@ -15,16 +15,6 @@
#include <limits.h> /* for INT_MIN, INT_MAX */
/* IP Cache bits. */
/* Src IP address. */
#define NFC_DN_SRC 0x0001
/* Dest IP address. */
#define NFC_DN_DST 0x0002
/* Input device. */
#define NFC_DN_IF_IN 0x0004
/* Output device. */
#define NFC_DN_IF_OUT 0x0008
/* kernel define is in netfilter_defs.h */
#define NF_DN_NUMHOOKS 7
#endif /* ! __KERNEL__ */
......
......@@ -13,34 +13,6 @@
#include <limits.h> /* for INT_MIN, INT_MAX */
/* IP Cache bits. */
/* Src IP address. */
#define NFC_IP_SRC 0x0001
/* Dest IP address. */
#define NFC_IP_DST 0x0002
/* Input device. */
#define NFC_IP_IF_IN 0x0004
/* Output device. */
#define NFC_IP_IF_OUT 0x0008
/* TOS. */
#define NFC_IP_TOS 0x0010
/* Protocol. */
#define NFC_IP_PROTO 0x0020
/* IP options. */
#define NFC_IP_OPTIONS 0x0040
/* Frag & flags. */
#define NFC_IP_FRAG 0x0080
/* Per-protocol information: only matters if proto match. */
/* TCP flags. */
#define NFC_IP_TCPFLAGS 0x0100
/* Source port. */
#define NFC_IP_SRC_PT 0x0200
/* Dest port. */
#define NFC_IP_DST_PT 0x0400
/* Something else about the proto */
#define NFC_IP_PROTO_UNKNOWN 0x2000
/* IP Hooks */
/* After promisc drops, checksum checks. */
#define NF_IP_PRE_ROUTING 0
......
......@@ -16,35 +16,6 @@
#include <limits.h> /* for INT_MIN, INT_MAX */
/* IP Cache bits. */
/* Src IP address. */
#define NFC_IP6_SRC 0x0001
/* Dest IP address. */
#define NFC_IP6_DST 0x0002
/* Input device. */
#define NFC_IP6_IF_IN 0x0004
/* Output device. */
#define NFC_IP6_IF_OUT 0x0008
/* TOS. */
#define NFC_IP6_TOS 0x0010
/* Protocol. */
#define NFC_IP6_PROTO 0x0020
/* IP options. */
#define NFC_IP6_OPTIONS 0x0040
/* Frag & flags. */
#define NFC_IP6_FRAG 0x0080
/* Per-protocol information: only matters if proto match. */
/* TCP flags. */
#define NFC_IP6_TCPFLAGS 0x0100
/* Source port. */
#define NFC_IP6_SRC_PT 0x0200
/* Dest port. */
#define NFC_IP6_DST_PT 0x0400
/* Something else about the proto */
#define NFC_IP6_PROTO_UNKNOWN 0x2000
/* IP6 Hooks */
/* After promisc drops, checksum checks. */
#define NF_IP6_PRE_ROUTING 0
......
......@@ -156,15 +156,10 @@ config NF_NAT_SNMP_BASIC
To compile it as a module, choose M here. If unsure, say N.
config NF_NAT_PROTO_GRE
tristate
depends on NF_CT_PROTO_GRE
config NF_NAT_PPTP
tristate
depends on NF_CONNTRACK
default NF_CONNTRACK_PPTP
select NF_NAT_PROTO_GRE
config NF_NAT_H323
tristate
......
......@@ -3,7 +3,7 @@
# Makefile for the netfilter modules on top of IPv4.
#
nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o
nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
......@@ -28,9 +28,6 @@ nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o
$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
......
......@@ -56,18 +56,15 @@ struct clusterip_config {
#endif
enum clusterip_hashmode hash_mode; /* which hashing mode */
u_int32_t hash_initval; /* hash initialization */
struct rcu_head rcu;
struct rcu_head rcu; /* for call_rcu_bh */
struct net *net; /* netns for pernet list */
char ifname[IFNAMSIZ]; /* device ifname */
struct notifier_block notifier; /* refresh c->ifindex in it */
};
#ifdef CONFIG_PROC_FS
static const struct file_operations clusterip_proc_fops;
#endif
static unsigned int clusterip_net_id __read_mostly;
struct clusterip_net {
struct list_head configs;
/* lock protects the configs list */
......@@ -75,51 +72,66 @@ struct clusterip_net {
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *procdir;
/* mutex protects the config->pde*/
struct mutex mutex;
#endif
};
static unsigned int clusterip_net_id __read_mostly;
static inline struct clusterip_net *clusterip_pernet(struct net *net)
{
return net_generic(net, clusterip_net_id);
}
static inline void
clusterip_config_get(struct clusterip_config *c)
{
refcount_inc(&c->refcount);
}
static void clusterip_config_rcu_free(struct rcu_head *head)
{
kfree(container_of(head, struct clusterip_config, rcu));
struct clusterip_config *config;
struct net_device *dev;
config = container_of(head, struct clusterip_config, rcu);
dev = dev_get_by_name(config->net, config->ifname);
if (dev) {
dev_mc_del(dev, config->clustermac);
dev_put(dev);
}
kfree(config);
}
static inline void
clusterip_config_put(struct clusterip_config *c)
{
if (refcount_dec_and_test(&c->refcount))
call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
call_rcu(&c->rcu, clusterip_config_rcu_free);
}
/* decrease the count of entries using/referencing this config. If last
* entry(rule) is removed, remove the config from lists, but don't free it
* yet, since proc-files could still be holding references */
static inline void
clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
clusterip_config_entry_put(struct clusterip_config *c)
{
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
struct clusterip_net *cn = clusterip_pernet(c->net);
local_bh_disable();
if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
list_del_rcu(&c->list);
spin_unlock(&cn->lock);
local_bh_enable();
/* In case anyone still accesses the file, the open/close
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
#ifdef CONFIG_PROC_FS
mutex_lock(&cn->mutex);
if (cn->procdir)
proc_remove(c->pde);
mutex_unlock(&cn->mutex);
#endif
list_del_rcu(&c->list);
spin_unlock(&cn->lock);
local_bh_enable();
unregister_netdevice_notifier(&c->notifier);
return;
}
local_bh_enable();
......@@ -129,7 +141,7 @@ static struct clusterip_config *
__clusterip_config_find(struct net *net, __be32 clusterip)
{
struct clusterip_config *c;
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
struct clusterip_net *cn = clusterip_pernet(net);
list_for_each_entry_rcu(c, &cn->configs, list) {
if (c->clusterip == clusterip)
......@@ -181,9 +193,12 @@ clusterip_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
struct clusterip_net *cn = clusterip_pernet(net);
struct clusterip_config *c;
c = container_of(this, struct clusterip_config, notifier);
spin_lock_bh(&cn->lock);
list_for_each_entry_rcu(c, &cn->configs, list) {
switch (event) {
case NETDEV_REGISTER:
if (!strcmp(dev->name, c->ifname)) {
......@@ -207,6 +222,8 @@ clusterip_netdev_event(struct notifier_block *this, unsigned long event,
}
break;
}
}
spin_unlock_bh(&cn->lock);
return NOTIFY_DONE;
}
......@@ -215,30 +232,44 @@ static struct clusterip_config *
clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
__be32 ip, const char *iniface)
{
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
struct clusterip_net *cn = clusterip_pernet(net);
struct clusterip_config *c;
struct net_device *dev;
int err;
if (iniface[0] == '\0') {
pr_info("Please specify an interface name\n");
return ERR_PTR(-EINVAL);
}
c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
return ERR_PTR(-ENOMEM);
strcpy(c->ifname, iniface);
c->ifindex = -1;
c->clusterip = ip;
dev = dev_get_by_name(net, iniface);
if (!dev) {
pr_info("no such interface %s\n", iniface);
kfree(c);
return ERR_PTR(-ENOENT);
}
c->ifindex = dev->ifindex;
strcpy(c->ifname, dev->name);
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
dev_mc_add(dev, c->clustermac);
dev_put(dev);
c->clusterip = ip;
c->num_total_nodes = i->num_total_nodes;
clusterip_config_init_nodelist(c, i);
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
c->net = net;
refcount_set(&c->refcount, 1);
spin_lock_bh(&cn->lock);
if (__clusterip_config_find(net, ip)) {
spin_unlock_bh(&cn->lock);
kfree(c);
return ERR_PTR(-EBUSY);
err = -EBUSY;
goto out_config_put;
}
list_add_rcu(&c->list, &cn->configs);
......@@ -250,9 +281,11 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
/* create proc dir entry */
sprintf(buffer, "%pI4", &ip);
mutex_lock(&cn->mutex);
c->pde = proc_create_data(buffer, 0600,
cn->procdir,
&clusterip_proc_fops, c);
mutex_unlock(&cn->mutex);
if (!c->pde) {
err = -ENOMEM;
goto err;
......@@ -260,22 +293,17 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
}
#endif
c->notifier.notifier_call = clusterip_netdev_event;
err = register_netdevice_notifier(&c->notifier);
if (!err) {
refcount_set(&c->entries, 1);
return c;
}
#ifdef CONFIG_PROC_FS
proc_remove(c->pde);
err:
#endif
spin_lock_bh(&cn->lock);
list_del_rcu(&c->list);
out_config_put:
spin_unlock_bh(&cn->lock);
clusterip_config_put(c);
return ERR_PTR(err);
}
......@@ -475,34 +503,20 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
&e->ip.dst.s_addr);
return -EINVAL;
} else {
struct net_device *dev;
if (e->ip.iniface[0] == '\0') {
pr_info("Please specify an interface name\n");
return -EINVAL;
}
dev = dev_get_by_name(par->net, e->ip.iniface);
if (!dev) {
pr_info("no such interface %s\n",
e->ip.iniface);
return -ENOENT;
}
dev_put(dev);
config = clusterip_config_init(par->net, cipinfo,
e->ip.dst.s_addr,
e->ip.iniface);
if (IS_ERR(config))
return PTR_ERR(config);
}
}
} else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN))
return -EINVAL;
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
clusterip_config_entry_put(par->net, config);
clusterip_config_entry_put(config);
clusterip_config_put(config);
return ret;
}
......@@ -524,7 +538,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
/* if no more entries are referencing the config, remove it
* from the list and destroy the proc entry */
clusterip_config_entry_put(par->net, cipinfo->config);
clusterip_config_entry_put(cipinfo->config);
clusterip_config_put(cipinfo->config);
......@@ -806,7 +820,7 @@ static const struct file_operations clusterip_proc_fops = {
static int clusterip_net_init(struct net *net)
{
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
struct clusterip_net *cn = clusterip_pernet(net);
int ret;
INIT_LIST_HEAD(&cn->configs);
......@@ -824,6 +838,7 @@ static int clusterip_net_init(struct net *net)
pr_err("Unable to proc dir entry\n");
return -ENOMEM;
}
mutex_init(&cn->mutex);
#endif /* CONFIG_PROC_FS */
return 0;
......@@ -831,13 +846,15 @@ static int clusterip_net_init(struct net *net)
static void clusterip_net_exit(struct net *net)
{
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
struct clusterip_net *cn = clusterip_pernet(net);
#ifdef CONFIG_PROC_FS
mutex_lock(&cn->mutex);
proc_remove(cn->procdir);
cn->procdir = NULL;
mutex_unlock(&cn->mutex);
#endif
nf_unregister_net_hook(net, &cip_arp_ops);
WARN_ON_ONCE(!list_empty(&cn->configs));
}
static struct pernet_operations clusterip_net_ops = {
......@@ -847,6 +864,10 @@ static struct pernet_operations clusterip_net_ops = {
.size = sizeof(struct clusterip_net),
};
struct notifier_block cip_netdev_notifier = {
.notifier_call = clusterip_netdev_event
};
static int __init clusterip_tg_init(void)
{
int ret;
......@@ -859,11 +880,17 @@ static int __init clusterip_tg_init(void)
if (ret < 0)
goto cleanup_subsys;
ret = register_netdevice_notifier(&cip_netdev_notifier);
if (ret < 0)
goto unregister_target;
pr_info("ClusterIP Version %s loaded successfully\n",
CLUSTERIP_VERSION);
return 0;
unregister_target:
xt_unregister_target(&clusterip_tg_reg);
cleanup_subsys:
unregister_pernet_subsys(&clusterip_net_ops);
return ret;
......@@ -873,11 +900,12 @@ static void __exit clusterip_tg_exit(void)
{
pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
unregister_netdevice_notifier(&cip_netdev_notifier);
xt_unregister_target(&clusterip_tg_reg);
unregister_pernet_subsys(&clusterip_net_ops);
/* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
rcu_barrier_bh();
/* Wait for completion of call_rcu()'s (clusterip_config_rcu_free) */
rcu_barrier();
}
module_init(clusterip_tg_init);
......
......@@ -62,22 +62,8 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
}
#endif /* CONFIG_XFRM */
static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
const struct nf_nat_range2 *range)
{
return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
}
static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t,
__be16 dport)
{
return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport);
}
static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *target,
enum nf_nat_manip_type maniptype)
{
......@@ -90,8 +76,8 @@ static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
iph = (void *)skb->data + iphdroff;
hdroff = iphdroff + iph->ihl * 4;
if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff,
target, maniptype))
if (!nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff,
hdroff, target, maniptype))
return false;
iph = (void *)skb->data + iphdroff;
......@@ -161,8 +147,6 @@ static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
.l3proto = NFPROTO_IPV4,
.in_range = nf_nat_ipv4_in_range,
.secure_port = nf_nat_ipv4_secure_port,
.manip_pkt = nf_nat_ipv4_manip_pkt,
.csum_update = nf_nat_ipv4_csum_update,
.csum_recalc = nf_nat_ipv4_csum_recalc,
......@@ -186,7 +170,6 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
unsigned int hdrlen = ip_hdrlen(skb);
const struct nf_nat_l4proto *l4proto;
struct nf_conntrack_tuple target;
unsigned long statusbit;
......@@ -217,9 +200,8 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
if (!(ct->status & statusbit))
return 1;
l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol);
if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
l4proto, &ct->tuplehash[!dir].tuple, !manip))
&ct->tuplehash[!dir].tuple, !manip))
return 0;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
......@@ -233,8 +215,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
/* Change outer to look like the reply to an incoming packet */
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0);
if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip))
if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
return 0;
return 1;
......@@ -391,26 +372,12 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_unregister_fn);
static int __init nf_nat_l3proto_ipv4_init(void)
{
int err;
err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
if (err < 0)
goto err1;
err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
if (err < 0)
goto err2;
return err;
err2:
nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
err1:
return err;
return nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
}
static void __exit nf_nat_l3proto_ipv4_exit(void)
{
nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
}
MODULE_LICENSE("GPL");
......
......@@ -299,8 +299,6 @@ pptp_inbound_pkt(struct sk_buff *skb,
static int __init nf_nat_helper_pptp_init(void)
{
nf_nat_need_gre();
BUG_ON(nf_nat_pptp_hook_outbound != NULL);
RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
......
/*
* nf_nat_proto_gre.c
*
* NAT protocol helper module for GRE.
*
* GRE is a generic encapsulation protocol, which is generally not very
* suited for NAT, as it has no protocol-specific part as port numbers.
*
* It has an optional key field, which may help us distinguishing two
* connections between the same two hosts.
*
* GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
*
* PPTP is built on top of a modified version of GRE, and has a mandatory
* field called "CallID", which serves us for the same purpose as the key
* field in plain GRE.
*
* Documentation about PPTP can be found in RFC 2637
*
* (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
* (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
*/
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_l4proto.h>
#include <linux/netfilter/nf_conntrack_proto_gre.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
/* generate unique tuple ... */
static void
gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
static u_int16_t key;
__be16 *keyptr;
unsigned int min, i, range_size;
/* If there is no master conntrack we are not PPTP,
do not change tuples */
if (!ct->master)
return;
if (maniptype == NF_NAT_MANIP_SRC)
keyptr = &tuple->src.u.gre.key;
else
keyptr = &tuple->dst.u.gre.key;
if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
pr_debug("%p: NATing GRE PPTP\n", ct);
min = 1;
range_size = 0xffff;
} else {
min = ntohs(range->min_proto.gre.key);
range_size = ntohs(range->max_proto.gre.key) - min + 1;
}
pr_debug("min = %u, range_size = %u\n", min, range_size);
for (i = 0; ; ++key) {
*keyptr = htons(min + key % range_size);
if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
return;
}
pr_debug("%p: no NAT mapping\n", ct);
return;
}
/* manipulate a GRE packet according to maniptype */
static bool
gre_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
const struct gre_base_hdr *greh;
struct pptp_gre_header *pgreh;
/* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */
if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
return false;
greh = (void *)skb->data + hdroff;
pgreh = (struct pptp_gre_header *)greh;
/* we only have destination manip of a packet, since 'source key'
* is not present in the packet itself */
if (maniptype != NF_NAT_MANIP_DST)
return true;
switch (greh->flags & GRE_VERSION) {
case GRE_VERSION_0:
/* We do not currently NAT any GREv0 packets.
* Try to behave like "nf_nat_proto_unknown" */
break;
case GRE_VERSION_1:
pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
pgreh->call_id = tuple->dst.u.gre.key;
break;
default:
pr_debug("can't nat unknown GRE version\n");
return false;
}
return true;
}
static const struct nf_nat_l4proto gre = {
.l4proto = IPPROTO_GRE,
.manip_pkt = gre_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = gre_unique_tuple,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
static int __init nf_nat_proto_gre_init(void)
{
return nf_nat_l4proto_register(NFPROTO_IPV4, &gre);
}
static void __exit nf_nat_proto_gre_fini(void)
{
nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre);
}
module_init(nf_nat_proto_gre_init);
module_exit(nf_nat_proto_gre_fini);
void nf_nat_need_gre(void)
{
return;
}
EXPORT_SYMBOL_GPL(nf_nat_need_gre);
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l4proto.h>
static bool
icmp_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
const union nf_conntrack_man_proto *min,
const union nf_conntrack_man_proto *max)
{
return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
}
static void
icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
static u_int16_t id;
unsigned int range_size;
unsigned int i;
range_size = ntohs(range->max_proto.icmp.id) -
ntohs(range->min_proto.icmp.id) + 1;
/* If no range specified... */
if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
range_size = 0xFFFF;
for (i = 0; ; ++id) {
tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
(id % range_size));
if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
return;
}
return;
}
static bool
icmp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct icmphdr *hdr;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct icmphdr *)(skb->data + hdroff);
inet_proto_csum_replace2(&hdr->checksum, skb,
hdr->un.echo.id, tuple->src.u.icmp.id, false);
hdr->un.echo.id = tuple->src.u.icmp.id;
return true;
}
const struct nf_nat_l4proto nf_nat_l4proto_icmp = {
.l4proto = IPPROTO_ICMP,
.manip_pkt = icmp_manip_pkt,
.in_range = icmp_in_range,
.unique_tuple = icmp_unique_tuple,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
......@@ -11,7 +11,7 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o
nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
......
......@@ -61,22 +61,8 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
}
#endif
static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
const struct nf_nat_range2 *range)
{
return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
}
static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
__be16 dport)
{
return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport);
}
static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *target,
enum nf_nat_manip_type maniptype)
{
......@@ -96,7 +82,7 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
goto manip_addr;
if ((frag_off & htons(~0x7)) == 0 &&
!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
!nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
target, maniptype))
return false;
......@@ -171,8 +157,6 @@ static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
.l3proto = NFPROTO_IPV6,
.secure_port = nf_nat_ipv6_secure_port,
.in_range = nf_nat_ipv6_in_range,
.manip_pkt = nf_nat_ipv6_manip_pkt,
.csum_update = nf_nat_ipv6_csum_update,
.csum_recalc = nf_nat_ipv6_csum_recalc,
......@@ -196,7 +180,6 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
} *inside;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
const struct nf_nat_l4proto *l4proto;
struct nf_conntrack_tuple target;
unsigned long statusbit;
......@@ -227,9 +210,8 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
if (!(ct->status & statusbit))
return 1;
l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
l4proto, &ct->tuplehash[!dir].tuple, !manip))
&ct->tuplehash[!dir].tuple, !manip))
return 0;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
......@@ -244,8 +226,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
}
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip))
if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
return 0;
return 1;
......@@ -415,26 +396,12 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn);
static int __init nf_nat_l3proto_ipv6_init(void)
{
int err;
err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
if (err < 0)
goto err1;
err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
if (err < 0)
goto err2;
return err;
err2:
nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
err1:
return err;
return nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
}
static void __exit nf_nat_l3proto_ipv6_exit(void)
{
nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
}
MODULE_LICENSE("GPL");
......
/*
* Copyright (c) 2011 Patrick Mchardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6
* NAT funded by Astaro.
*/
#include <linux/types.h>
#include <linux/init.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
static bool
icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
const union nf_conntrack_man_proto *min,
const union nf_conntrack_man_proto *max)
{
return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
}
static void
icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
static u16 id;
unsigned int range_size;
unsigned int i;
range_size = ntohs(range->max_proto.icmp.id) -
ntohs(range->min_proto.icmp.id) + 1;
if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
range_size = 0xffff;
for (i = 0; ; ++id) {
tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
(id % range_size));
if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
return;
}
}
static bool
icmpv6_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct icmp6hdr *hdr;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct icmp6hdr *)(skb->data + hdroff);
l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
tuple, maniptype);
if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
hdr->icmp6_identifier,
tuple->src.u.icmp.id, false);
hdr->icmp6_identifier = tuple->src.u.icmp.id;
}
return true;
}
const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
.l4proto = IPPROTO_ICMPV6,
.manip_pkt = icmpv6_manip_pkt,
.in_range = icmpv6_in_range,
.unique_tuple = icmpv6_unique_tuple,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
......@@ -403,21 +403,6 @@ config NF_NAT_NEEDED
depends on NF_NAT
default y
config NF_NAT_PROTO_DCCP
bool
depends on NF_NAT && NF_CT_PROTO_DCCP
default NF_NAT && NF_CT_PROTO_DCCP
config NF_NAT_PROTO_UDPLITE
bool
depends on NF_NAT && NF_CT_PROTO_UDPLITE
default NF_NAT && NF_CT_PROTO_UDPLITE
config NF_NAT_PROTO_SCTP
bool
default NF_NAT && NF_CT_PROTO_SCTP
depends on NF_NAT && NF_CT_PROTO_SCTP
config NF_NAT_AMANDA
tristate
depends on NF_CONNTRACK && NF_NAT
......
......@@ -47,12 +47,7 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
# NAT protocols (nf_nat)
nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
nf_nat-y := nf_nat_core.o nf_nat_proto.o nf_nat_helper.o
# generic transport layer logging
obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
......
......@@ -219,10 +219,6 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
/* MAC can be src only */
if (!(opt->flags & IPSET_DIM_TWO_SRC))
return 0;
ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
if (ip < map->first_ip || ip > map->last_ip)
return -IPSET_ERR_BITMAP_RANGE;
......@@ -233,7 +229,14 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
return -EINVAL;
e.id = ip_to_id(map, ip);
memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
if (opt->flags & IPSET_DIM_ONE_SRC)
ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
else
ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
if (is_zero_ether_addr(e.ether))
return -EINVAL;
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
......
......@@ -771,11 +771,21 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
* The commands are serialized by the nfnl mutex.
*/
static inline u8 protocol(const struct nlattr * const tb[])
{
return nla_get_u8(tb[IPSET_ATTR_PROTOCOL]);
}
static inline bool
protocol_failed(const struct nlattr * const tb[])
{
return !tb[IPSET_ATTR_PROTOCOL] ||
nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL;
}
static inline bool
protocol_min_failed(const struct nlattr * const tb[])
{
return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN;
}
static inline u32
......@@ -889,7 +899,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
u32 flags = flag_exist(nlh);
int ret = 0;
if (unlikely(protocol_failed(attr) ||
if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
!attr[IPSET_ATTR_TYPENAME] ||
!attr[IPSET_ATTR_REVISION] ||
......@@ -1027,7 +1037,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
ip_set_id_t i;
int ret = 0;
if (unlikely(protocol_failed(attr)))
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
/* Must wait for flush to be really finished in list:set */
......@@ -1105,7 +1115,7 @@ static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
struct ip_set *s;
ip_set_id_t i;
if (unlikely(protocol_failed(attr)))
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
if (!attr[IPSET_ATTR_SETNAME]) {
......@@ -1147,7 +1157,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
ip_set_id_t i;
int ret = 0;
if (unlikely(protocol_failed(attr) ||
if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
!attr[IPSET_ATTR_SETNAME2]))
return -IPSET_ERR_PROTOCOL;
......@@ -1196,7 +1206,7 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
ip_set_id_t from_id, to_id;
char from_name[IPSET_MAXNAMELEN];
if (unlikely(protocol_failed(attr) ||
if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
!attr[IPSET_ATTR_SETNAME2]))
return -IPSET_ERR_PROTOCOL;
......@@ -1291,6 +1301,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len,
ip_set_setname_policy, NULL);
cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]);
if (cda[IPSET_ATTR_SETNAME]) {
struct ip_set *set;
......@@ -1392,7 +1403,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
ret = -EMSGSIZE;
goto release_refcount;
}
if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL,
cb->args[IPSET_CB_PROTO]) ||
nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
goto nla_put_failure;
if (dump_flags & IPSET_FLAG_LIST_SETNAME)
......@@ -1407,6 +1419,9 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
nla_put_u8(skb, IPSET_ATTR_REVISION,
set->revision))
goto nla_put_failure;
if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN &&
nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index)))
goto nla_put_failure;
ret = set->variant->head(set, skb);
if (ret < 0)
goto release_refcount;
......@@ -1466,7 +1481,7 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const struct nlattr * const attr[],
struct netlink_ext_ack *extack)
{
if (unlikely(protocol_failed(attr)))
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
{
......@@ -1560,7 +1575,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
bool use_lineno;
int ret = 0;
if (unlikely(protocol_failed(attr) ||
if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
!((attr[IPSET_ATTR_DATA] != NULL) ^
(attr[IPSET_ATTR_ADT] != NULL)) ||
......@@ -1615,7 +1630,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
bool use_lineno;
int ret = 0;
if (unlikely(protocol_failed(attr) ||
if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
!((attr[IPSET_ATTR_DATA] != NULL) ^
(attr[IPSET_ATTR_ADT] != NULL)) ||
......@@ -1667,7 +1682,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
int ret = 0;
if (unlikely(protocol_failed(attr) ||
if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
!attr[IPSET_ATTR_DATA] ||
!flag_nested(attr[IPSET_ATTR_DATA])))
......@@ -1704,7 +1719,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
struct nlmsghdr *nlh2;
int ret = 0;
if (unlikely(protocol_failed(attr) ||
if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME]))
return -IPSET_ERR_PROTOCOL;
......@@ -1720,7 +1735,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
IPSET_CMD_HEADER);
if (!nlh2)
goto nlmsg_failure;
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
......@@ -1761,7 +1776,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const char *typename;
int ret = 0;
if (unlikely(protocol_failed(attr) ||
if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_TYPENAME] ||
!attr[IPSET_ATTR_FAMILY]))
return -IPSET_ERR_PROTOCOL;
......@@ -1780,7 +1795,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
IPSET_CMD_TYPE);
if (!nlh2)
goto nlmsg_failure;
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
......@@ -1831,6 +1846,111 @@ static int ip_set_protocol(struct net *net, struct sock *ctnl,
goto nlmsg_failure;
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
goto nla_put_failure;
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN))
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
if (ret < 0)
return ret;
return 0;
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
nlmsg_failure:
kfree_skb(skb2);
return -EMSGSIZE;
}
/* Get set by name or index, from userspace */
static int ip_set_byname(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
struct netlink_ext_ack *extack)
{
struct ip_set_net *inst = ip_set_pernet(net);
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
ip_set_id_t id = IPSET_INVALID_ID;
const struct ip_set *set;
int ret = 0;
if (unlikely(protocol_failed(attr) ||
!attr[IPSET_ATTR_SETNAME]))
return -IPSET_ERR_PROTOCOL;
set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &id);
if (id == IPSET_INVALID_ID)
return -ENOENT;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb2)
return -ENOMEM;
nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
IPSET_CMD_GET_BYNAME);
if (!nlh2)
goto nlmsg_failure;
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
nla_put_net16(skb2, IPSET_ATTR_INDEX, htons(id)))
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
if (ret < 0)
return ret;
return 0;
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
nlmsg_failure:
kfree_skb(skb2);
return -EMSGSIZE;
}
static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = {
[IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
[IPSET_ATTR_INDEX] = { .type = NLA_U16 },
};
static int ip_set_byindex(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const attr[],
struct netlink_ext_ack *extack)
{
struct ip_set_net *inst = ip_set_pernet(net);
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
ip_set_id_t id = IPSET_INVALID_ID;
const struct ip_set *set;
int ret = 0;
if (unlikely(protocol_failed(attr) ||
!attr[IPSET_ATTR_INDEX]))
return -IPSET_ERR_PROTOCOL;
id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]);
if (id >= inst->ip_set_max)
return -ENOENT;
set = ip_set(inst, id);
if (set == NULL)
return -ENOENT;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb2)
return -ENOMEM;
nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
IPSET_CMD_GET_BYINDEX);
if (!nlh2)
goto nlmsg_failure;
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name))
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
......@@ -1916,6 +2036,16 @@ static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_protocol_policy,
},
[IPSET_CMD_GET_BYNAME] = {
.call = ip_set_byname,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname_policy,
},
[IPSET_CMD_GET_BYINDEX] = {
.call = ip_set_byindex,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_index_policy,
},
};
static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
......@@ -1961,7 +2091,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
goto done;
}
if (req_version->version != IPSET_PROTOCOL) {
if (req_version->version < IPSET_PROTOCOL_MIN) {
ret = -EPROTO;
goto done;
}
......@@ -2024,9 +2154,11 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
}
nfnl_lock(NFNL_SUBSYS_IPSET);
set = ip_set(inst, req_get->set.index);
strncpy(req_get->set.name, set ? set->name : "",
ret = strscpy(req_get->set.name, set ? set->name : "",
IPSET_MAXNAMELEN);
nfnl_unlock(NFNL_SUBSYS_IPSET);
if (ret < 0)
goto done;
goto copy;
}
default:
......
......@@ -67,7 +67,7 @@ tune_ahash_max(u8 curr, u32 multi)
/* A hash bucket */
struct hbucket {
struct rcu_head rcu; /* for call_rcu_bh */
struct rcu_head rcu; /* for call_rcu */
/* Which positions are used in the array */
DECLARE_BITMAP(used, AHASH_MAX_TUNED);
u8 size; /* size of the array */
......@@ -664,7 +664,7 @@ mtype_resize(struct ip_set *set, bool retried)
spin_unlock_bh(&set->lock);
/* Give time to other readers of the set */
synchronize_rcu_bh();
synchronize_rcu();
pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
orig->htable_bits, orig, t->htable_bits, t);
......
......@@ -36,9 +36,6 @@ MODULE_ALIAS("ip_set_hash:ip,mac");
/* Type specific function prefix */
#define HTYPE hash_ipmac
/* Zero valued element is not supported */
static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
/* IPv4 variant */
/* Member elements */
......@@ -103,8 +100,12 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb,
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
if (ether_addr_equal(e.ether, invalid_ether))
if (opt->flags & IPSET_DIM_ONE_SRC)
ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
else
ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
if (is_zero_ether_addr(e.ether))
return -EINVAL;
ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
......@@ -140,7 +141,7 @@ hash_ipmac4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
if (ether_addr_equal(e.ether, invalid_ether))
if (is_zero_ether_addr(e.ether))
return -IPSET_ERR_HASH_ELEM;
return adtfn(set, &e, &ext, &ext, flags);
......@@ -211,16 +212,16 @@ hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
/* MAC can be src only */
if (!(opt->flags & IPSET_DIM_TWO_SRC))
return 0;
if (skb_mac_header(skb) < skb->head ||
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
if (ether_addr_equal(e.ether, invalid_ether))
if (opt->flags & IPSET_DIM_ONE_SRC)
ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
else
ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
if (is_zero_ether_addr(e.ether))
return -EINVAL;
ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
......@@ -260,7 +261,7 @@ hash_ipmac6_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
if (ether_addr_equal(e.ether, invalid_ether))
if (is_zero_ether_addr(e.ether))
return -IPSET_ERR_HASH_ELEM;
return adtfn(set, &e, &ext, &ext, flags);
......
......@@ -81,15 +81,15 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
/* MAC can be src only */
if (!(opt->flags & IPSET_DIM_ONE_SRC))
return 0;
if (skb_mac_header(skb) < skb->head ||
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
if (opt->flags & IPSET_DIM_ONE_SRC)
ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
else
ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
if (is_zero_ether_addr(e.ether))
return -EINVAL;
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
......
......@@ -25,102 +25,15 @@ static bool nf_ct_acct __read_mostly;
module_param_named(acct, nf_ct_acct, bool, 0644);
MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
#ifdef CONFIG_SYSCTL
static struct ctl_table acct_sysctl_table[] = {
{
.procname = "nf_conntrack_acct",
.data = &init_net.ct.sysctl_acct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{}
};
#endif /* CONFIG_SYSCTL */
unsigned int
seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
{
struct nf_conn_acct *acct;
struct nf_conn_counter *counter;
acct = nf_conn_acct_find(ct);
if (!acct)
return 0;
counter = acct->counter;
seq_printf(s, "packets=%llu bytes=%llu ",
(unsigned long long)atomic64_read(&counter[dir].packets),
(unsigned long long)atomic64_read(&counter[dir].bytes));
return 0;
};
EXPORT_SYMBOL_GPL(seq_print_acct);
static const struct nf_ct_ext_type acct_extend = {
.len = sizeof(struct nf_conn_acct),
.align = __alignof__(struct nf_conn_acct),
.id = NF_CT_EXT_ACCT,
};
#ifdef CONFIG_SYSCTL
static int nf_conntrack_acct_init_sysctl(struct net *net)
{
struct ctl_table *table;
table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table),
GFP_KERNEL);
if (!table)
goto out;
table[0].data = &net->ct.sysctl_acct;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
net->ct.acct_sysctl_header = register_net_sysctl(net, "net/netfilter",
table);
if (!net->ct.acct_sysctl_header) {
pr_err("can't register to sysctl\n");
goto out_register;
}
return 0;
out_register:
kfree(table);
out:
return -ENOMEM;
}
static void nf_conntrack_acct_fini_sysctl(struct net *net)
{
struct ctl_table *table;
table = net->ct.acct_sysctl_header->ctl_table_arg;
unregister_net_sysctl_table(net->ct.acct_sysctl_header);
kfree(table);
}
#else
static int nf_conntrack_acct_init_sysctl(struct net *net)
{
return 0;
}
static void nf_conntrack_acct_fini_sysctl(struct net *net)
{
}
#endif
int nf_conntrack_acct_pernet_init(struct net *net)
void nf_conntrack_acct_pernet_init(struct net *net)
{
net->ct.sysctl_acct = nf_ct_acct;
return nf_conntrack_acct_init_sysctl(net);
}
void nf_conntrack_acct_pernet_fini(struct net *net)
{
nf_conntrack_acct_fini_sysctl(net);
}
int nf_conntrack_acct_init(void)
......
......@@ -2110,10 +2110,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
list_for_each_entry(net, net_exit_list, exit_list) {
nf_conntrack_proto_pernet_fini(net);
nf_conntrack_helper_pernet_fini(net);
nf_conntrack_ecache_pernet_fini(net);
nf_conntrack_tstamp_pernet_fini(net);
nf_conntrack_acct_pernet_fini(net);
nf_conntrack_expect_pernet_fini(net);
free_percpu(net->ct.stat);
free_percpu(net->ct.pcpu_lists);
......@@ -2410,32 +2407,19 @@ int nf_conntrack_init_net(struct net *net)
ret = nf_conntrack_expect_pernet_init(net);
if (ret < 0)
goto err_expect;
ret = nf_conntrack_acct_pernet_init(net);
if (ret < 0)
goto err_acct;
ret = nf_conntrack_tstamp_pernet_init(net);
if (ret < 0)
goto err_tstamp;
ret = nf_conntrack_ecache_pernet_init(net);
if (ret < 0)
goto err_ecache;
ret = nf_conntrack_helper_pernet_init(net);
if (ret < 0)
goto err_helper;
nf_conntrack_acct_pernet_init(net);
nf_conntrack_tstamp_pernet_init(net);
nf_conntrack_ecache_pernet_init(net);
nf_conntrack_helper_pernet_init(net);
ret = nf_conntrack_proto_pernet_init(net);
if (ret < 0)
goto err_proto;
return 0;
err_proto:
nf_conntrack_helper_pernet_fini(net);
err_helper:
nf_conntrack_ecache_pernet_fini(net);
err_ecache:
nf_conntrack_tstamp_pernet_fini(net);
err_tstamp:
nf_conntrack_acct_pernet_fini(net);
err_acct:
nf_conntrack_expect_pernet_fini(net);
err_expect:
free_percpu(net->ct.stat);
......
......@@ -336,85 +336,21 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
#define NF_CT_EVENTS_DEFAULT 1
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
#ifdef CONFIG_SYSCTL
static struct ctl_table event_sysctl_table[] = {
{
.procname = "nf_conntrack_events",
.data = &init_net.ct.sysctl_events,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{}
};
#endif /* CONFIG_SYSCTL */
static const struct nf_ct_ext_type event_extend = {
.len = sizeof(struct nf_conntrack_ecache),
.align = __alignof__(struct nf_conntrack_ecache),
.id = NF_CT_EXT_ECACHE,
};
#ifdef CONFIG_SYSCTL
static int nf_conntrack_event_init_sysctl(struct net *net)
{
struct ctl_table *table;
table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table),
GFP_KERNEL);
if (!table)
goto out;
table[0].data = &net->ct.sysctl_events;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
net->ct.event_sysctl_header =
register_net_sysctl(net, "net/netfilter", table);
if (!net->ct.event_sysctl_header) {
pr_err("can't register to sysctl\n");
goto out_register;
}
return 0;
out_register:
kfree(table);
out:
return -ENOMEM;
}
static void nf_conntrack_event_fini_sysctl(struct net *net)
{
struct ctl_table *table;
table = net->ct.event_sysctl_header->ctl_table_arg;
unregister_net_sysctl_table(net->ct.event_sysctl_header);
kfree(table);
}
#else
static int nf_conntrack_event_init_sysctl(struct net *net)
{
return 0;
}
static void nf_conntrack_event_fini_sysctl(struct net *net)
{
}
#endif /* CONFIG_SYSCTL */
int nf_conntrack_ecache_pernet_init(struct net *net)
void nf_conntrack_ecache_pernet_init(struct net *net)
{
net->ct.sysctl_events = nf_ct_events;
INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work);
return nf_conntrack_event_init_sysctl(net);
}
void nf_conntrack_ecache_pernet_fini(struct net *net)
{
cancel_delayed_work_sync(&net->ct.ecache_dwork);
nf_conntrack_event_fini_sysctl(net);
}
int nf_conntrack_ecache_init(void)
......
......@@ -42,67 +42,6 @@ module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644);
MODULE_PARM_DESC(nf_conntrack_helper,
"Enable automatic conntrack helper assignment (default 0)");
#ifdef CONFIG_SYSCTL
static struct ctl_table helper_sysctl_table[] = {
{
.procname = "nf_conntrack_helper",
.data = &init_net.ct.sysctl_auto_assign_helper,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{}
};
static int nf_conntrack_helper_init_sysctl(struct net *net)
{
struct ctl_table *table;
table = kmemdup(helper_sysctl_table, sizeof(helper_sysctl_table),
GFP_KERNEL);
if (!table)
goto out;
table[0].data = &net->ct.sysctl_auto_assign_helper;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
net->ct.helper_sysctl_header =
register_net_sysctl(net, "net/netfilter", table);
if (!net->ct.helper_sysctl_header) {
pr_err("nf_conntrack_helper: can't register to sysctl.\n");
goto out_register;
}
return 0;
out_register:
kfree(table);
out:
return -ENOMEM;
}
static void nf_conntrack_helper_fini_sysctl(struct net *net)
{
struct ctl_table *table;
table = net->ct.helper_sysctl_header->ctl_table_arg;
unregister_net_sysctl_table(net->ct.helper_sysctl_header);
kfree(table);
}
#else
static int nf_conntrack_helper_init_sysctl(struct net *net)
{
return 0;
}
static void nf_conntrack_helper_fini_sysctl(struct net *net)
{
}
#endif /* CONFIG_SYSCTL */
/* Stupid hash, but collision free for the default registrations of the
* helpers currently in the kernel. */
static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
......@@ -533,16 +472,10 @@ static const struct nf_ct_ext_type helper_extend = {
.id = NF_CT_EXT_HELPER,
};
int nf_conntrack_helper_pernet_init(struct net *net)
void nf_conntrack_helper_pernet_init(struct net *net)
{
net->ct.auto_assign_helper_warned = false;
net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
return nf_conntrack_helper_init_sysctl(net);
}
void nf_conntrack_helper_pernet_fini(struct net *net)
{
nf_conntrack_helper_fini_sysctl(net);
}
int nf_conntrack_helper_init(void)
......
......@@ -47,7 +47,6 @@
#include <net/netfilter/nf_conntrack_synproxy.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_helper.h>
#endif
......@@ -1688,6 +1687,22 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
return 0;
}
#if defined(CONFIG_NF_CONNTRACK_MARK)
static void ctnetlink_change_mark(struct nf_conn *ct,
const struct nlattr * const cda[])
{
u32 mark, newmark, mask = 0;
if (cda[CTA_MARK_MASK])
mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
mark = ntohl(nla_get_be32(cda[CTA_MARK]));
newmark = (ct->mark & mask) ^ mark;
if (newmark != ct->mark)
ct->mark = newmark;
}
#endif
static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = {
[CTA_PROTOINFO_TCP] = { .type = NLA_NESTED },
[CTA_PROTOINFO_DCCP] = { .type = NLA_NESTED },
......@@ -1883,7 +1898,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
ctnetlink_change_mark(ct, cda);
#endif
if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
......@@ -2027,7 +2042,7 @@ ctnetlink_create_conntrack(struct net *net,
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
ctnetlink_change_mark(ct, cda);
#endif
/* setup master conntrack: this is a confirmed expectation */
......@@ -2524,14 +2539,7 @@ ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
}
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK]) {
u32 mask = 0, mark, newmark;
if (cda[CTA_MARK_MASK])
mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
mark = ntohl(nla_get_be32(cda[CTA_MARK]));
newmark = (ct->mark & mask) ^ mark;
if (newmark != ct->mark)
ct->mark = newmark;
ctnetlink_change_mark(ct, cda);
}
#endif
return 0;
......
......@@ -175,8 +175,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
static
int nf_ct_l4proto_register_sysctl(struct net *net,
struct nf_proto_net *pn,
const struct nf_conntrack_l4proto *l4proto)
struct nf_proto_net *pn)
{
int err = 0;
......@@ -198,9 +197,7 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
}
static
void nf_ct_l4proto_unregister_sysctl(struct net *net,
struct nf_proto_net *pn,
const struct nf_conntrack_l4proto *l4proto)
void nf_ct_l4proto_unregister_sysctl(struct nf_proto_net *pn)
{
#ifdef CONFIG_SYSCTL
if (pn->ctl_table_header != NULL)
......@@ -252,7 +249,7 @@ int nf_ct_l4proto_pernet_register_one(struct net *net,
if (pn == NULL)
goto out;
ret = nf_ct_l4proto_register_sysctl(net, pn, l4proto);
ret = nf_ct_l4proto_register_sysctl(net, pn);
if (ret < 0)
goto out;
......@@ -296,7 +293,7 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
return;
pn->users--;
nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
nf_ct_l4proto_unregister_sysctl(pn);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
......@@ -946,16 +943,14 @@ int nf_conntrack_proto_pernet_init(struct net *net)
if (err < 0)
return err;
err = nf_ct_l4proto_register_sysctl(net,
pn,
&nf_conntrack_l4proto_generic);
pn);
if (err < 0)
return err;
err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
ARRAY_SIZE(builtin_l4proto));
if (err < 0) {
nf_ct_l4proto_unregister_sysctl(net, pn,
&nf_conntrack_l4proto_generic);
nf_ct_l4proto_unregister_sysctl(pn);
return err;
}
......@@ -971,9 +966,7 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
ARRAY_SIZE(builtin_l4proto));
pn->users--;
nf_ct_l4proto_unregister_sysctl(net,
pn,
&nf_conntrack_l4proto_generic);
nf_ct_l4proto_unregister_sysctl(pn);
}
......
......@@ -320,9 +320,49 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = {
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
#ifdef CONFIG_SYSCTL
static struct ctl_table gre_sysctl_table[] = {
{
.procname = "nf_conntrack_gre_timeout",
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "nf_conntrack_gre_timeout_stream",
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{}
};
#endif
static int gre_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *nf,
struct netns_proto_gre *net_gre)
{
#ifdef CONFIG_SYSCTL
int i;
if (nf->ctl_table)
return 0;
nf->ctl_table = kmemdup(gre_sysctl_table,
sizeof(gre_sysctl_table),
GFP_KERNEL);
if (!nf->ctl_table)
return -ENOMEM;
for (i = 0; i < GRE_CT_MAX; i++)
nf->ctl_table[i].data = &net_gre->gre_timeouts[i];
#endif
return 0;
}
static int gre_init_net(struct net *net)
{
struct netns_proto_gre *net_gre = gre_pernet(net);
struct nf_proto_net *nf = &net_gre->nf;
int i;
rwlock_init(&net_gre->keymap_lock);
......@@ -330,7 +370,7 @@ static int gre_init_net(struct net *net)
for (i = 0; i < GRE_CT_MAX; i++)
net_gre->gre_timeouts[i] = gre_timeouts[i];
return 0;
return gre_kmemdup_sysctl_table(net, nf, net_gre);
}
/* protocol helper struct */
......
......@@ -29,7 +29,7 @@
static const unsigned int udp_timeouts[UDP_CT_MAX] = {
[UDP_CT_UNREPLIED] = 30*HZ,
[UDP_CT_REPLIED] = 180*HZ,
[UDP_CT_REPLIED] = 120*HZ,
};
static unsigned int *udp_get_timeouts(struct net *net)
......@@ -100,11 +100,21 @@ static int udp_packet(struct nf_conn *ct,
if (!timeouts)
timeouts = udp_get_timeouts(nf_ct_net(ct));
if (!nf_ct_is_confirmed(ct))
ct->proto.udp.stream_ts = 2 * HZ + jiffies;
/* If we've seen traffic both ways, this is some kind of UDP
stream. Extend timeout. */
* stream. Set Assured.
*/
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
nf_ct_refresh_acct(ct, ctinfo, skb,
timeouts[UDP_CT_REPLIED]);
unsigned long extra = timeouts[UDP_CT_UNREPLIED];
/* Still active after two seconds? Extend timeout. */
if (time_after(jiffies, ct->proto.udp.stream_ts))
extra = timeouts[UDP_CT_REPLIED];
nf_ct_refresh_acct(ct, ctinfo, skb, extra);
/* Also, more likely to be important, and not a probe */
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
......
......@@ -267,6 +267,24 @@ static const char* l4proto_name(u16 proto)
return "unknown";
}
static unsigned int
seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
{
struct nf_conn_acct *acct;
struct nf_conn_counter *counter;
acct = nf_conn_acct_find(ct);
if (!acct)
return 0;
counter = acct->counter;
seq_printf(s, "packets=%llu bytes=%llu ",
(unsigned long long)atomic64_read(&counter[dir].packets),
(unsigned long long)atomic64_read(&counter[dir].bytes));
return 0;
}
/* return 0 on success, 1 in case of error */
static int ct_seq_show(struct seq_file *s, void *v)
{
......@@ -514,36 +532,53 @@ nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
static struct ctl_table_header *nf_ct_netfilter_header;
enum nf_ct_sysctl_index {
NF_SYSCTL_CT_MAX,
NF_SYSCTL_CT_COUNT,
NF_SYSCTL_CT_BUCKETS,
NF_SYSCTL_CT_CHECKSUM,
NF_SYSCTL_CT_LOG_INVALID,
NF_SYSCTL_CT_EXPECT_MAX,
NF_SYSCTL_CT_ACCT,
NF_SYSCTL_CT_HELPER,
#ifdef CONFIG_NF_CONNTRACK_EVENTS
NF_SYSCTL_CT_EVENTS,
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
NF_SYSCTL_CT_TIMESTAMP,
#endif
};
static struct ctl_table nf_ct_sysctl_table[] = {
{
[NF_SYSCTL_CT_MAX] = {
.procname = "nf_conntrack_max",
.data = &nf_conntrack_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
[NF_SYSCTL_CT_COUNT] = {
.procname = "nf_conntrack_count",
.data = &init_net.ct.count,
.maxlen = sizeof(int),
.mode = 0444,
.proc_handler = proc_dointvec,
},
{
[NF_SYSCTL_CT_BUCKETS] = {
.procname = "nf_conntrack_buckets",
.data = &nf_conntrack_htable_size_user,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = nf_conntrack_hash_sysctl,
},
{
[NF_SYSCTL_CT_CHECKSUM] = {
.procname = "nf_conntrack_checksum",
.data = &init_net.ct.sysctl_checksum,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
[NF_SYSCTL_CT_LOG_INVALID] = {
.procname = "nf_conntrack_log_invalid",
.data = &init_net.ct.sysctl_log_invalid,
.maxlen = sizeof(unsigned int),
......@@ -552,13 +587,45 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.extra1 = &log_invalid_proto_min,
.extra2 = &log_invalid_proto_max,
},
{
[NF_SYSCTL_CT_EXPECT_MAX] = {
.procname = "nf_conntrack_expect_max",
.data = &nf_ct_expect_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
[NF_SYSCTL_CT_ACCT] = {
.procname = "nf_conntrack_acct",
.data = &init_net.ct.sysctl_acct,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
[NF_SYSCTL_CT_HELPER] = {
.procname = "nf_conntrack_helper",
.data = &init_net.ct.sysctl_auto_assign_helper,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#ifdef CONFIG_NF_CONNTRACK_EVENTS
[NF_SYSCTL_CT_EVENTS] = {
.procname = "nf_conntrack_events",
.data = &init_net.ct.sysctl_events,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
[NF_SYSCTL_CT_TIMESTAMP] = {
.procname = "nf_conntrack_timestamp",
.data = &init_net.ct.sysctl_tstamp,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#endif
{ }
};
......@@ -582,16 +649,28 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
if (!table)
goto out_kmemdup;
table[1].data = &net->ct.count;
table[3].data = &net->ct.sysctl_checksum;
table[4].data = &net->ct.sysctl_log_invalid;
table[NF_SYSCTL_CT_COUNT].data = &net->ct.count;
table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum;
table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events;
#endif
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
if (net->user_ns != &init_user_ns) {
table[NF_SYSCTL_CT_MAX].procname = NULL;
table[NF_SYSCTL_CT_ACCT].procname = NULL;
table[NF_SYSCTL_CT_HELPER].procname = NULL;
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
table[NF_SYSCTL_CT_TIMESTAMP].procname = NULL;
#endif
#ifdef CONFIG_NF_CONNTRACK_EVENTS
table[NF_SYSCTL_CT_EVENTS].procname = NULL;
#endif
}
if (!net_eq(&init_net, net))
table[2].mode = 0444;
table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
if (!net->ct.sysctl_header)
......
......@@ -22,83 +22,15 @@ static bool nf_ct_tstamp __read_mostly;
module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
#ifdef CONFIG_SYSCTL
static struct ctl_table tstamp_sysctl_table[] = {
{
.procname = "nf_conntrack_timestamp",
.data = &init_net.ct.sysctl_tstamp,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{}
};
#endif /* CONFIG_SYSCTL */
static const struct nf_ct_ext_type tstamp_extend = {
.len = sizeof(struct nf_conn_tstamp),
.align = __alignof__(struct nf_conn_tstamp),
.id = NF_CT_EXT_TSTAMP,
};
#ifdef CONFIG_SYSCTL
static int nf_conntrack_tstamp_init_sysctl(struct net *net)
{
struct ctl_table *table;
table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table),
GFP_KERNEL);
if (!table)
goto out;
table[0].data = &net->ct.sysctl_tstamp;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
net->ct.tstamp_sysctl_header = register_net_sysctl(net, "net/netfilter",
table);
if (!net->ct.tstamp_sysctl_header) {
pr_err("can't register to sysctl\n");
goto out_register;
}
return 0;
out_register:
kfree(table);
out:
return -ENOMEM;
}
static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
{
struct ctl_table *table;
table = net->ct.tstamp_sysctl_header->ctl_table_arg;
unregister_net_sysctl_table(net->ct.tstamp_sysctl_header);
kfree(table);
}
#else
static int nf_conntrack_tstamp_init_sysctl(struct net *net)
{
return 0;
}
static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
{
}
#endif
int nf_conntrack_tstamp_pernet_init(struct net *net)
void nf_conntrack_tstamp_pernet_init(struct net *net)
{
net->ct.sysctl_tstamp = nf_ct_tstamp;
return nf_conntrack_tstamp_init_sysctl(net);
}
void nf_conntrack_tstamp_pernet_fini(struct net *net)
{
nf_conntrack_tstamp_fini_sysctl(net);
}
int nf_conntrack_tstamp_init(void)
......
......@@ -247,7 +247,8 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
static int
nf_flow_table_iterate(struct nf_flowtable *flow_table,
void (*iter)(struct flow_offload *flow, void *data),
void *data)
{
......@@ -279,40 +280,19 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
return err;
}
EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}
static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
{
struct flow_offload_tuple_rhash *tuplehash;
struct rhashtable_iter hti;
struct flow_offload *flow;
rhashtable_walk_enter(&flow_table->rhashtable, &hti);
rhashtable_walk_start(&hti);
while ((tuplehash = rhashtable_walk_next(&hti))) {
if (IS_ERR(tuplehash)) {
if (PTR_ERR(tuplehash) != -EAGAIN)
break;
continue;
}
if (tuplehash->tuple.dir)
continue;
flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
struct nf_flowtable *flow_table = data;
if (nf_flow_has_expired(flow) ||
(flow->flags & (FLOW_OFFLOAD_DYING |
FLOW_OFFLOAD_TEARDOWN)))
(flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)))
flow_offload_del(flow_table, flow);
}
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
}
static void nf_flow_offload_work_gc(struct work_struct *work)
......@@ -320,7 +300,7 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
struct nf_flowtable *flow_table;
flow_table = container_of(work, struct nf_flowtable, gc_work.work);
nf_flow_offload_gc_step(flow_table);
nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
......@@ -504,7 +484,7 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
mutex_unlock(&flowtable_lock);
cancel_delayed_work_sync(&flow_table->gc_work);
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
nf_flow_offload_gc_step(flow_table);
nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
......
......@@ -23,7 +23,6 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
......@@ -38,8 +37,6 @@ static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];
static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
__read_mostly;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
static unsigned int nat_net_id __read_mostly;
static struct hlist_head *nf_nat_bysource __read_mostly;
......@@ -67,13 +64,6 @@ __nf_nat_l3proto_find(u8 family)
return rcu_dereference(nf_nat_l3protos[family]);
}
inline const struct nf_nat_l4proto *
__nf_nat_l4proto_find(u8 family, u8 protonum)
{
return rcu_dereference(nf_nat_l4protos[family][protonum]);
}
EXPORT_SYMBOL_GPL(__nf_nat_l4proto_find);
#ifdef CONFIG_XFRM
static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
{
......@@ -173,27 +163,66 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
}
EXPORT_SYMBOL(nf_nat_used_tuple);
static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
const struct nf_nat_range2 *range)
{
if (t->src.l3num == NFPROTO_IPV4)
return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
}
/* Is the manipable part of the tuple between min and max incl? */
static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
const union nf_conntrack_man_proto *min,
const union nf_conntrack_man_proto *max)
{
__be16 port;
switch (tuple->dst.protonum) {
case IPPROTO_ICMP: /* fallthrough */
case IPPROTO_ICMPV6:
return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
case IPPROTO_GRE: /* all fall though */
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
case IPPROTO_DCCP:
case IPPROTO_SCTP:
if (maniptype == NF_NAT_MANIP_SRC)
port = tuple->src.u.all;
else
port = tuple->dst.u.all;
return ntohs(port) >= ntohs(min->all) &&
ntohs(port) <= ntohs(max->all);
default:
return true;
}
}
/* If we source map this tuple so reply looks like reply_tuple, will
* that meet the constraints of range.
*/
static int in_range(const struct nf_nat_l3proto *l3proto,
const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *tuple,
static int in_range(const struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range)
{
/* If we are supposed to map IPs, then we must be in the
* range specified, otherwise let this drag us onto a new src IP.
*/
if (range->flags & NF_NAT_RANGE_MAP_IPS &&
!l3proto->in_range(tuple, range))
!nf_nat_inet_in_range(tuple, range))
return 0;
if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
l4proto->in_range(tuple, NF_NAT_MANIP_SRC,
&range->min_proto, &range->max_proto))
if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
return 1;
return 0;
return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
&range->min_proto, &range->max_proto);
}
static inline int
......@@ -212,8 +241,6 @@ same_src(const struct nf_conn *ct,
static int
find_appropriate_src(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_nat_l3proto *l3proto,
const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *result,
const struct nf_nat_range2 *range)
......@@ -230,7 +257,7 @@ find_appropriate_src(struct net *net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
result->dst = tuple->dst;
if (in_range(l3proto, l4proto, result, range))
if (in_range(result, range))
return 1;
}
}
......@@ -311,6 +338,123 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone,
}
}
/* Alter the per-proto part of the tuple (depending on maniptype), to
* give a unique tuple in the given range if possible.
*
* Per-protocol part of tuple is initialized to the incoming packet.
*/
static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
unsigned int range_size, min, max, i, attempts;
__be16 *keyptr;
u16 off;
static const unsigned int max_attempts = 128;
switch (tuple->dst.protonum) {
case IPPROTO_ICMP: /* fallthrough */
case IPPROTO_ICMPV6:
/* id is same for either direction... */
keyptr = &tuple->src.u.icmp.id;
min = range->min_proto.icmp.id;
range_size = ntohs(range->max_proto.icmp.id) -
ntohs(range->min_proto.icmp.id) + 1;
goto find_free_id;
#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
case IPPROTO_GRE:
/* If there is no master conntrack we are not PPTP,
do not change tuples */
if (!ct->master)
return;
if (maniptype == NF_NAT_MANIP_SRC)
keyptr = &tuple->src.u.gre.key;
else
keyptr = &tuple->dst.u.gre.key;
if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
min = 1;
range_size = 65535;
} else {
min = ntohs(range->min_proto.gre.key);
range_size = ntohs(range->max_proto.gre.key) - min + 1;
}
goto find_free_id;
#endif
case IPPROTO_UDP: /* fallthrough */
case IPPROTO_UDPLITE: /* fallthrough */
case IPPROTO_TCP: /* fallthrough */
case IPPROTO_SCTP: /* fallthrough */
case IPPROTO_DCCP: /* fallthrough */
if (maniptype == NF_NAT_MANIP_SRC)
keyptr = &tuple->src.u.all;
else
keyptr = &tuple->dst.u.all;
break;
default:
return;
}
/* If no range specified... */
if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
/* If it's dst rewrite, can't change port */
if (maniptype == NF_NAT_MANIP_DST)
return;
if (ntohs(*keyptr) < 1024) {
/* Loose convention: >> 512 is credential passing */
if (ntohs(*keyptr) < 512) {
min = 1;
range_size = 511 - min + 1;
} else {
min = 600;
range_size = 1023 - min + 1;
}
} else {
min = 1024;
range_size = 65535 - 1024 + 1;
}
} else {
min = ntohs(range->min_proto.all);
max = ntohs(range->max_proto.all);
if (unlikely(max < min))
swap(max, min);
range_size = max - min + 1;
}
find_free_id:
if (range->flags & NF_NAT_RANGE_PROTO_OFFSET)
off = (ntohs(*keyptr) - ntohs(range->base_proto.all));
else
off = prandom_u32();
attempts = range_size;
if (attempts > max_attempts)
attempts = max_attempts;
/* We are in softirq; doing a search of the entire range risks
* soft lockup when all tuples are already used.
*
* If we can't find any free port from first offset, pick a new
* one and try again, with ever smaller search window.
*/
another_round:
for (i = 0; i < attempts; i++, off++) {
*keyptr = htons(min + off % range_size);
if (!nf_nat_used_tuple(tuple, ct))
return;
}
if (attempts >= range_size || attempts < 16)
return;
attempts /= 2;
off = prandom_u32();
goto another_round;
}
/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
* we change the source to map into the range. For NF_INET_PRE_ROUTING
* and NF_INET_LOCAL_OUT, we change the destination to map into the
......@@ -325,17 +469,10 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
const struct nf_conntrack_zone *zone;
const struct nf_nat_l3proto *l3proto;
const struct nf_nat_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
zone = nf_ct_zone(ct);
rcu_read_lock();
l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num,
orig_tuple->dst.protonum);
/* 1) If this srcip/proto/src-proto-part is currently mapped,
* and that same mapping gives a unique tuple within the given
* range, use that.
......@@ -347,16 +484,16 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
if (maniptype == NF_NAT_MANIP_SRC &&
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
/* try the original tuple first */
if (in_range(l3proto, l4proto, orig_tuple, range)) {
if (in_range(orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
*tuple = *orig_tuple;
goto out;
return;
}
} else if (find_appropriate_src(net, zone, l3proto, l4proto,
} else if (find_appropriate_src(net, zone,
orig_tuple, tuple, range)) {
pr_debug("get_unique_tuple: Found current src map\n");
if (!nf_nat_used_tuple(tuple, ct))
goto out;
return;
}
}
......@@ -372,21 +509,19 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
l4proto->in_range(tuple, maniptype,
l4proto_in_range(tuple, maniptype,
&range->min_proto,
&range->max_proto) &&
(range->min_proto.all == range->max_proto.all ||
!nf_nat_used_tuple(tuple, ct)))
goto out;
return;
} else if (!nf_nat_used_tuple(tuple, ct)) {
goto out;
return;
}
}
/* Last chance: get protocol to try to obtain unique tuple. */
l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
out:
rcu_read_unlock();
nf_nat_l4proto_unique_tuple(tuple, range, maniptype, ct);
}
struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
......@@ -502,16 +637,13 @@ static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_dir dir)
{
const struct nf_nat_l3proto *l3proto;
const struct nf_nat_l4proto *l4proto;
struct nf_conntrack_tuple target;
/* We are aiming to look like inverse of other direction. */
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
l3proto = __nf_nat_l3proto_find(target.src.l3num);
l4proto = __nf_nat_l4proto_find(target.src.l3num,
target.dst.protonum);
if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
if (!l3proto->manip_pkt(skb, 0, &target, mtype))
return NF_DROP;
return NF_ACCEPT;
......@@ -667,16 +799,6 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
return 0;
}
static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
{
struct nf_nat_proto_clean clean = {
.l3proto = l3proto,
.l4proto = l4proto,
};
nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
}
static void nf_nat_l3proto_clean(u8 l3proto)
{
struct nf_nat_proto_clean clean = {
......@@ -686,82 +808,8 @@ static void nf_nat_l3proto_clean(u8 l3proto)
nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
}
/* Protocol registration. */
int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
const struct nf_nat_l4proto **l4protos;
unsigned int i;
int ret = 0;
mutex_lock(&nf_nat_proto_mutex);
if (nf_nat_l4protos[l3proto] == NULL) {
l4protos = kmalloc_array(IPPROTO_MAX,
sizeof(struct nf_nat_l4proto *),
GFP_KERNEL);
if (l4protos == NULL) {
ret = -ENOMEM;
goto out;
}
for (i = 0; i < IPPROTO_MAX; i++)
RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown);
/* Before making proto_array visible to lockless readers,
* we must make sure its content is committed to memory.
*/
smp_wmb();
nf_nat_l4protos[l3proto] = l4protos;
}
if (rcu_dereference_protected(
nf_nat_l4protos[l3proto][l4proto->l4proto],
lockdep_is_held(&nf_nat_proto_mutex)
) != &nf_nat_l4proto_unknown) {
ret = -EBUSY;
goto out;
}
RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto);
out:
mutex_unlock(&nf_nat_proto_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_register);
/* No one stores the protocol anywhere; simply delete it. */
void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
mutex_lock(&nf_nat_proto_mutex);
RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto],
&nf_nat_l4proto_unknown);
mutex_unlock(&nf_nat_proto_mutex);
synchronize_rcu();
nf_nat_l4proto_clean(l3proto, l4proto->l4proto);
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister);
int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
{
mutex_lock(&nf_nat_proto_mutex);
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP],
&nf_nat_l4proto_tcp);
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP],
&nf_nat_l4proto_udp);
#ifdef CONFIG_NF_NAT_PROTO_DCCP
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP],
&nf_nat_l4proto_dccp);
#endif
#ifdef CONFIG_NF_NAT_PROTO_SCTP
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP],
&nf_nat_l4proto_sctp);
#endif
#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDPLITE],
&nf_nat_l4proto_udplite);
#endif
mutex_unlock(&nf_nat_proto_mutex);
RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
return 0;
}
......@@ -802,12 +850,26 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
[CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
};
static int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range2 *range)
{
if (tb[CTA_PROTONAT_PORT_MIN]) {
range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
range->max_proto.all = range->min_proto.all;
range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
if (tb[CTA_PROTONAT_PORT_MAX]) {
range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
return 0;
}
static int nfnetlink_parse_nat_proto(struct nlattr *attr,
const struct nf_conn *ct,
struct nf_nat_range2 *range)
{
struct nlattr *tb[CTA_PROTONAT_MAX+1];
const struct nf_nat_l4proto *l4proto;
int err;
err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr,
......@@ -815,11 +877,7 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr,
if (err < 0)
return err;
l4proto = __nf_nat_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->nlattr_to_range)
err = l4proto->nlattr_to_range(tb, range);
return err;
return nf_nat_l4proto_nlattr_to_range(tb, range);
}
static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
......@@ -1082,7 +1140,6 @@ static int __init nf_nat_init(void)
static void __exit nf_nat_cleanup(void)
{
struct nf_nat_proto_clean clean = {};
unsigned int i;
nf_ct_iterate_destroy(nf_nat_proto_clean, &clean);
......@@ -1090,10 +1147,6 @@ static void __exit nf_nat_cleanup(void)
nf_ct_helper_expectfn_unregister(&follow_master_nat);
RCU_INIT_POINTER(nf_nat_hook, NULL);
synchronize_rcu();
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
synchronize_net();
kvfree(nf_nat_bysource);
unregister_pernet_subsys(&nat_net_ops);
......
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/dccp.h>
#include <linux/sctp.h>
#include <net/sctp/checksum.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
static void
__udp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, struct udphdr *hdr,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype, bool do_csum)
{
__be16 *portptr, newport;
if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src port */
newport = tuple->src.u.udp.port;
portptr = &hdr->source;
} else {
/* Get rid of dst port */
newport = tuple->dst.u.udp.port;
portptr = &hdr->dest;
}
if (do_csum) {
l3proto->csum_update(skb, iphdroff, &hdr->check,
tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
false);
if (!hdr->check)
hdr->check = CSUM_MANGLED_0;
}
*portptr = newport;
}
static bool udp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct udphdr *hdr;
bool do_csum;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
__udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum);
return true;
}
static bool udplite_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
struct udphdr *hdr;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
__udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true);
#endif
return true;
}
static bool
sctp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
#ifdef CONFIG_NF_CT_PROTO_SCTP
struct sctphdr *hdr;
int hdrsize = 8;
/* This could be an inner header returned in imcp packet; in such
* cases we cannot update the checksum field since it is outside
* of the 8 bytes of transport layer headers we are guaranteed.
*/
if (skb->len >= hdroff + sizeof(*hdr))
hdrsize = sizeof(*hdr);
if (!skb_make_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct sctphdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src port */
hdr->source = tuple->src.u.sctp.port;
} else {
/* Get rid of dst port */
hdr->dest = tuple->dst.u.sctp.port;
}
if (hdrsize < sizeof(*hdr))
return true;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
hdr->checksum = sctp_compute_cksum(skb, hdroff);
skb->ip_summed = CHECKSUM_NONE;
}
#endif
return true;
}
static bool
tcp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct tcphdr *hdr;
__be16 *portptr, newport, oldport;
int hdrsize = 8; /* TCP connection tracking guarantees this much */
/* this could be a inner header returned in icmp packet; in such
cases we cannot update the checksum field since it is outside of
the 8 bytes of transport layer headers we are guaranteed */
if (skb->len >= hdroff + sizeof(struct tcphdr))
hdrsize = sizeof(struct tcphdr);
if (!skb_make_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct tcphdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src port */
newport = tuple->src.u.tcp.port;
portptr = &hdr->source;
} else {
/* Get rid of dst port */
newport = tuple->dst.u.tcp.port;
portptr = &hdr->dest;
}
oldport = *portptr;
*portptr = newport;
if (hdrsize < sizeof(*hdr))
return true;
l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
return true;
}
static bool
dccp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
#ifdef CONFIG_NF_CT_PROTO_DCCP
struct dccp_hdr *hdr;
__be16 *portptr, oldport, newport;
int hdrsize = 8; /* DCCP connection tracking guarantees this much */
if (skb->len >= hdroff + sizeof(struct dccp_hdr))
hdrsize = sizeof(struct dccp_hdr);
if (!skb_make_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct dccp_hdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
newport = tuple->src.u.dccp.port;
portptr = &hdr->dccph_sport;
} else {
newport = tuple->dst.u.dccp.port;
portptr = &hdr->dccph_dport;
}
oldport = *portptr;
*portptr = newport;
if (hdrsize < sizeof(*hdr))
return true;
l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum,
tuple, maniptype);
inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
false);
#endif
return true;
}
static bool
icmp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct icmphdr *hdr;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct icmphdr *)(skb->data + hdroff);
inet_proto_csum_replace2(&hdr->checksum, skb,
hdr->un.echo.id, tuple->src.u.icmp.id, false);
hdr->un.echo.id = tuple->src.u.icmp.id;
return true;
}
static bool
icmpv6_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct icmp6hdr *hdr;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct icmp6hdr *)(skb->data + hdroff);
l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
tuple, maniptype);
if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
hdr->icmp6_identifier,
tuple->src.u.icmp.id, false);
hdr->icmp6_identifier = tuple->src.u.icmp.id;
}
return true;
}
/* manipulate a GRE packet according to maniptype */
static bool
gre_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
const struct gre_base_hdr *greh;
struct pptp_gre_header *pgreh;
/* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */
if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
return false;
greh = (void *)skb->data + hdroff;
pgreh = (struct pptp_gre_header *)greh;
/* we only have destination manip of a packet, since 'source key'
* is not present in the packet itself */
if (maniptype != NF_NAT_MANIP_DST)
return true;
switch (greh->flags & GRE_VERSION) {
case GRE_VERSION_0:
/* We do not currently NAT any GREv0 packets.
* Try to behave like "nf_nat_proto_unknown" */
break;
case GRE_VERSION_1:
pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
pgreh->call_id = tuple->dst.u.gre.key;
break;
default:
pr_debug("can't nat unknown GRE version\n");
return false;
}
#endif
return true;
}
bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
switch (tuple->dst.protonum) {
case IPPROTO_TCP:
return tcp_manip_pkt(skb, l3proto, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_UDP:
return udp_manip_pkt(skb, l3proto, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_UDPLITE:
return udplite_manip_pkt(skb, l3proto, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_SCTP:
return sctp_manip_pkt(skb, l3proto, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_ICMP:
return icmp_manip_pkt(skb, l3proto, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_ICMPV6:
return icmpv6_manip_pkt(skb, l3proto, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_DCCP:
return dccp_manip_pkt(skb, l3proto, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_GRE:
return gre_manip_pkt(skb, l3proto, iphdroff, hdroff,
tuple, maniptype);
}
/* If we don't know protocol -- no error, pass it unmodified. */
return true;
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_manip_pkt);
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2008 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/random.h>
#include <linux/netfilter.h>
#include <linux/export.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
const union nf_conntrack_man_proto *min,
const union nf_conntrack_man_proto *max)
{
__be16 port;
if (maniptype == NF_NAT_MANIP_SRC)
port = tuple->src.u.all;
else
port = tuple->dst.u.all;
return ntohs(port) >= ntohs(min->all) &&
ntohs(port) <= ntohs(max->all);
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range);
void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct,
u16 *rover)
{
unsigned int range_size, min, max, i;
__be16 *portptr;
u_int16_t off;
if (maniptype == NF_NAT_MANIP_SRC)
portptr = &tuple->src.u.all;
else
portptr = &tuple->dst.u.all;
/* If no range specified... */
if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
/* If it's dst rewrite, can't change port */
if (maniptype == NF_NAT_MANIP_DST)
return;
if (ntohs(*portptr) < 1024) {
/* Loose convention: >> 512 is credential passing */
if (ntohs(*portptr) < 512) {
min = 1;
range_size = 511 - min + 1;
} else {
min = 600;
range_size = 1023 - min + 1;
}
} else {
min = 1024;
range_size = 65535 - 1024 + 1;
}
} else {
min = ntohs(range->min_proto.all);
max = ntohs(range->max_proto.all);
if (unlikely(max < min))
swap(max, min);
range_size = max - min + 1;
}
if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) {
off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC
? tuple->dst.u.all
: tuple->src.u.all);
} else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) {
off = prandom_u32();
} else if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) {
off = (ntohs(*portptr) - ntohs(range->base_proto.all));
} else {
off = *rover;
}
for (i = 0; ; ++off) {
*portptr = htons(min + off % range_size);
if (++i != range_size && nf_nat_used_tuple(tuple, ct))
continue;
if (!(range->flags & (NF_NAT_RANGE_PROTO_RANDOM_ALL|
NF_NAT_RANGE_PROTO_OFFSET)))
*rover = off;
return;
}
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range2 *range)
{
if (tb[CTA_PROTONAT_PORT_MIN]) {
range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
range->max_proto.all = range->min_proto.all;
range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
if (tb[CTA_PROTONAT_PORT_MAX]) {
range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
return 0;
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_nlattr_to_range);
#endif
/*
* DCCP NAT protocol helper
*
* Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/dccp.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
static u_int16_t dccp_port_rover;
static void
dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
&dccp_port_rover);
}
static bool
dccp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct dccp_hdr *hdr;
__be16 *portptr, oldport, newport;
int hdrsize = 8; /* DCCP connection tracking guarantees this much */
if (skb->len >= hdroff + sizeof(struct dccp_hdr))
hdrsize = sizeof(struct dccp_hdr);
if (!skb_make_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct dccp_hdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
newport = tuple->src.u.dccp.port;
portptr = &hdr->dccph_sport;
} else {
newport = tuple->dst.u.dccp.port;
portptr = &hdr->dccph_dport;
}
oldport = *portptr;
*portptr = newport;
if (hdrsize < sizeof(*hdr))
return true;
l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum,
tuple, maniptype);
inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
false);
return true;
}
const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
.l4proto = IPPROTO_DCCP,
.manip_pkt = dccp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = dccp_unique_tuple,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
/*
* Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/sctp.h>
#include <net/sctp/checksum.h>
#include <net/netfilter/nf_nat_l4proto.h>
static u_int16_t nf_sctp_port_rover;
static void
sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
&nf_sctp_port_rover);
}
static bool
sctp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct sctphdr *hdr;
int hdrsize = 8;
/* This could be an inner header returned in imcp packet; in such
* cases we cannot update the checksum field since it is outside
* of the 8 bytes of transport layer headers we are guaranteed.
*/
if (skb->len >= hdroff + sizeof(*hdr))
hdrsize = sizeof(*hdr);
if (!skb_make_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct sctphdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src port */
hdr->source = tuple->src.u.sctp.port;
} else {
/* Get rid of dst port */
hdr->dest = tuple->dst.u.sctp.port;
}
if (hdrsize < sizeof(*hdr))
return true;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
hdr->checksum = sctp_compute_cksum(skb, hdroff);
skb->ip_summed = CHECKSUM_NONE;
}
return true;
}
const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
.l4proto = IPPROTO_SCTP,
.manip_pkt = sctp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = sctp_unique_tuple,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/tcp.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_core.h>
static u16 tcp_port_rover;
static void
tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
&tcp_port_rover);
}
static bool
tcp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct tcphdr *hdr;
__be16 *portptr, newport, oldport;
int hdrsize = 8; /* TCP connection tracking guarantees this much */
/* this could be a inner header returned in icmp packet; in such
cases we cannot update the checksum field since it is outside of
the 8 bytes of transport layer headers we are guaranteed */
if (skb->len >= hdroff + sizeof(struct tcphdr))
hdrsize = sizeof(struct tcphdr);
if (!skb_make_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct tcphdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src port */
newport = tuple->src.u.tcp.port;
portptr = &hdr->source;
} else {
/* Get rid of dst port */
newport = tuple->dst.u.tcp.port;
portptr = &hdr->dest;
}
oldport = *portptr;
*portptr = newport;
if (hdrsize < sizeof(*hdr))
return true;
l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
return true;
}
const struct nf_nat_l4proto nf_nat_l4proto_tcp = {
.l4proto = IPPROTO_TCP,
.manip_pkt = tcp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = tcp_unique_tuple,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/udp.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
static u16 udp_port_rover;
static void
udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
&udp_port_rover);
}
static void
__udp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, struct udphdr *hdr,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype, bool do_csum)
{
__be16 *portptr, newport;
if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src port */
newport = tuple->src.u.udp.port;
portptr = &hdr->source;
} else {
/* Get rid of dst port */
newport = tuple->dst.u.udp.port;
portptr = &hdr->dest;
}
if (do_csum) {
l3proto->csum_update(skb, iphdroff, &hdr->check,
tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
false);
if (!hdr->check)
hdr->check = CSUM_MANGLED_0;
}
*portptr = newport;
}
static bool udp_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct udphdr *hdr;
bool do_csum;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
__udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum);
return true;
}
#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
static u16 udplite_port_rover;
static bool udplite_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct udphdr *hdr;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
__udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true);
return true;
}
static void
udplite_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
&udplite_port_rover);
}
const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
.l4proto = IPPROTO_UDPLITE,
.manip_pkt = udplite_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = udplite_unique_tuple,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
#endif /* CONFIG_NF_NAT_PROTO_UDPLITE */
const struct nf_nat_l4proto nf_nat_l4proto_udp = {
.l4proto = IPPROTO_UDP,
.manip_pkt = udp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
.unique_tuple = udp_unique_tuple,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
/* The "unknown" protocol. This is what is used for protocols we
* don't understand. It's returned by ip_ct_find_proto().
*/
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/init.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_l4proto.h>
static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type manip_type,
const union nf_conntrack_man_proto *min,
const union nf_conntrack_man_proto *max)
{
return true;
}
static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
/* Sorry: we can't help you; if it's not unique, we can't frob
* anything.
*/
return;
}
static bool
unknown_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
return true;
}
const struct nf_nat_l4proto nf_nat_l4proto_unknown = {
.manip_pkt = unknown_manip_pkt,
.in_range = unknown_in_range,
.unique_tuple = unknown_unique_tuple,
};
......@@ -18,6 +18,7 @@
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
......@@ -316,6 +317,9 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
static void nf_nat_sip_expected(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
struct nf_conn_help *help = nfct_help(ct->master);
struct nf_conntrack_expect *pair_exp;
int range_set_for_snat = 0;
struct nf_nat_range2 range;
/* This must be a fresh one. */
......@@ -327,15 +331,42 @@ static void nf_nat_sip_expected(struct nf_conn *ct,
range.min_addr = range.max_addr = exp->saved_addr;
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
/* Change src to where master sends to, but only if the connection
* actually came from the same source. */
if (nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
/* Do media streams SRC manip according with the parameters
* found in the paired expectation.
*/
if (exp->class != SIP_EXPECT_SIGNALLING) {
spin_lock_bh(&nf_conntrack_expect_lock);
hlist_for_each_entry(pair_exp, &help->expectations, lnode) {
if (pair_exp->tuple.src.l3num == nf_ct_l3num(ct) &&
pair_exp->tuple.dst.protonum == ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum &&
nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, &pair_exp->saved_addr) &&
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all == pair_exp->saved_proto.all) {
range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
range.min_proto.all = range.max_proto.all = pair_exp->tuple.dst.u.all;
range.min_addr = range.max_addr = pair_exp->tuple.dst.u3;
range_set_for_snat = 1;
break;
}
}
spin_unlock_bh(&nf_conntrack_expect_lock);
}
/* When no paired expectation has been found, change src to
* where master sends to, but only if the connection actually came
* from the same source.
*/
if (!range_set_for_snat &&
nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
&ct->master->tuplehash[exp->dir].tuple.src.u3)) {
range.flags = NF_NAT_RANGE_MAP_IPS;
range.min_addr = range.max_addr
= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
range_set_for_snat = 1;
}
/* Perform SRC manip. */
if (range_set_for_snat)
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
}
static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
......
......@@ -2295,15 +2295,52 @@ struct nft_rule_dump_ctx {
char *chain;
};
static int __nf_tables_dump_rules(struct sk_buff *skb,
unsigned int *idx,
struct netlink_callback *cb,
const struct nft_table *table,
const struct nft_chain *chain)
{
struct net *net = sock_net(skb->sk);
unsigned int s_idx = cb->args[0];
const struct nft_rule *rule;
int rc = 1;
list_for_each_entry_rcu(rule, &chain->rules, list) {
if (!nft_is_active(net, rule))
goto cont;
if (*idx < s_idx)
goto cont;
if (*idx > s_idx) {
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
}
if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
table->family,
table, chain, rule) < 0)
goto out_unfinished;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
(*idx)++;
}
rc = 0;
out_unfinished:
cb->args[0] = *idx;
return rc;
}
static int nf_tables_dump_rules(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
const struct nft_rule_dump_ctx *ctx = cb->data;
const struct nft_table *table;
struct nft_table *table;
const struct nft_chain *chain;
const struct nft_rule *rule;
unsigned int idx = 0, s_idx = cb->args[0];
unsigned int idx = 0;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
......@@ -2317,37 +2354,34 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
continue;
list_for_each_entry_rcu(chain, &table->chains, list) {
if (ctx && ctx->chain &&
strcmp(ctx->chain, chain->name) != 0)
continue;
if (ctx && ctx->chain) {
struct rhlist_head *list, *tmp;
list_for_each_entry_rcu(rule, &chain->rules, list) {
if (!nft_is_active(net, rule))
goto cont;
if (idx < s_idx)
goto cont;
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
table->family,
table, chain, rule) < 0)
list = rhltable_lookup(&table->chains_ht, ctx->chain,
nft_chain_ht_params);
if (!list)
goto done;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) {
if (!nft_is_active(net, chain))
continue;
__nf_tables_dump_rules(skb, &idx,
cb, table, chain);
break;
}
goto done;
}
list_for_each_entry_rcu(chain, &table->chains, list) {
if (__nf_tables_dump_rules(skb, &idx, cb, table, chain))
goto done;
}
if (ctx && ctx->table)
break;
}
done:
rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
}
......
......@@ -148,7 +148,7 @@ static void
instance_put(struct nfulnl_instance *inst)
{
if (inst && refcount_dec_and_test(&inst->use))
call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
call_rcu(&inst->rcu, nfulnl_instance_free_rcu);
}
static void nfulnl_timer(struct timer_list *t);
......
......@@ -260,7 +260,7 @@ static inline void
dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
{
hlist_del_rcu(&ent->node);
call_rcu_bh(&ent->rcu, dsthash_free_rcu);
call_rcu(&ent->rcu, dsthash_free_rcu);
ht->count--;
}
static void htable_gc(struct work_struct *work);
......@@ -1326,7 +1326,7 @@ static void __exit hashlimit_mt_exit(void)
xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
unregister_pernet_subsys(&hashlimit_net_ops);
rcu_barrier_bh();
rcu_barrier();
kmem_cache_destroy(hashlimit_cachep);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment