Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
linux
Commits
9bb862be
Commit
9bb862be
authored
May 08, 2012
by
David S. Miller
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of
git://1984.lsi.us.es/net-next
parents
b44907e6
d16cf20e
Changes
34
Show whitespace changes
Inline
Side-by-side
Showing
34 changed files
with
856 additions
and
1708 deletions
+856
-1708
Documentation/ABI/removed/ip_queue
Documentation/ABI/removed/ip_queue
+9
-0
Documentation/networking/ip-sysctl.txt
Documentation/networking/ip-sysctl.txt
+11
-2
include/linux/ip_vs.h
include/linux/ip_vs.h
+5
-0
include/linux/netfilter/nf_conntrack_common.h
include/linux/netfilter/nf_conntrack_common.h
+4
-0
include/linux/netfilter_ipv4/Kbuild
include/linux/netfilter_ipv4/Kbuild
+0
-1
include/linux/netfilter_ipv4/ip_queue.h
include/linux/netfilter_ipv4/ip_queue.h
+0
-72
include/linux/netlink.h
include/linux/netlink.h
+1
-1
include/net/ip_vs.h
include/net/ip_vs.h
+79
-8
include/net/netfilter/nf_conntrack.h
include/net/netfilter/nf_conntrack.h
+2
-8
include/net/netfilter/nf_conntrack_helper.h
include/net/netfilter/nf_conntrack_helper.h
+2
-2
include/net/netns/conntrack.h
include/net/netns/conntrack.h
+3
-0
net/bridge/br_netfilter.c
net/bridge/br_netfilter.c
+24
-2
net/core/sock.c
net/core/sock.c
+2
-0
net/ipv4/netfilter/Makefile
net/ipv4/netfilter/Makefile
+0
-3
net/ipv4/netfilter/ip_queue.c
net/ipv4/netfilter/ip_queue.c
+0
-639
net/ipv6/netfilter/Kconfig
net/ipv6/netfilter/Kconfig
+0
-22
net/ipv6/netfilter/Makefile
net/ipv6/netfilter/Makefile
+0
-1
net/ipv6/netfilter/ip6_queue.c
net/ipv6/netfilter/ip6_queue.c
+0
-641
net/netfilter/ipvs/ip_vs_conn.c
net/netfilter/ipvs/ip_vs_conn.c
+52
-18
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_core.c
+2
-28
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_ctl.c
+66
-4
net/netfilter/ipvs/ip_vs_dh.c
net/netfilter/ipvs/ip_vs_dh.c
+1
-1
net/netfilter/ipvs/ip_vs_ftp.c
net/netfilter/ipvs/ip_vs_ftp.c
+1
-1
net/netfilter/ipvs/ip_vs_lblc.c
net/netfilter/ipvs/ip_vs_lblc.c
+1
-1
net/netfilter/ipvs/ip_vs_lblcr.c
net/netfilter/ipvs/ip_vs_lblcr.c
+1
-1
net/netfilter/ipvs/ip_vs_proto.c
net/netfilter/ipvs/ip_vs_proto.c
+3
-3
net/netfilter/ipvs/ip_vs_sh.c
net/netfilter/ipvs/ip_vs_sh.c
+1
-1
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/ipvs/ip_vs_sync.c
+455
-207
net/netfilter/ipvs/ip_vs_wrr.c
net/netfilter/ipvs/ip_vs_wrr.c
+1
-1
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_core.c
+7
-8
net/netfilter/nf_conntrack_ecache.c
net/netfilter/nf_conntrack_ecache.c
+4
-6
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_helper.c
+110
-12
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_netlink.c
+9
-1
security/selinux/nlmsgtab.c
security/selinux/nlmsgtab.c
+0
-13
No files found.
Documentation/ABI/removed/ip_queue
0 → 100644
View file @
9bb862be
What: ip_queue
Date: finally removed in kernel v3.5.0
Contact: Pablo Neira Ayuso <pablo@netfilter.org>
Description:
ip_queue has been replaced by nfnetlink_queue which provides
more advanced queueing mechanism to user-space. The ip_queue
module was already announced to become obsolete years ago.
Users:
Documentation/networking/ip-sysctl.txt
View file @
9bb862be
...
@@ -1301,13 +1301,22 @@ bridge-nf-call-ip6tables - BOOLEAN
...
@@ -1301,13 +1301,22 @@ bridge-nf-call-ip6tables - BOOLEAN
bridge-nf-filter-vlan-tagged - BOOLEAN
bridge-nf-filter-vlan-tagged - BOOLEAN
1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables.
1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables.
0 : disable this.
0 : disable this.
Default:
1
Default:
0
bridge-nf-filter-pppoe-tagged - BOOLEAN
bridge-nf-filter-pppoe-tagged - BOOLEAN
1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables.
1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables.
0 : disable this.
0 : disable this.
Default:
1
Default:
0
bridge-nf-pass-vlan-input-dev - BOOLEAN
1: if bridge-nf-filter-vlan-tagged is enabled, try to find a vlan
interface on the bridge and set the netfilter input device to the vlan.
This allows use of e.g. "iptables -i br0.1" and makes the REDIRECT
target work with vlan-on-top-of-bridge interfaces. When no matching
vlan interface is found, or this switch is off, the input device is
set to the bridge interface.
0: disable bridge netfilter vlan interface lookup.
Default: 0
proc/sys/net/sctp/* Variables:
proc/sys/net/sctp/* Variables:
...
...
include/linux/ip_vs.h
View file @
9bb862be
...
@@ -89,6 +89,7 @@
...
@@ -89,6 +89,7 @@
#define IP_VS_CONN_F_TEMPLATE 0x1000
/* template, not connection */
#define IP_VS_CONN_F_TEMPLATE 0x1000
/* template, not connection */
#define IP_VS_CONN_F_ONE_PACKET 0x2000
/* forward only one packet */
#define IP_VS_CONN_F_ONE_PACKET 0x2000
/* forward only one packet */
/* Initial bits allowed in backup server */
#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \
#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \
IP_VS_CONN_F_NOOUTPUT | \
IP_VS_CONN_F_NOOUTPUT | \
IP_VS_CONN_F_INACTIVE | \
IP_VS_CONN_F_INACTIVE | \
...
@@ -97,6 +98,10 @@
...
@@ -97,6 +98,10 @@
IP_VS_CONN_F_TEMPLATE \
IP_VS_CONN_F_TEMPLATE \
)
)
/* Bits allowed to update in backup server */
#define IP_VS_CONN_F_BACKUP_UPD_MASK (IP_VS_CONN_F_INACTIVE | \
IP_VS_CONN_F_SEQ_MASK)
/* Flags that are not sent to backup server start from bit 16 */
/* Flags that are not sent to backup server start from bit 16 */
#define IP_VS_CONN_F_NFCT (1 << 16)
/* use netfilter conntrack */
#define IP_VS_CONN_F_NFCT (1 << 16)
/* use netfilter conntrack */
...
...
include/linux/netfilter/nf_conntrack_common.h
View file @
9bb862be
...
@@ -83,6 +83,10 @@ enum ip_conntrack_status {
...
@@ -83,6 +83,10 @@ enum ip_conntrack_status {
/* Conntrack is a fake untracked entry */
/* Conntrack is a fake untracked entry */
IPS_UNTRACKED_BIT
=
12
,
IPS_UNTRACKED_BIT
=
12
,
IPS_UNTRACKED
=
(
1
<<
IPS_UNTRACKED_BIT
),
IPS_UNTRACKED
=
(
1
<<
IPS_UNTRACKED_BIT
),
/* Conntrack got a helper explicitly attached via CT target. */
IPS_HELPER_BIT
=
13
,
IPS_HELPER
=
(
1
<<
IPS_HELPER_BIT
),
};
};
/* Connection tracking event types */
/* Connection tracking event types */
...
...
include/linux/netfilter_ipv4/Kbuild
View file @
9bb862be
header-y += ip_queue.h
header-y += ip_tables.h
header-y += ip_tables.h
header-y += ipt_CLUSTERIP.h
header-y += ipt_CLUSTERIP.h
header-y += ipt_ECN.h
header-y += ipt_ECN.h
...
...
include/linux/netfilter_ipv4/ip_queue.h
deleted
100644 → 0
View file @
b44907e6
/*
* This is a module which is used for queueing IPv4 packets and
* communicating with userspace via netlink.
*
* (C) 2000 James Morris, this code is GPL.
*/
#ifndef _IP_QUEUE_H
#define _IP_QUEUE_H
#ifdef __KERNEL__
#ifdef DEBUG_IPQ
#define QDEBUG(x...) printk(KERN_DEBUG ## x)
#else
#define QDEBUG(x...)
#endif
/* DEBUG_IPQ */
#else
#include <net/if.h>
#endif
/* ! __KERNEL__ */
/* Messages sent from kernel */
typedef
struct
ipq_packet_msg
{
unsigned
long
packet_id
;
/* ID of queued packet */
unsigned
long
mark
;
/* Netfilter mark value */
long
timestamp_sec
;
/* Packet arrival time (seconds) */
long
timestamp_usec
;
/* Packet arrvial time (+useconds) */
unsigned
int
hook
;
/* Netfilter hook we rode in on */
char
indev_name
[
IFNAMSIZ
];
/* Name of incoming interface */
char
outdev_name
[
IFNAMSIZ
];
/* Name of outgoing interface */
__be16
hw_protocol
;
/* Hardware protocol (network order) */
unsigned
short
hw_type
;
/* Hardware type */
unsigned
char
hw_addrlen
;
/* Hardware address length */
unsigned
char
hw_addr
[
8
];
/* Hardware address */
size_t
data_len
;
/* Length of packet data */
unsigned
char
payload
[
0
];
/* Optional packet data */
}
ipq_packet_msg_t
;
/* Messages sent from userspace */
typedef
struct
ipq_mode_msg
{
unsigned
char
value
;
/* Requested mode */
size_t
range
;
/* Optional range of packet requested */
}
ipq_mode_msg_t
;
typedef
struct
ipq_verdict_msg
{
unsigned
int
value
;
/* Verdict to hand to netfilter */
unsigned
long
id
;
/* Packet ID for this verdict */
size_t
data_len
;
/* Length of replacement data */
unsigned
char
payload
[
0
];
/* Optional replacement packet */
}
ipq_verdict_msg_t
;
typedef
struct
ipq_peer_msg
{
union
{
ipq_verdict_msg_t
verdict
;
ipq_mode_msg_t
mode
;
}
msg
;
}
ipq_peer_msg_t
;
/* Packet delivery modes */
enum
{
IPQ_COPY_NONE
,
/* Initial mode, packets are dropped */
IPQ_COPY_META
,
/* Copy metadata */
IPQ_COPY_PACKET
/* Copy metadata + packet (range) */
};
#define IPQ_COPY_MAX IPQ_COPY_PACKET
/* Types of messages */
#define IPQM_BASE 0x10
/* standard netlink messages below this */
#define IPQM_MODE (IPQM_BASE + 1)
/* Mode request from peer */
#define IPQM_VERDICT (IPQM_BASE + 2)
/* Verdict from peer */
#define IPQM_PACKET (IPQM_BASE + 3)
/* Packet from kernel */
#define IPQM_MAX (IPQM_BASE + 4)
#endif
/*_IP_QUEUE_H*/
include/linux/netlink.h
View file @
9bb862be
...
@@ -7,7 +7,7 @@
...
@@ -7,7 +7,7 @@
#define NETLINK_ROUTE 0
/* Routing/device hook */
#define NETLINK_ROUTE 0
/* Routing/device hook */
#define NETLINK_UNUSED 1
/* Unused number */
#define NETLINK_UNUSED 1
/* Unused number */
#define NETLINK_USERSOCK 2
/* Reserved for user mode socket protocols */
#define NETLINK_USERSOCK 2
/* Reserved for user mode socket protocols */
#define NETLINK_FIREWALL 3
/*
Firewalling hook
*/
#define NETLINK_FIREWALL 3
/*
Unused number, formerly ip_queue
*/
#define NETLINK_SOCK_DIAG 4
/* socket monitoring */
#define NETLINK_SOCK_DIAG 4
/* socket monitoring */
#define NETLINK_NFLOG 5
/* netfilter/iptables ULOG */
#define NETLINK_NFLOG 5
/* netfilter/iptables ULOG */
#define NETLINK_XFRM 6
/* ipsec */
#define NETLINK_XFRM 6
/* ipsec */
...
...
include/net/ip_vs.h
View file @
9bb862be
...
@@ -504,6 +504,7 @@ struct ip_vs_conn {
...
@@ -504,6 +504,7 @@ struct ip_vs_conn {
* state transition triggerd
* state transition triggerd
* synchronization
* synchronization
*/
*/
unsigned
long
sync_endtime
;
/* jiffies + sent_retries */
/* Control members */
/* Control members */
struct
ip_vs_conn
*
control
;
/* Master control connection */
struct
ip_vs_conn
*
control
;
/* Master control connection */
...
@@ -783,6 +784,16 @@ struct ip_vs_app {
...
@@ -783,6 +784,16 @@ struct ip_vs_app {
void
(
*
timeout_change
)(
struct
ip_vs_app
*
app
,
int
flags
);
void
(
*
timeout_change
)(
struct
ip_vs_app
*
app
,
int
flags
);
};
};
struct
ipvs_master_sync_state
{
struct
list_head
sync_queue
;
struct
ip_vs_sync_buff
*
sync_buff
;
int
sync_queue_len
;
unsigned
int
sync_queue_delay
;
struct
task_struct
*
master_thread
;
struct
delayed_work
master_wakeup_work
;
struct
netns_ipvs
*
ipvs
;
};
/* IPVS in network namespace */
/* IPVS in network namespace */
struct
netns_ipvs
{
struct
netns_ipvs
{
int
gen
;
/* Generation */
int
gen
;
/* Generation */
...
@@ -869,10 +880,15 @@ struct netns_ipvs {
...
@@ -869,10 +880,15 @@ struct netns_ipvs {
#endif
#endif
int
sysctl_snat_reroute
;
int
sysctl_snat_reroute
;
int
sysctl_sync_ver
;
int
sysctl_sync_ver
;
int
sysctl_sync_ports
;
int
sysctl_sync_qlen_max
;
int
sysctl_sync_sock_size
;
int
sysctl_cache_bypass
;
int
sysctl_cache_bypass
;
int
sysctl_expire_nodest_conn
;
int
sysctl_expire_nodest_conn
;
int
sysctl_expire_quiescent_template
;
int
sysctl_expire_quiescent_template
;
int
sysctl_sync_threshold
[
2
];
int
sysctl_sync_threshold
[
2
];
unsigned
int
sysctl_sync_refresh_period
;
int
sysctl_sync_retries
;
int
sysctl_nat_icmp_send
;
int
sysctl_nat_icmp_send
;
/* ip_vs_lblc */
/* ip_vs_lblc */
...
@@ -888,13 +904,11 @@ struct netns_ipvs {
...
@@ -888,13 +904,11 @@ struct netns_ipvs {
spinlock_t
est_lock
;
spinlock_t
est_lock
;
struct
timer_list
est_timer
;
/* Estimation timer */
struct
timer_list
est_timer
;
/* Estimation timer */
/* ip_vs_sync */
/* ip_vs_sync */
struct
list_head
sync_queue
;
spinlock_t
sync_lock
;
spinlock_t
sync_lock
;
struct
ip
_vs_sync_buff
*
sync_buff
;
struct
ip
vs_master_sync_state
*
ms
;
spinlock_t
sync_buff_lock
;
spinlock_t
sync_buff_lock
;
struct
sockaddr_in
sync_mcast_addr
;
struct
task_struct
**
backup_threads
;
struct
task_struct
*
master_thread
;
int
threads_mask
;
struct
task_struct
*
backup_thread
;
int
send_mesg_maxlen
;
int
send_mesg_maxlen
;
int
recv_mesg_maxlen
;
int
recv_mesg_maxlen
;
volatile
int
sync_state
;
volatile
int
sync_state
;
...
@@ -911,6 +925,14 @@ struct netns_ipvs {
...
@@ -911,6 +925,14 @@ struct netns_ipvs {
#define DEFAULT_SYNC_THRESHOLD 3
#define DEFAULT_SYNC_THRESHOLD 3
#define DEFAULT_SYNC_PERIOD 50
#define DEFAULT_SYNC_PERIOD 50
#define DEFAULT_SYNC_VER 1
#define DEFAULT_SYNC_VER 1
#define DEFAULT_SYNC_REFRESH_PERIOD (0U * HZ)
#define DEFAULT_SYNC_RETRIES 0
#define IPVS_SYNC_WAKEUP_RATE 8
#define IPVS_SYNC_QLEN_MAX (IPVS_SYNC_WAKEUP_RATE * 4)
#define IPVS_SYNC_SEND_DELAY (HZ / 50)
#define IPVS_SYNC_CHECK_PERIOD HZ
#define IPVS_SYNC_FLUSH_TIME (HZ * 2)
#define IPVS_SYNC_PORTS_MAX (1 << 6)
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_SYSCTL
...
@@ -921,7 +943,17 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
...
@@ -921,7 +943,17 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
static
inline
int
sysctl_sync_period
(
struct
netns_ipvs
*
ipvs
)
static
inline
int
sysctl_sync_period
(
struct
netns_ipvs
*
ipvs
)
{
{
return
ipvs
->
sysctl_sync_threshold
[
1
];
return
ACCESS_ONCE
(
ipvs
->
sysctl_sync_threshold
[
1
]);
}
static
inline
unsigned
int
sysctl_sync_refresh_period
(
struct
netns_ipvs
*
ipvs
)
{
return
ACCESS_ONCE
(
ipvs
->
sysctl_sync_refresh_period
);
}
static
inline
int
sysctl_sync_retries
(
struct
netns_ipvs
*
ipvs
)
{
return
ipvs
->
sysctl_sync_retries
;
}
}
static
inline
int
sysctl_sync_ver
(
struct
netns_ipvs
*
ipvs
)
static
inline
int
sysctl_sync_ver
(
struct
netns_ipvs
*
ipvs
)
...
@@ -929,6 +961,21 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
...
@@ -929,6 +961,21 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
return
ipvs
->
sysctl_sync_ver
;
return
ipvs
->
sysctl_sync_ver
;
}
}
static
inline
int
sysctl_sync_ports
(
struct
netns_ipvs
*
ipvs
)
{
return
ACCESS_ONCE
(
ipvs
->
sysctl_sync_ports
);
}
static
inline
int
sysctl_sync_qlen_max
(
struct
netns_ipvs
*
ipvs
)
{
return
ipvs
->
sysctl_sync_qlen_max
;
}
static
inline
int
sysctl_sync_sock_size
(
struct
netns_ipvs
*
ipvs
)
{
return
ipvs
->
sysctl_sync_sock_size
;
}
#else
#else
static
inline
int
sysctl_sync_threshold
(
struct
netns_ipvs
*
ipvs
)
static
inline
int
sysctl_sync_threshold
(
struct
netns_ipvs
*
ipvs
)
...
@@ -941,11 +988,36 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
...
@@ -941,11 +988,36 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
return
DEFAULT_SYNC_PERIOD
;
return
DEFAULT_SYNC_PERIOD
;
}
}
static
inline
unsigned
int
sysctl_sync_refresh_period
(
struct
netns_ipvs
*
ipvs
)
{
return
DEFAULT_SYNC_REFRESH_PERIOD
;
}
static
inline
int
sysctl_sync_retries
(
struct
netns_ipvs
*
ipvs
)
{
return
DEFAULT_SYNC_RETRIES
&
3
;
}
static
inline
int
sysctl_sync_ver
(
struct
netns_ipvs
*
ipvs
)
static
inline
int
sysctl_sync_ver
(
struct
netns_ipvs
*
ipvs
)
{
{
return
DEFAULT_SYNC_VER
;
return
DEFAULT_SYNC_VER
;
}
}
static
inline
int
sysctl_sync_ports
(
struct
netns_ipvs
*
ipvs
)
{
return
1
;
}
static
inline
int
sysctl_sync_qlen_max
(
struct
netns_ipvs
*
ipvs
)
{
return
IPVS_SYNC_QLEN_MAX
;
}
static
inline
int
sysctl_sync_sock_size
(
struct
netns_ipvs
*
ipvs
)
{
return
0
;
}
#endif
#endif
/*
/*
...
@@ -1185,7 +1257,6 @@ extern void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
...
@@ -1185,7 +1257,6 @@ extern void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
extern
struct
ip_vs_stats
ip_vs_stats
;
extern
struct
ip_vs_stats
ip_vs_stats
;
extern
int
sysctl_ip_vs_sync_ver
;
extern
int
sysctl_ip_vs_sync_ver
;
extern
void
ip_vs_sync_switch_mode
(
struct
net
*
net
,
int
mode
);
extern
struct
ip_vs_service
*
extern
struct
ip_vs_service
*
ip_vs_service_get
(
struct
net
*
net
,
int
af
,
__u32
fwmark
,
__u16
protocol
,
ip_vs_service_get
(
struct
net
*
net
,
int
af
,
__u32
fwmark
,
__u16
protocol
,
const
union
nf_inet_addr
*
vaddr
,
__be16
vport
);
const
union
nf_inet_addr
*
vaddr
,
__be16
vport
);
...
@@ -1219,7 +1290,7 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
...
@@ -1219,7 +1290,7 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
extern
int
start_sync_thread
(
struct
net
*
net
,
int
state
,
char
*
mcast_ifn
,
extern
int
start_sync_thread
(
struct
net
*
net
,
int
state
,
char
*
mcast_ifn
,
__u8
syncid
);
__u8
syncid
);
extern
int
stop_sync_thread
(
struct
net
*
net
,
int
state
);
extern
int
stop_sync_thread
(
struct
net
*
net
,
int
state
);
extern
void
ip_vs_sync_conn
(
struct
net
*
net
,
struct
ip_vs_conn
*
cp
);
extern
void
ip_vs_sync_conn
(
struct
net
*
net
,
struct
ip_vs_conn
*
cp
,
int
pkts
);
/*
/*
...
...
include/net/netfilter/nf_conntrack.h
View file @
9bb862be
...
@@ -321,14 +321,8 @@ extern unsigned int nf_conntrack_max;
...
@@ -321,14 +321,8 @@ extern unsigned int nf_conntrack_max;
extern
unsigned
int
nf_conntrack_hash_rnd
;
extern
unsigned
int
nf_conntrack_hash_rnd
;
void
init_nf_conntrack_hash_rnd
(
void
);
void
init_nf_conntrack_hash_rnd
(
void
);
#define NF_CT_STAT_INC(net, count) \
#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
__this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_INC_ATOMIC(net, count) \
do { \
local_bh_disable(); \
__this_cpu_inc((net)->ct.stat->count); \
local_bh_enable(); \
} while (0)
#define MODULE_ALIAS_NFCT_HELPER(helper) \
#define MODULE_ALIAS_NFCT_HELPER(helper) \
MODULE_ALIAS("nfct-helper-" helper)
MODULE_ALIAS("nfct-helper-" helper)
...
...
include/net/netfilter/nf_conntrack_helper.h
View file @
9bb862be
...
@@ -60,8 +60,8 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
...
@@ -60,8 +60,8 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
return
nf_ct_ext_find
(
ct
,
NF_CT_EXT_HELPER
);
return
nf_ct_ext_find
(
ct
,
NF_CT_EXT_HELPER
);
}
}
extern
int
nf_conntrack_helper_init
(
void
);
extern
int
nf_conntrack_helper_init
(
struct
net
*
net
);
extern
void
nf_conntrack_helper_fini
(
void
);
extern
void
nf_conntrack_helper_fini
(
struct
net
*
net
);
extern
int
nf_conntrack_broadcast_help
(
struct
sk_buff
*
skb
,
extern
int
nf_conntrack_broadcast_help
(
struct
sk_buff
*
skb
,
unsigned
int
protoff
,
unsigned
int
protoff
,
...
...
include/net/netns/conntrack.h
View file @
9bb862be
...
@@ -26,11 +26,14 @@ struct netns_ct {
...
@@ -26,11 +26,14 @@ struct netns_ct {
int
sysctl_tstamp
;
int
sysctl_tstamp
;
int
sysctl_checksum
;
int
sysctl_checksum
;
unsigned
int
sysctl_log_invalid
;
/* Log invalid packets */
unsigned
int
sysctl_log_invalid
;
/* Log invalid packets */
int
sysctl_auto_assign_helper
;
bool
auto_assign_helper_warned
;
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_SYSCTL
struct
ctl_table_header
*
sysctl_header
;
struct
ctl_table_header
*
sysctl_header
;
struct
ctl_table_header
*
acct_sysctl_header
;
struct
ctl_table_header
*
acct_sysctl_header
;
struct
ctl_table_header
*
tstamp_sysctl_header
;
struct
ctl_table_header
*
tstamp_sysctl_header
;
struct
ctl_table_header
*
event_sysctl_header
;
struct
ctl_table_header
*
event_sysctl_header
;
struct
ctl_table_header
*
helper_sysctl_header
;
#endif
#endif
char
*
slabname
;
char
*
slabname
;
};
};
...
...
net/bridge/br_netfilter.c
View file @
9bb862be
...
@@ -54,12 +54,14 @@ static int brnf_call_ip6tables __read_mostly = 1;
...
@@ -54,12 +54,14 @@ static int brnf_call_ip6tables __read_mostly = 1;
static
int
brnf_call_arptables
__read_mostly
=
1
;
static
int
brnf_call_arptables
__read_mostly
=
1
;
static
int
brnf_filter_vlan_tagged
__read_mostly
=
0
;
static
int
brnf_filter_vlan_tagged
__read_mostly
=
0
;
static
int
brnf_filter_pppoe_tagged
__read_mostly
=
0
;
static
int
brnf_filter_pppoe_tagged
__read_mostly
=
0
;
static
int
brnf_pass_vlan_indev
__read_mostly
=
0
;
#else
#else
#define brnf_call_iptables 1
#define brnf_call_iptables 1
#define brnf_call_ip6tables 1
#define brnf_call_ip6tables 1
#define brnf_call_arptables 1
#define brnf_call_arptables 1
#define brnf_filter_vlan_tagged 0
#define brnf_filter_vlan_tagged 0
#define brnf_filter_pppoe_tagged 0
#define brnf_filter_pppoe_tagged 0
#define brnf_pass_vlan_indev 0
#endif
#endif
#define IS_IP(skb) \
#define IS_IP(skb) \
...
@@ -503,6 +505,19 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
...
@@ -503,6 +505,19 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
return
0
;
return
0
;
}
}
static
struct
net_device
*
brnf_get_logical_dev
(
struct
sk_buff
*
skb
,
const
struct
net_device
*
dev
)
{
struct
net_device
*
vlan
,
*
br
;
br
=
bridge_parent
(
dev
);
if
(
brnf_pass_vlan_indev
==
0
||
!
vlan_tx_tag_present
(
skb
))
return
br
;
vlan
=
__vlan_find_dev_deep
(
br
,
vlan_tx_tag_get
(
skb
)
&
VLAN_VID_MASK
);
return
vlan
?
vlan
:
br
;
}
/* Some common code for IPv4/IPv6 */
/* Some common code for IPv4/IPv6 */
static
struct
net_device
*
setup_pre_routing
(
struct
sk_buff
*
skb
)
static
struct
net_device
*
setup_pre_routing
(
struct
sk_buff
*
skb
)
{
{
...
@@ -515,7 +530,7 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
...
@@ -515,7 +530,7 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
nf_bridge
->
mask
|=
BRNF_NF_BRIDGE_PREROUTING
;
nf_bridge
->
mask
|=
BRNF_NF_BRIDGE_PREROUTING
;
nf_bridge
->
physindev
=
skb
->
dev
;
nf_bridge
->
physindev
=
skb
->
dev
;
skb
->
dev
=
br
idge_parent
(
skb
->
dev
);
skb
->
dev
=
br
nf_get_logical_dev
(
skb
,
skb
->
dev
);
if
(
skb
->
protocol
==
htons
(
ETH_P_8021Q
))
if
(
skb
->
protocol
==
htons
(
ETH_P_8021Q
))
nf_bridge
->
mask
|=
BRNF_8021Q
;
nf_bridge
->
mask
|=
BRNF_8021Q
;
else
if
(
skb
->
protocol
==
htons
(
ETH_P_PPP_SES
))
else
if
(
skb
->
protocol
==
htons
(
ETH_P_PPP_SES
))
...
@@ -774,7 +789,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
...
@@ -774,7 +789,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
else
else
skb
->
protocol
=
htons
(
ETH_P_IPV6
);
skb
->
protocol
=
htons
(
ETH_P_IPV6
);
NF_HOOK
(
pf
,
NF_INET_FORWARD
,
skb
,
br
idge_parent
(
in
),
parent
,
NF_HOOK
(
pf
,
NF_INET_FORWARD
,
skb
,
br
nf_get_logical_dev
(
skb
,
in
),
parent
,
br_nf_forward_finish
);
br_nf_forward_finish
);
return
NF_STOLEN
;
return
NF_STOLEN
;
...
@@ -1002,6 +1017,13 @@ static ctl_table brnf_table[] = {
...
@@ -1002,6 +1017,13 @@ static ctl_table brnf_table[] = {
.
mode
=
0644
,
.
mode
=
0644
,
.
proc_handler
=
brnf_sysctl_call_tables
,
.
proc_handler
=
brnf_sysctl_call_tables
,
},
},
{
.
procname
=
"bridge-nf-pass-vlan-input-dev"
,
.
data
=
&
brnf_pass_vlan_indev
,
.
maxlen
=
sizeof
(
int
),
.
mode
=
0644
,
.
proc_handler
=
brnf_sysctl_call_tables
,
},
{
}
{
}
};
};
#endif
#endif
...
...
net/core/sock.c
View file @
9bb862be
...
@@ -259,7 +259,9 @@ static struct lock_class_key af_callback_keys[AF_MAX];
...
@@ -259,7 +259,9 @@ static struct lock_class_key af_callback_keys[AF_MAX];
/* Run time adjustable parameters. */
/* Run time adjustable parameters. */
__u32
sysctl_wmem_max
__read_mostly
=
SK_WMEM_MAX
;
__u32
sysctl_wmem_max
__read_mostly
=
SK_WMEM_MAX
;
EXPORT_SYMBOL
(
sysctl_wmem_max
);
__u32
sysctl_rmem_max
__read_mostly
=
SK_RMEM_MAX
;
__u32
sysctl_rmem_max
__read_mostly
=
SK_RMEM_MAX
;
EXPORT_SYMBOL
(
sysctl_rmem_max
);
__u32
sysctl_wmem_default
__read_mostly
=
SK_WMEM_MAX
;
__u32
sysctl_wmem_default
__read_mostly
=
SK_WMEM_MAX
;
__u32
sysctl_rmem_default
__read_mostly
=
SK_RMEM_MAX
;
__u32
sysctl_rmem_default
__read_mostly
=
SK_RMEM_MAX
;
...
...
net/ipv4/netfilter/Makefile
View file @
9bb862be
...
@@ -66,6 +66,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
...
@@ -66,6 +66,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
# just filtering instance of ARP tables for now
# just filtering instance of ARP tables for now
obj-$(CONFIG_IP_NF_ARPFILTER)
+=
arptable_filter.o
obj-$(CONFIG_IP_NF_ARPFILTER)
+=
arptable_filter.o
obj-$(CONFIG_IP_NF_QUEUE)
+=
ip_queue.o
net/ipv4/netfilter/ip_queue.c
deleted
100644 → 0
View file @
b44907e6
/*
* This is a module which is used for queueing IPv4 packets and
* communicating with userspace via netlink.
*
* (C) 2000-2002 James Morris <jmorris@intercode.com.au>
* (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_queue.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/security.h>
#include <linux/net.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/route.h>
#include <net/netfilter/nf_queue.h>
#include <net/ip.h>
#define IPQ_QMAX_DEFAULT 1024
#define IPQ_PROC_FS_NAME "ip_queue"
#define NET_IPQ_QMAX 2088
#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
typedef
int
(
*
ipq_cmpfn
)(
struct
nf_queue_entry
*
,
unsigned
long
);
static
unsigned
char
copy_mode
__read_mostly
=
IPQ_COPY_NONE
;
static
unsigned
int
queue_maxlen
__read_mostly
=
IPQ_QMAX_DEFAULT
;
static
DEFINE_SPINLOCK
(
queue_lock
);
static
int
peer_pid
__read_mostly
;
static
unsigned
int
copy_range
__read_mostly
;
static
unsigned
int
queue_total
;
static
unsigned
int
queue_dropped
=
0
;
static
unsigned
int
queue_user_dropped
=
0
;
static
struct
sock
*
ipqnl
__read_mostly
;
static
LIST_HEAD
(
queue_list
);
static
DEFINE_MUTEX
(
ipqnl_mutex
);
static
inline
void
__ipq_enqueue_entry
(
struct
nf_queue_entry
*
entry
)
{
list_add_tail
(
&
entry
->
list
,
&
queue_list
);
queue_total
++
;
}
static
inline
int
__ipq_set_mode
(
unsigned
char
mode
,
unsigned
int
range
)
{
int
status
=
0
;
switch
(
mode
)
{
case
IPQ_COPY_NONE
:
case
IPQ_COPY_META
:
copy_mode
=
mode
;
copy_range
=
0
;
break
;
case
IPQ_COPY_PACKET
:
if
(
range
>
0xFFFF
)
range
=
0xFFFF
;
copy_range
=
range
;
copy_mode
=
mode
;
break
;
default:
status
=
-
EINVAL
;
}
return
status
;
}
static
void
__ipq_flush
(
ipq_cmpfn
cmpfn
,
unsigned
long
data
);
static
inline
void
__ipq_reset
(
void
)
{
peer_pid
=
0
;
net_disable_timestamp
();
__ipq_set_mode
(
IPQ_COPY_NONE
,
0
);
__ipq_flush
(
NULL
,
0
);
}
static
struct
nf_queue_entry
*
ipq_find_dequeue_entry
(
unsigned
long
id
)
{
struct
nf_queue_entry
*
entry
=
NULL
,
*
i
;
spin_lock_bh
(
&
queue_lock
);
list_for_each_entry
(
i
,
&
queue_list
,
list
)
{
if
((
unsigned
long
)
i
==
id
)
{
entry
=
i
;
break
;
}
}
if
(
entry
)
{
list_del
(
&
entry
->
list
);
queue_total
--
;
}
spin_unlock_bh
(
&
queue_lock
);
return
entry
;
}
static
void
__ipq_flush
(
ipq_cmpfn
cmpfn
,
unsigned
long
data
)
{
struct
nf_queue_entry
*
entry
,
*
next
;
list_for_each_entry_safe
(
entry
,
next
,
&
queue_list
,
list
)
{
if
(
!
cmpfn
||
cmpfn
(
entry
,
data
))
{
list_del
(
&
entry
->
list
);
queue_total
--
;
nf_reinject
(
entry
,
NF_DROP
);
}
}
}
static
void
ipq_flush
(
ipq_cmpfn
cmpfn
,
unsigned
long
data
)
{
spin_lock_bh
(
&
queue_lock
);
__ipq_flush
(
cmpfn
,
data
);
spin_unlock_bh
(
&
queue_lock
);
}
static
struct
sk_buff
*
ipq_build_packet_message
(
struct
nf_queue_entry
*
entry
,
int
*
errp
)
{
sk_buff_data_t
old_tail
;
size_t
size
=
0
;
size_t
data_len
=
0
;
struct
sk_buff
*
skb
;
struct
ipq_packet_msg
*
pmsg
;
struct
nlmsghdr
*
nlh
;
struct
timeval
tv
;
switch
(
ACCESS_ONCE
(
copy_mode
))
{
case
IPQ_COPY_META
:
case
IPQ_COPY_NONE
:
size
=
NLMSG_SPACE
(
sizeof
(
*
pmsg
));
break
;
case
IPQ_COPY_PACKET
:
if
(
entry
->
skb
->
ip_summed
==
CHECKSUM_PARTIAL
&&
(
*
errp
=
skb_checksum_help
(
entry
->
skb
)))
return
NULL
;
data_len
=
ACCESS_ONCE
(
copy_range
);
if
(
data_len
==
0
||
data_len
>
entry
->
skb
->
len
)
data_len
=
entry
->
skb
->
len
;
size
=
NLMSG_SPACE
(
sizeof
(
*
pmsg
)
+
data_len
);
break
;
default:
*
errp
=
-
EINVAL
;
return
NULL
;
}
skb
=
alloc_skb
(
size
,
GFP_ATOMIC
);
if
(
!
skb
)
goto
nlmsg_failure
;
old_tail
=
skb
->
tail
;
nlh
=
NLMSG_PUT
(
skb
,
0
,
0
,
IPQM_PACKET
,
size
-
sizeof
(
*
nlh
));
pmsg
=
NLMSG_DATA
(
nlh
);
memset
(
pmsg
,
0
,
sizeof
(
*
pmsg
));
pmsg
->
packet_id
=
(
unsigned
long
)
entry
;
pmsg
->
data_len
=
data_len
;
tv
=
ktime_to_timeval
(
entry
->
skb
->
tstamp
);
pmsg
->
timestamp_sec
=
tv
.
tv_sec
;
pmsg
->
timestamp_usec
=
tv
.
tv_usec
;
pmsg
->
mark
=
entry
->
skb
->
mark
;
pmsg
->
hook
=
entry
->
hook
;
pmsg
->
hw_protocol
=
entry
->
skb
->
protocol
;
if
(
entry
->
indev
)
strcpy
(
pmsg
->
indev_name
,
entry
->
indev
->
name
);
else
pmsg
->
indev_name
[
0
]
=
'\0'
;
if
(
entry
->
outdev
)
strcpy
(
pmsg
->
outdev_name
,
entry
->
outdev
->
name
);
else
pmsg
->
outdev_name
[
0
]
=
'\0'
;
if
(
entry
->
indev
&&
entry
->
skb
->
dev
&&
entry
->
skb
->
mac_header
!=
entry
->
skb
->
network_header
)
{
pmsg
->
hw_type
=
entry
->
skb
->
dev
->
type
;
pmsg
->
hw_addrlen
=
dev_parse_header
(
entry
->
skb
,
pmsg
->
hw_addr
);
}
if
(
data_len
)
if
(
skb_copy_bits
(
entry
->
skb
,
0
,
pmsg
->
payload
,
data_len
))
BUG
();
nlh
->
nlmsg_len
=
skb
->
tail
-
old_tail
;
return
skb
;
nlmsg_failure:
kfree_skb
(
skb
);
*
errp
=
-
EINVAL
;
printk
(
KERN_ERR
"ip_queue: error creating packet message
\n
"
);
return
NULL
;
}
static
int
ipq_enqueue_packet
(
struct
nf_queue_entry
*
entry
,
unsigned
int
queuenum
)
{
int
status
=
-
EINVAL
;
struct
sk_buff
*
nskb
;
if
(
copy_mode
==
IPQ_COPY_NONE
)
return
-
EAGAIN
;
nskb
=
ipq_build_packet_message
(
entry
,
&
status
);
if
(
nskb
==
NULL
)
return
status
;
spin_lock_bh
(
&
queue_lock
);
if
(
!
peer_pid
)
goto
err_out_free_nskb
;
if
(
queue_total
>=
queue_maxlen
)
{
queue_dropped
++
;
status
=
-
ENOSPC
;
if
(
net_ratelimit
())
printk
(
KERN_WARNING
"ip_queue: full at %d entries, "
"dropping packets(s). Dropped: %d
\n
"
,
queue_total
,
queue_dropped
);
goto
err_out_free_nskb
;
}
/* netlink_unicast will either free the nskb or attach it to a socket */
status
=
netlink_unicast
(
ipqnl
,
nskb
,
peer_pid
,
MSG_DONTWAIT
);
if
(
status
<
0
)
{
queue_user_dropped
++
;
goto
err_out_unlock
;
}
__ipq_enqueue_entry
(
entry
);
spin_unlock_bh
(
&
queue_lock
);
return
status
;
err_out_free_nskb:
kfree_skb
(
nskb
);
err_out_unlock:
spin_unlock_bh
(
&
queue_lock
);
return
status
;
}
static
int
ipq_mangle_ipv4
(
ipq_verdict_msg_t
*
v
,
struct
nf_queue_entry
*
e
)
{
int
diff
;
struct
iphdr
*
user_iph
=
(
struct
iphdr
*
)
v
->
payload
;
struct
sk_buff
*
nskb
;
if
(
v
->
data_len
<
sizeof
(
*
user_iph
))
return
0
;
diff
=
v
->
data_len
-
e
->
skb
->
len
;
if
(
diff
<
0
)
{
if
(
pskb_trim
(
e
->
skb
,
v
->
data_len
))
return
-
ENOMEM
;
}
else
if
(
diff
>
0
)
{
if
(
v
->
data_len
>
0xFFFF
)
return
-
EINVAL
;
if
(
diff
>
skb_tailroom
(
e
->
skb
))
{
nskb
=
skb_copy_expand
(
e
->
skb
,
skb_headroom
(
e
->
skb
),
diff
,
GFP_ATOMIC
);
if
(
!
nskb
)
{
printk
(
KERN_WARNING
"ip_queue: error "
"in mangle, dropping packet
\n
"
);
return
-
ENOMEM
;
}
kfree_skb
(
e
->
skb
);
e
->
skb
=
nskb
;
}
skb_put
(
e
->
skb
,
diff
);
}
if
(
!
skb_make_writable
(
e
->
skb
,
v
->
data_len
))
return
-
ENOMEM
;
skb_copy_to_linear_data
(
e
->
skb
,
v
->
payload
,
v
->
data_len
);
e
->
skb
->
ip_summed
=
CHECKSUM_NONE
;
return
0
;
}
static
int
ipq_set_verdict
(
struct
ipq_verdict_msg
*
vmsg
,
unsigned
int
len
)
{
struct
nf_queue_entry
*
entry
;
if
(
vmsg
->
value
>
NF_MAX_VERDICT
||
vmsg
->
value
==
NF_STOLEN
)
return
-
EINVAL
;
entry
=
ipq_find_dequeue_entry
(
vmsg
->
id
);
if
(
entry
==
NULL
)
return
-
ENOENT
;
else
{
int
verdict
=
vmsg
->
value
;
if
(
vmsg
->
data_len
&&
vmsg
->
data_len
==
len
)
if
(
ipq_mangle_ipv4
(
vmsg
,
entry
)
<
0
)
verdict
=
NF_DROP
;
nf_reinject
(
entry
,
verdict
);
return
0
;
}
}
static
int
ipq_set_mode
(
unsigned
char
mode
,
unsigned
int
range
)
{
int
status
;
spin_lock_bh
(
&
queue_lock
);
status
=
__ipq_set_mode
(
mode
,
range
);
spin_unlock_bh
(
&
queue_lock
);
return
status
;
}
static
int
ipq_receive_peer
(
struct
ipq_peer_msg
*
pmsg
,
unsigned
char
type
,
unsigned
int
len
)
{
int
status
=
0
;
if
(
len
<
sizeof
(
*
pmsg
))
return
-
EINVAL
;
switch
(
type
)
{
case
IPQM_MODE
:
status
=
ipq_set_mode
(
pmsg
->
msg
.
mode
.
value
,
pmsg
->
msg
.
mode
.
range
);
break
;
case
IPQM_VERDICT
:
status
=
ipq_set_verdict
(
&
pmsg
->
msg
.
verdict
,
len
-
sizeof
(
*
pmsg
));
break
;
default:
status
=
-
EINVAL
;
}
return
status
;
}
/*
 * Match callback for ipq_flush(): non-zero when the queued entry refers
 * to the interface with index @ifindex, either directly or (with bridge
 * netfilter) via the physical in/out devices.
 */
static int
dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
{
	if (entry->indev && entry->indev->ifindex == ifindex)
		return 1;

	if (entry->outdev && entry->outdev->ifindex == ifindex)
		return 1;

#ifdef CONFIG_BRIDGE_NETFILTER
	if (entry->skb->nf_bridge) {
		if (entry->skb->nf_bridge->physindev &&
		    entry->skb->nf_bridge->physindev->ifindex == ifindex)
			return 1;
		if (entry->skb->nf_bridge->physoutdev &&
		    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
			return 1;
	}
#endif
	return 0;
}

/* Drop (NF_DROP + reinject) every queued packet tied to @ifindex. */
static void
ipq_dev_drop(int ifindex)
{
	ipq_flush(dev_cmp, ifindex);
}
/* Nack the offending message with @err and bail out of __ipq_rcv_skb(). */
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)

/*
 * Validate and process one netlink message from userspace: length,
 * flag and capability checks, single-peer registration, then dispatch
 * of the IPQM_* payload.  Caller holds ipqnl_mutex.
 */
static inline void
__ipq_rcv_skb(struct sk_buff *skb)
{
	int status, type, pid, flags;
	unsigned int nlmsglen, skblen;
	struct nlmsghdr *nlh;
	bool enable_timestamp = false;

	skblen = skb->len;
	if (skblen < sizeof(*nlh))
		return;

	nlh = nlmsg_hdr(skb);
	nlmsglen = nlh->nlmsg_len;
	/* Header must be sane and fully contained in the skb. */
	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
		return;

	pid = nlh->nlmsg_pid;
	flags = nlh->nlmsg_flags;

	/* Only explicit, single-part requests from a real process. */
	if (pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
		RCV_SKB_FAIL(-EINVAL);

	if (flags & MSG_TRUNC)
		RCV_SKB_FAIL(-ECOMM);

	type = nlh->nlmsg_type;
	if (type < NLMSG_NOOP || type >= IPQM_MAX)
		RCV_SKB_FAIL(-EINVAL);

	/* Netlink control messages (type <= IPQM_BASE) are ignored. */
	if (type <= IPQM_BASE)
		return;

	if (!capable(CAP_NET_ADMIN))
		RCV_SKB_FAIL(-EPERM);

	spin_lock_bh(&queue_lock);

	if (peer_pid) {
		/* Only one userspace peer may be registered at a time. */
		if (peer_pid != pid) {
			spin_unlock_bh(&queue_lock);
			RCV_SKB_FAIL(-EBUSY);
		}
	} else {
		/* First message registers the sender as the peer. */
		enable_timestamp = true;
		peer_pid = pid;
	}

	spin_unlock_bh(&queue_lock);

	/* Timestamp enabling is deliberately done after dropping the
	 * spinlock — NOTE(review): presumably because it can block;
	 * balanced by net_disable_timestamp() in __ipq_reset(). */
	if (enable_timestamp)
		net_enable_timestamp();

	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
				  nlmsglen - NLMSG_LENGTH(0));
	if (status < 0)
		RCV_SKB_FAIL(status);

	if (flags & NLM_F_ACK)
		netlink_ack(skb, nlh, 0);
}

/* Netlink input callback: serializes message handling on ipqnl_mutex. */
static void
ipq_rcv_skb(struct sk_buff *skb)
{
	mutex_lock(&ipqnl_mutex);
	__ipq_rcv_skb(skb);
	mutex_unlock(&ipqnl_mutex);
}
/*
 * Netdevice notifier: when a device in the initial namespace goes down,
 * purge packets queued on it so they are not reinjected through a dead
 * interface.
 */
static int
ipq_rcv_dev_event(struct notifier_block *this,
		  unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	/* Drop any packets associated with the downed device */
	if (event == NETDEV_DOWN && net_eq(dev_net(dev), &init_net))
		ipq_dev_drop(dev->ifindex);

	return NOTIFY_DONE;
}

static struct notifier_block ipq_dev_notifier = {
	.notifier_call	= ipq_rcv_dev_event,
};
/*
 * Netlink notifier: when the registered peer closes its NETLINK_FIREWALL
 * socket, forget it and flush everything still queued.
 */
static int
ipq_rcv_nl_event(struct notifier_block *this,
		 unsigned long event, void *ptr)
{
	struct netlink_notify *notify = ptr;

	if (event != NETLINK_URELEASE || notify->protocol != NETLINK_FIREWALL)
		return NOTIFY_DONE;

	spin_lock_bh(&queue_lock);
	if (net_eq(notify->net, &init_net) && notify->pid == peer_pid)
		__ipq_reset();
	spin_unlock_bh(&queue_lock);

	return NOTIFY_DONE;
}

static struct notifier_block ipq_nl_notifier = {
	.notifier_call	= ipq_rcv_nl_event,
};
#ifdef CONFIG_SYSCTL
static
struct
ctl_table_header
*
ipq_sysctl_header
;
static
ctl_table
ipq_table
[]
=
{
{
.
procname
=
NET_IPQ_QMAX_NAME
,
.
data
=
&
queue_maxlen
,
.
maxlen
=
sizeof
(
queue_maxlen
),
.
mode
=
0644
,
.
proc_handler
=
proc_dointvec
},
{
}
};
#endif
#ifdef CONFIG_PROC_FS
static
int
ip_queue_show
(
struct
seq_file
*
m
,
void
*
v
)
{
spin_lock_bh
(
&
queue_lock
);
seq_printf
(
m
,
"Peer PID : %d
\n
"
"Copy mode : %hu
\n
"
"Copy range : %u
\n
"
"Queue length : %u
\n
"
"Queue max. length : %u
\n
"
"Queue dropped : %u
\n
"
"Netlink dropped : %u
\n
"
,
peer_pid
,
copy_mode
,
copy_range
,
queue_total
,
queue_maxlen
,
queue_dropped
,
queue_user_dropped
);
spin_unlock_bh
(
&
queue_lock
);
return
0
;
}
static
int
ip_queue_open
(
struct
inode
*
inode
,
struct
file
*
file
)
{
return
single_open
(
file
,
ip_queue_show
,
NULL
);
}
static
const
struct
file_operations
ip_queue_proc_fops
=
{
.
open
=
ip_queue_open
,
.
read
=
seq_read
,
.
llseek
=
seq_lseek
,
.
release
=
single_release
,
.
owner
=
THIS_MODULE
,
};
#endif
/* Queue handler registered with netfilter for NFPROTO_IPV4. */
static const struct nf_queue_handler nfqh = {
	.name	= "ip_queue",
	.outfn	= &ipq_enqueue_packet,
};
/*
 * Module init: register the netlink notifier, create the
 * NETLINK_FIREWALL kernel socket, expose /proc and sysctl entries, and
 * finally register the netfilter queue handler.  Teardown on failure
 * unwinds in reverse order via the goto labels.
 */
static int __init ip_queue_init(void)
{
	int status = -ENOMEM;
	struct proc_dir_entry *proc __maybe_unused;

	netlink_register_notifier(&ipq_nl_notifier);
	ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
				      ipq_rcv_skb, NULL, THIS_MODULE);
	if (ipqnl == NULL) {
		printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
		goto cleanup_netlink_notifier;
	}

#ifdef CONFIG_PROC_FS
	proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
			   &ip_queue_proc_fops);
	if (!proc) {
		printk(KERN_ERR "ip_queue: failed to create proc entry\n");
		goto cleanup_ipqnl;
	}
#endif
	register_netdevice_notifier(&ipq_dev_notifier);
#ifdef CONFIG_SYSCTL
	ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv4", ipq_table);
#endif
	/* Last step: once registered, packets may start arriving. */
	status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
	if (status < 0) {
		printk(KERN_ERR "ip_queue: failed to register queue handler\n");
		goto cleanup_sysctl;
	}
	return status;

cleanup_sysctl:
#ifdef CONFIG_SYSCTL
	unregister_net_sysctl_table(ipq_sysctl_header);
#endif
	unregister_netdevice_notifier(&ipq_dev_notifier);
	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);

cleanup_ipqnl: __maybe_unused
	netlink_kernel_release(ipqnl);
	/* Lock/unlock pair waits out any in-flight __ipq_rcv_skb(). */
	mutex_lock(&ipqnl_mutex);
	mutex_unlock(&ipqnl_mutex);

cleanup_netlink_notifier:
	netlink_unregister_notifier(&ipq_nl_notifier);
	return status;
}
/*
 * Module exit: stop accepting packets first, drop what is still
 * queued, then tear down proc/sysctl/notifiers and the netlink socket
 * in reverse registration order.
 */
static void __exit ip_queue_fini(void)
{
	nf_unregister_queue_handlers(&nfqh);

	ipq_flush(NULL, 0);

#ifdef CONFIG_SYSCTL
	unregister_net_sysctl_table(ipq_sysctl_header);
#endif
	unregister_netdevice_notifier(&ipq_dev_notifier);
	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);

	netlink_kernel_release(ipqnl);
	/* Lock/unlock pair waits out any in-flight __ipq_rcv_skb(). */
	mutex_lock(&ipqnl_mutex);
	mutex_unlock(&ipqnl_mutex);

	netlink_unregister_notifier(&ipq_nl_notifier);
}

MODULE_DESCRIPTION("IPv4 packet queue handler");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
MODULE_LICENSE("GPL");
/* Autoload this module when a NETLINK_FIREWALL socket is opened. */
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);

module_init(ip_queue_init);
module_exit(ip_queue_fini);
net/ipv6/netfilter/Kconfig
View file @
9bb862be
...
@@ -25,28 +25,6 @@ config NF_CONNTRACK_IPV6
...
@@ -25,28 +25,6 @@ config NF_CONNTRACK_IPV6
To compile it as a module, choose M here. If unsure, say N.
To compile it as a module, choose M here. If unsure, say N.
config IP6_NF_QUEUE
tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
depends on INET && IPV6 && NETFILTER
depends on NETFILTER_ADVANCED
---help---
This option adds a queue handler to the kernel for IPv6
packets which enables users to receive the filtered packets
with QUEUE target using libipq.
This option enables the old IPv6-only "ip6_queue" implementation
which has been obsoleted by the new "nfnetlink_queue" code (see
CONFIG_NETFILTER_NETLINK_QUEUE).
(C) Fernando Anton 2001
IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
Universidad Carlos III de Madrid
Universidad Politecnica de Alcala de Henares
email: <fanton@it.uc3m.es>.
To compile it as a module, choose M here. If unsure, say N.
config IP6_NF_IPTABLES
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
depends on INET && IPV6
...
...
net/ipv6/netfilter/Makefile
View file @
9bb862be
...
@@ -6,7 +6,6 @@
...
@@ -6,7 +6,6 @@
obj-$(CONFIG_IP6_NF_IPTABLES)
+=
ip6_tables.o
obj-$(CONFIG_IP6_NF_IPTABLES)
+=
ip6_tables.o
obj-$(CONFIG_IP6_NF_FILTER)
+=
ip6table_filter.o
obj-$(CONFIG_IP6_NF_FILTER)
+=
ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE)
+=
ip6table_mangle.o
obj-$(CONFIG_IP6_NF_MANGLE)
+=
ip6table_mangle.o
obj-$(CONFIG_IP6_NF_QUEUE)
+=
ip6_queue.o
obj-$(CONFIG_IP6_NF_RAW)
+=
ip6table_raw.o
obj-$(CONFIG_IP6_NF_RAW)
+=
ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY)
+=
ip6table_security.o
obj-$(CONFIG_IP6_NF_SECURITY)
+=
ip6table_security.o
...
...
net/ipv6/netfilter/ip6_queue.c
deleted
100644 → 0
View file @
b44907e6
/*
* This is a module which is used for queueing IPv6 packets and
* communicating with userspace via netlink.
*
* (C) 2001 Fernando Anton, this code is GPL.
* IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
* Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
* Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
* email: fanton@it.uc3m.es
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/ipv6.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/netfilter/nf_queue.h>
#include <linux/netfilter_ipv4/ip_queue.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#define IPQ_QMAX_DEFAULT 1024
#define IPQ_PROC_FS_NAME "ip6_queue"
#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
/* Comparison callback used when selectively flushing the queue. */
typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);

/* How much of each packet is copied to userspace (IPQ_COPY_*). */
static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
/* Upper bound on queued packets; tunable via sysctl. */
static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
/* Protects the queue list, counters and peer registration below. */
static DEFINE_SPINLOCK(queue_lock);
/* Netlink pid of the single registered userspace peer (0 = none). */
static int peer_pid __read_mostly;
/* Payload bytes copied per packet in IPQ_COPY_PACKET mode. */
static unsigned int copy_range __read_mostly;
static unsigned int queue_total;		/* entries currently queued */
static unsigned int queue_dropped = 0;		/* dropped: queue full */
static unsigned int queue_user_dropped = 0;	/* dropped: unicast failed */
/* Kernel side of the NETLINK_IP6_FW socket. */
static struct sock *ipqnl __read_mostly;
static LIST_HEAD(queue_list);
/* Serializes __ipq_rcv_skb() against module teardown. */
static DEFINE_MUTEX(ipqnl_mutex);
/* Append @entry to the pending queue.  Caller holds queue_lock. */
static inline void
__ipq_enqueue_entry(struct nf_queue_entry *entry)
{
	list_add_tail(&entry->list, &queue_list);
	queue_total++;
}
/*
 * Apply a copy mode/range requested by userspace.
 * Caller holds queue_lock.
 */
static inline int
__ipq_set_mode(unsigned char mode, unsigned int range)
{
	switch (mode) {
	case IPQ_COPY_NONE:
	case IPQ_COPY_META:
		copy_mode = mode;
		copy_range = 0;
		return 0;

	case IPQ_COPY_PACKET:
		/* data_len in the packet message is limited to 16 bits. */
		if (range > 0xFFFF)
			range = 0xFFFF;
		copy_range = range;
		copy_mode = mode;
		return 0;

	default:
		return -EINVAL;
	}
}
static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);

/*
 * Forget the registered peer and drop everything still queued.
 * Caller holds queue_lock.
 */
static inline void
__ipq_reset(void)
{
	peer_pid = 0;
	/* Balances the net_enable_timestamp() done on peer registration. */
	net_disable_timestamp();
	__ipq_set_mode(IPQ_COPY_NONE, 0);
	__ipq_flush(NULL, 0);
}
static
struct
nf_queue_entry
*
ipq_find_dequeue_entry
(
unsigned
long
id
)
{
struct
nf_queue_entry
*
entry
=
NULL
,
*
i
;
spin_lock_bh
(
&
queue_lock
);
list_for_each_entry
(
i
,
&
queue_list
,
list
)
{
if
((
unsigned
long
)
i
==
id
)
{
entry
=
i
;
break
;
}
}
if
(
entry
)
{
list_del
(
&
entry
->
list
);
queue_total
--
;
}
spin_unlock_bh
(
&
queue_lock
);
return
entry
;
}
/*
 * Drop every queued entry that @cmpfn matches (all entries when
 * @cmpfn is NULL), reinjecting each with NF_DROP.
 * Caller holds queue_lock.
 */
static void
__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
{
	struct nf_queue_entry *cur, *tmp;

	list_for_each_entry_safe(cur, tmp, &queue_list, list)
		if (!cmpfn || cmpfn(cur, data)) {
			list_del(&cur->list);
			queue_total--;
			nf_reinject(cur, NF_DROP);
		}
}

/* Locked wrapper around __ipq_flush(). */
static void
ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
{
	spin_lock_bh(&queue_lock);
	__ipq_flush(cmpfn, data);
	spin_unlock_bh(&queue_lock);
}
/*
 * Build the netlink IPQM_PACKET message describing @entry for the
 * userspace peer.  Depending on copy_mode, the message carries metadata
 * only or metadata plus up to copy_range payload bytes.  Returns the
 * freshly allocated skb, or NULL with *errp set on failure.
 */
static struct sk_buff *
ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
{
	sk_buff_data_t old_tail;
	size_t size = 0;
	size_t data_len = 0;
	struct sk_buff *skb;
	struct ipq_packet_msg *pmsg;
	struct nlmsghdr *nlh;
	struct timeval tv;

	/* copy_mode/copy_range are written under queue_lock elsewhere but
	 * read locklessly here, hence ACCESS_ONCE. */
	switch (ACCESS_ONCE(copy_mode)) {
	case IPQ_COPY_META:
	case IPQ_COPY_NONE:
		size = NLMSG_SPACE(sizeof(*pmsg));
		break;

	case IPQ_COPY_PACKET:
		/* Userspace sees raw payload; checksum must be finalized. */
		if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
		    (*errp = skb_checksum_help(entry->skb)))
			return NULL;

		data_len = ACCESS_ONCE(copy_range);
		if (data_len == 0 || data_len > entry->skb->len)
			data_len = entry->skb->len;

		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
		break;

	default:
		*errp = -EINVAL;
		return NULL;
	}

	skb = alloc_skb(size, GFP_ATOMIC);
	if (!skb)
		goto nlmsg_failure;

	old_tail = skb->tail;
	/* NLMSG_PUT jumps to nlmsg_failure when the skb lacks room. */
	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
	pmsg = NLMSG_DATA(nlh);
	memset(pmsg, 0, sizeof(*pmsg));

	/* The entry's address doubles as the packet id userspace echoes
	 * back in its verdict. */
	pmsg->packet_id       = (unsigned long )entry;
	pmsg->data_len        = data_len;
	tv = ktime_to_timeval(entry->skb->tstamp);
	pmsg->timestamp_sec   = tv.tv_sec;
	pmsg->timestamp_usec  = tv.tv_usec;
	pmsg->mark            = entry->skb->mark;
	pmsg->hook            = entry->hook;
	pmsg->hw_protocol     = entry->skb->protocol;

	if (entry->indev)
		strcpy(pmsg->indev_name, entry->indev->name);
	else
		pmsg->indev_name[0] = '\0';

	if (entry->outdev)
		strcpy(pmsg->outdev_name, entry->outdev->name);
	else
		pmsg->outdev_name[0] = '\0';

	/* Hardware header info is only meaningful on input with a
	 * link-layer header present. */
	if (entry->indev && entry->skb->dev &&
	    entry->skb->mac_header != entry->skb->network_header) {
		pmsg->hw_type = entry->skb->dev->type;
		pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
	}

	if (data_len)
		if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
			BUG();

	nlh->nlmsg_len = skb->tail - old_tail;
	return skb;

nlmsg_failure:
	kfree_skb(skb);
	*errp = -EINVAL;
	printk(KERN_ERR "ip6_queue: error creating packet message\n");
	return NULL;
}
/*
 * Netfilter queue handler: describe @entry in a netlink message,
 * unicast it to the registered peer and park the entry on queue_list
 * until the peer's verdict arrives.  Returns a negative errno on
 * failure (-EAGAIN when no copy mode is configured yet).
 */
static int
ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
{
	int status = -EINVAL;
	struct sk_buff *nskb;

	if (copy_mode == IPQ_COPY_NONE)
		return -EAGAIN;

	nskb = ipq_build_packet_message(entry, &status);
	if (nskb == NULL)
		return status;

	spin_lock_bh(&queue_lock);

	/* No peer registered: nobody to deliver a verdict. */
	if (!peer_pid)
		goto err_out_free_nskb;

	if (queue_total >= queue_maxlen) {
		queue_dropped++;
		status = -ENOSPC;
		if (net_ratelimit())
			printk(KERN_WARNING "ip6_queue: fill at %d entries, "
				"dropping packet(s).  Dropped: %d\n", queue_total,
				queue_dropped);
		goto err_out_free_nskb;
	}

	/* netlink_unicast will either free the nskb or attach it to a socket */
	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
	if (status < 0) {
		queue_user_dropped++;
		goto err_out_unlock;
	}

	__ipq_enqueue_entry(entry);

	spin_unlock_bh(&queue_lock);
	return status;

err_out_free_nskb:
	kfree_skb(nskb);

err_out_unlock:
	spin_unlock_bh(&queue_lock);
	return status;
}
/*
 * Replace the payload of queued packet @e with the data userspace sent
 * alongside its verdict, growing or trimming the skb as needed.
 * Returns 0 on success (including the no-op case of a too-short
 * payload) or a negative errno; the caller downgrades the verdict to
 * NF_DROP on error.
 */
static int
ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
{
	int diff;
	struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
	struct sk_buff *nskb;

	/* Payload smaller than an IPv6 header cannot be a packet. */
	if (v->data_len < sizeof(*user_iph))
		return 0;
	diff = v->data_len - e->skb->len;
	if (diff < 0) {
		if (pskb_trim(e->skb, v->data_len))
			return -ENOMEM;
	} else if (diff > 0) {
		/* data_len on the wire is a 16-bit quantity. */
		if (v->data_len > 0xFFFF)
			return -EINVAL;
		if (diff > skb_tailroom(e->skb)) {
			/* Not enough tailroom: reallocate, keeping headroom,
			 * and transfer ownership of the new skb to the entry. */
			nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
					       diff, GFP_ATOMIC);
			if (!nskb) {
				printk(KERN_WARNING "ip6_queue: OOM "
				      "in mangle, dropping packet\n");
				return -ENOMEM;
			}
			kfree_skb(e->skb);
			e->skb = nskb;
		}
		skb_put(e->skb, diff);
	}
	if (!skb_make_writable(e->skb, v->data_len))
		return -ENOMEM;
	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
	/* Userspace rewrote the packet; any prior checksum is stale. */
	e->skb->ip_summed = CHECKSUM_NONE;

	return 0;
}
/*
 * Handle an IPQM_VERDICT message: look up the queued packet by its id,
 * optionally splice in a replacement payload, and hand the packet back
 * to netfilter with the requested verdict.
 */
static int
ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
{
	struct nf_queue_entry *found;
	int verdict;

	/* NF_STOLEN packets never come back, so userspace may not use it. */
	if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
		return -EINVAL;

	found = ipq_find_dequeue_entry(vmsg->id);
	if (found == NULL)
		return -ENOENT;

	verdict = vmsg->value;
	/* A payload is only honoured when it fills the rest of the message;
	 * a failed mangle downgrades the verdict to NF_DROP. */
	if (vmsg->data_len && vmsg->data_len == len &&
	    ipq_mangle_ipv6(vmsg, found) < 0)
		verdict = NF_DROP;

	nf_reinject(found, verdict);
	return 0;
}
/* Serialized (queue_lock) wrapper around __ipq_set_mode(). */
static int
ipq_set_mode(unsigned char mode, unsigned int range)
{
	int ret;

	spin_lock_bh(&queue_lock);
	ret = __ipq_set_mode(mode, range);
	spin_unlock_bh(&queue_lock);

	return ret;
}

/*
 * Dispatch a validated peer message to its handler.
 * @len is the number of bytes available starting at @pmsg.
 */
static int
ipq_receive_peer(struct ipq_peer_msg *pmsg,
		 unsigned char type, unsigned int len)
{
	if (len < sizeof(*pmsg))
		return -EINVAL;

	switch (type) {
	case IPQM_MODE:
		return ipq_set_mode(pmsg->msg.mode.value,
				    pmsg->msg.mode.range);
	case IPQM_VERDICT:
		return ipq_set_verdict(&pmsg->msg.verdict,
				       len - sizeof(*pmsg));
	default:
		return -EINVAL;
	}
}
/*
 * Match callback for ipq_flush(): non-zero when the queued entry refers
 * to the interface with index @ifindex, either directly or (with bridge
 * netfilter) via the physical in/out devices.
 */
static int
dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
{
	if (entry->indev && entry->indev->ifindex == ifindex)
		return 1;

	if (entry->outdev && entry->outdev->ifindex == ifindex)
		return 1;

#ifdef CONFIG_BRIDGE_NETFILTER
	if (entry->skb->nf_bridge) {
		if (entry->skb->nf_bridge->physindev &&
		    entry->skb->nf_bridge->physindev->ifindex == ifindex)
			return 1;
		if (entry->skb->nf_bridge->physoutdev &&
		    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
			return 1;
	}
#endif
	return 0;
}

/* Drop (NF_DROP + reinject) every queued packet tied to @ifindex. */
static void
ipq_dev_drop(int ifindex)
{
	ipq_flush(dev_cmp, ifindex);
}
/* Nack the offending message with @err and bail out of __ipq_rcv_skb(). */
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)

/*
 * Validate and process one netlink message from userspace: length,
 * flag and capability checks, single-peer registration, then dispatch
 * of the IPQM_* payload.  Caller holds ipqnl_mutex.
 */
static inline void
__ipq_rcv_skb(struct sk_buff *skb)
{
	int status, type, pid, flags;
	unsigned int nlmsglen, skblen;
	struct nlmsghdr *nlh;
	bool enable_timestamp = false;

	skblen = skb->len;
	if (skblen < sizeof(*nlh))
		return;

	nlh = nlmsg_hdr(skb);
	nlmsglen = nlh->nlmsg_len;
	/* Header must be sane and fully contained in the skb. */
	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
		return;

	pid = nlh->nlmsg_pid;
	flags = nlh->nlmsg_flags;

	/* Only explicit, single-part requests from a real process. */
	if (pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
		RCV_SKB_FAIL(-EINVAL);

	if (flags & MSG_TRUNC)
		RCV_SKB_FAIL(-ECOMM);

	type = nlh->nlmsg_type;
	if (type < NLMSG_NOOP || type >= IPQM_MAX)
		RCV_SKB_FAIL(-EINVAL);

	/* Netlink control messages (type <= IPQM_BASE) are ignored. */
	if (type <= IPQM_BASE)
		return;

	if (!capable(CAP_NET_ADMIN))
		RCV_SKB_FAIL(-EPERM);

	spin_lock_bh(&queue_lock);

	if (peer_pid) {
		/* Only one userspace peer may be registered at a time. */
		if (peer_pid != pid) {
			spin_unlock_bh(&queue_lock);
			RCV_SKB_FAIL(-EBUSY);
		}
	} else {
		/* First message registers the sender as the peer. */
		enable_timestamp = true;
		peer_pid = pid;
	}

	spin_unlock_bh(&queue_lock);

	/* Timestamp enabling is deliberately done after dropping the
	 * spinlock — NOTE(review): presumably because it can block;
	 * balanced by net_disable_timestamp() in __ipq_reset(). */
	if (enable_timestamp)
		net_enable_timestamp();

	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
				  nlmsglen - NLMSG_LENGTH(0));
	if (status < 0)
		RCV_SKB_FAIL(status);

	if (flags & NLM_F_ACK)
		netlink_ack(skb, nlh, 0);
}

/* Netlink input callback: serializes message handling on ipqnl_mutex. */
static void
ipq_rcv_skb(struct sk_buff *skb)
{
	mutex_lock(&ipqnl_mutex);
	__ipq_rcv_skb(skb);
	mutex_unlock(&ipqnl_mutex);
}
/*
 * Netdevice notifier: when a device in the initial namespace goes down,
 * purge packets queued on it so they are not reinjected through a dead
 * interface.
 */
static int
ipq_rcv_dev_event(struct notifier_block *this,
		  unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	/* Drop any packets associated with the downed device */
	if (event == NETDEV_DOWN && net_eq(dev_net(dev), &init_net))
		ipq_dev_drop(dev->ifindex);

	return NOTIFY_DONE;
}

static struct notifier_block ipq_dev_notifier = {
	.notifier_call	= ipq_rcv_dev_event,
};

/*
 * Netlink notifier: when the registered peer closes its NETLINK_IP6_FW
 * socket, forget it and flush everything still queued.
 */
static int
ipq_rcv_nl_event(struct notifier_block *this,
		 unsigned long event, void *ptr)
{
	struct netlink_notify *notify = ptr;

	if (event != NETLINK_URELEASE || notify->protocol != NETLINK_IP6_FW)
		return NOTIFY_DONE;

	spin_lock_bh(&queue_lock);
	if (net_eq(notify->net, &init_net) && notify->pid == peer_pid)
		__ipq_reset();
	spin_unlock_bh(&queue_lock);

	return NOTIFY_DONE;
}

static struct notifier_block ipq_nl_notifier = {
	.notifier_call	= ipq_rcv_nl_event,
};
#ifdef CONFIG_SYSCTL
static
struct
ctl_table_header
*
ipq_sysctl_header
;
static
ctl_table
ipq_table
[]
=
{
{
.
procname
=
NET_IPQ_QMAX_NAME
,
.
data
=
&
queue_maxlen
,
.
maxlen
=
sizeof
(
queue_maxlen
),
.
mode
=
0644
,
.
proc_handler
=
proc_dointvec
},
{
}
};
#endif
#ifdef CONFIG_PROC_FS
static
int
ip6_queue_show
(
struct
seq_file
*
m
,
void
*
v
)
{
spin_lock_bh
(
&
queue_lock
);
seq_printf
(
m
,
"Peer PID : %d
\n
"
"Copy mode : %hu
\n
"
"Copy range : %u
\n
"
"Queue length : %u
\n
"
"Queue max. length : %u
\n
"
"Queue dropped : %u
\n
"
"Netfilter dropped : %u
\n
"
,
peer_pid
,
copy_mode
,
copy_range
,
queue_total
,
queue_maxlen
,
queue_dropped
,
queue_user_dropped
);
spin_unlock_bh
(
&
queue_lock
);
return
0
;
}
static
int
ip6_queue_open
(
struct
inode
*
inode
,
struct
file
*
file
)
{
return
single_open
(
file
,
ip6_queue_show
,
NULL
);
}
static
const
struct
file_operations
ip6_queue_proc_fops
=
{
.
open
=
ip6_queue_open
,
.
read
=
seq_read
,
.
llseek
=
seq_lseek
,
.
release
=
single_release
,
.
owner
=
THIS_MODULE
,
};
#endif
/* Queue handler registered with netfilter for NFPROTO_IPV6. */
static const struct nf_queue_handler nfqh = {
	.name	= "ip6_queue",
	.outfn	= &ipq_enqueue_packet,
};
/*
 * Module init: register the netlink notifier, create the NETLINK_IP6_FW
 * kernel socket, expose /proc and sysctl entries, and finally register
 * the netfilter queue handler.  Teardown on failure unwinds in reverse
 * order via the goto labels.
 */
static int __init ip6_queue_init(void)
{
	int status = -ENOMEM;
	struct proc_dir_entry *proc __maybe_unused;

	netlink_register_notifier(&ipq_nl_notifier);
	ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
				      ipq_rcv_skb, NULL, THIS_MODULE);
	if (ipqnl == NULL) {
		printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
		goto cleanup_netlink_notifier;
	}

#ifdef CONFIG_PROC_FS
	proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
			   &ip6_queue_proc_fops);
	if (!proc) {
		printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
		goto cleanup_ipqnl;
	}
#endif
	register_netdevice_notifier(&ipq_dev_notifier);
#ifdef CONFIG_SYSCTL
	ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv6", ipq_table);
#endif
	/* Last step: once registered, packets may start arriving. */
	status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);
	if (status < 0) {
		printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
		goto cleanup_sysctl;
	}
	return status;

cleanup_sysctl:
#ifdef CONFIG_SYSCTL
	unregister_net_sysctl_table(ipq_sysctl_header);
#endif
	unregister_netdevice_notifier(&ipq_dev_notifier);
	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);

cleanup_ipqnl: __maybe_unused
	netlink_kernel_release(ipqnl);
	/* Lock/unlock pair waits out any in-flight __ipq_rcv_skb(). */
	mutex_lock(&ipqnl_mutex);
	mutex_unlock(&ipqnl_mutex);

cleanup_netlink_notifier:
	netlink_unregister_notifier(&ipq_nl_notifier);
	return status;
}
/*
 * Module exit: stop accepting packets first, drop what is still queued,
 * then tear down proc/sysctl/notifiers and the netlink socket in
 * reverse registration order.
 */
static void __exit ip6_queue_fini(void)
{
	nf_unregister_queue_handlers(&nfqh);

	ipq_flush(NULL, 0);

#ifdef CONFIG_SYSCTL
	unregister_net_sysctl_table(ipq_sysctl_header);
#endif
	unregister_netdevice_notifier(&ipq_dev_notifier);
	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);

	netlink_kernel_release(ipqnl);
	/* Lock/unlock pair waits out any in-flight __ipq_rcv_skb(). */
	mutex_lock(&ipqnl_mutex);
	mutex_unlock(&ipqnl_mutex);

	netlink_unregister_notifier(&ipq_nl_notifier);
}

MODULE_DESCRIPTION("IPv6 packet queue handler");
MODULE_LICENSE("GPL");
/* Autoload this module when a NETLINK_IP6_FW socket is opened. */
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW);

module_init(ip6_queue_init);
module_exit(ip6_queue_fini);
net/netfilter/ipvs/ip_vs_conn.c
View file @
9bb862be
...
@@ -548,6 +548,7 @@ static inline void
...
@@ -548,6 +548,7 @@ static inline void
ip_vs_bind_dest
(
struct
ip_vs_conn
*
cp
,
struct
ip_vs_dest
*
dest
)
ip_vs_bind_dest
(
struct
ip_vs_conn
*
cp
,
struct
ip_vs_dest
*
dest
)
{
{
unsigned
int
conn_flags
;
unsigned
int
conn_flags
;
__u32
flags
;
/* if dest is NULL, then return directly */
/* if dest is NULL, then return directly */
if
(
!
dest
)
if
(
!
dest
)
...
@@ -559,17 +560,19 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
...
@@ -559,17 +560,19 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
conn_flags
=
atomic_read
(
&
dest
->
conn_flags
);
conn_flags
=
atomic_read
(
&
dest
->
conn_flags
);
if
(
cp
->
protocol
!=
IPPROTO_UDP
)
if
(
cp
->
protocol
!=
IPPROTO_UDP
)
conn_flags
&=
~
IP_VS_CONN_F_ONE_PACKET
;
conn_flags
&=
~
IP_VS_CONN_F_ONE_PACKET
;
flags
=
cp
->
flags
;
/* Bind with the destination and its corresponding transmitter */
/* Bind with the destination and its corresponding transmitter */
if
(
cp
->
flags
&
IP_VS_CONN_F_SYNC
)
{
if
(
flags
&
IP_VS_CONN_F_SYNC
)
{
/* if the connection is not template and is created
/* if the connection is not template and is created
* by sync, preserve the activity flag.
* by sync, preserve the activity flag.
*/
*/
if
(
!
(
cp
->
flags
&
IP_VS_CONN_F_TEMPLATE
))
if
(
!
(
flags
&
IP_VS_CONN_F_TEMPLATE
))
conn_flags
&=
~
IP_VS_CONN_F_INACTIVE
;
conn_flags
&=
~
IP_VS_CONN_F_INACTIVE
;
/* connections inherit forwarding method from dest */
/* connections inherit forwarding method from dest */
cp
->
flags
&=
~
IP_VS_CONN_F_FWD_MASK
;
flags
&=
~
(
IP_VS_CONN_F_FWD_MASK
|
IP_VS_CONN_F_NOOUTPUT
)
;
}
}
cp
->
flags
|=
conn_flags
;
flags
|=
conn_flags
;
cp
->
flags
=
flags
;
cp
->
dest
=
dest
;
cp
->
dest
=
dest
;
IP_VS_DBG_BUF
(
7
,
"Bind-dest %s c:%s:%d v:%s:%d "
IP_VS_DBG_BUF
(
7
,
"Bind-dest %s c:%s:%d v:%s:%d "
...
@@ -584,12 +587,12 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
...
@@ -584,12 +587,12 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
atomic_read
(
&
dest
->
refcnt
));
atomic_read
(
&
dest
->
refcnt
));
/* Update the connection counters */
/* Update the connection counters */
if
(
!
(
cp
->
flags
&
IP_VS_CONN_F_TEMPLATE
))
{
if
(
!
(
flags
&
IP_VS_CONN_F_TEMPLATE
))
{
/* It is a normal connection, so
increase the inactive
/* It is a normal connection, so
modify the counters
connection counter because it is in TCP SYNRECV
* according to the flags, later the protocol can
state (inactive) or other protocol inacive state */
* update them on state change
if
((
cp
->
flags
&
IP_VS_CONN_F_SYNC
)
&&
*/
(
!
(
cp
->
flags
&
IP_VS_CONN_F_INACTIVE
)
))
if
(
!
(
flags
&
IP_VS_CONN_F_INACTIVE
))
atomic_inc
(
&
dest
->
activeconns
);
atomic_inc
(
&
dest
->
activeconns
);
else
else
atomic_inc
(
&
dest
->
inactconns
);
atomic_inc
(
&
dest
->
inactconns
);
...
@@ -613,14 +616,40 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
...
@@ -613,14 +616,40 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
{
{
struct
ip_vs_dest
*
dest
;
struct
ip_vs_dest
*
dest
;
if
((
cp
)
&&
(
!
cp
->
dest
))
{
dest
=
ip_vs_find_dest
(
ip_vs_conn_net
(
cp
),
cp
->
af
,
&
cp
->
daddr
,
dest
=
ip_vs_find_dest
(
ip_vs_conn_net
(
cp
),
cp
->
af
,
&
cp
->
daddr
,
cp
->
dport
,
&
cp
->
vaddr
,
cp
->
vport
,
cp
->
dport
,
&
cp
->
vaddr
,
cp
->
vport
,
cp
->
protocol
,
cp
->
fwmark
,
cp
->
flags
);
cp
->
protocol
,
cp
->
fwmark
,
cp
->
flags
);
if
(
dest
)
{
struct
ip_vs_proto_data
*
pd
;
spin_lock
(
&
cp
->
lock
);
if
(
cp
->
dest
)
{
spin_unlock
(
&
cp
->
lock
);
return
dest
;
}
/* Applications work depending on the forwarding method
* but better to reassign them always when binding dest */
if
(
cp
->
app
)
ip_vs_unbind_app
(
cp
);
ip_vs_bind_dest
(
cp
,
dest
);
ip_vs_bind_dest
(
cp
,
dest
);
spin_unlock
(
&
cp
->
lock
);
/* Update its packet transmitter */
cp
->
packet_xmit
=
NULL
;
#ifdef CONFIG_IP_VS_IPV6
if
(
cp
->
af
==
AF_INET6
)
ip_vs_bind_xmit_v6
(
cp
);
else
#endif
ip_vs_bind_xmit
(
cp
);
pd
=
ip_vs_proto_data_get
(
ip_vs_conn_net
(
cp
),
cp
->
protocol
);
if
(
pd
&&
atomic_read
(
&
pd
->
appcnt
))
ip_vs_bind_app
(
cp
,
pd
->
pp
);
}
return
dest
;
return
dest
;
}
else
return
NULL
;
}
}
...
@@ -743,7 +772,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
...
@@ -743,7 +772,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
static
void
ip_vs_conn_expire
(
unsigned
long
data
)
static
void
ip_vs_conn_expire
(
unsigned
long
data
)
{
{
struct
ip_vs_conn
*
cp
=
(
struct
ip_vs_conn
*
)
data
;
struct
ip_vs_conn
*
cp
=
(
struct
ip_vs_conn
*
)
data
;
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
ip_vs_conn_net
(
cp
));
struct
net
*
net
=
ip_vs_conn_net
(
cp
);
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
cp
->
timeout
=
60
*
HZ
;
cp
->
timeout
=
60
*
HZ
;
...
@@ -808,6 +838,9 @@ static void ip_vs_conn_expire(unsigned long data)
...
@@ -808,6 +838,9 @@ static void ip_vs_conn_expire(unsigned long data)
atomic_read
(
&
cp
->
refcnt
)
-
1
,
atomic_read
(
&
cp
->
refcnt
)
-
1
,
atomic_read
(
&
cp
->
n_control
));
atomic_read
(
&
cp
->
n_control
));
if
(
ipvs
->
sync_state
&
IP_VS_STATE_MASTER
)
ip_vs_sync_conn
(
net
,
cp
,
sysctl_sync_threshold
(
ipvs
));
ip_vs_conn_put
(
cp
);
ip_vs_conn_put
(
cp
);
}
}
...
@@ -881,6 +914,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
...
@@ -881,6 +914,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
/* Set its state and timeout */
/* Set its state and timeout */
cp
->
state
=
0
;
cp
->
state
=
0
;
cp
->
timeout
=
3
*
HZ
;
cp
->
timeout
=
3
*
HZ
;
cp
->
sync_endtime
=
jiffies
&
~
3UL
;
/* Bind its packet transmitter */
/* Bind its packet transmitter */
#ifdef CONFIG_IP_VS_IPV6
#ifdef CONFIG_IP_VS_IPV6
...
...
net/netfilter/ipvs/ip_vs_core.c
View file @
9bb862be
...
@@ -1613,34 +1613,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
...
@@ -1613,34 +1613,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
else
else
pkts
=
atomic_add_return
(
1
,
&
cp
->
in_pkts
);
pkts
=
atomic_add_return
(
1
,
&
cp
->
in_pkts
);
if
((
ipvs
->
sync_state
&
IP_VS_STATE_MASTER
)
&&
if
(
ipvs
->
sync_state
&
IP_VS_STATE_MASTER
)
cp
->
protocol
==
IPPROTO_SCTP
)
{
ip_vs_sync_conn
(
net
,
cp
,
pkts
);
if
((
cp
->
state
==
IP_VS_SCTP_S_ESTABLISHED
&&
(
pkts
%
sysctl_sync_period
(
ipvs
)
==
sysctl_sync_threshold
(
ipvs
)))
||
(
cp
->
old_state
!=
cp
->
state
&&
((
cp
->
state
==
IP_VS_SCTP_S_CLOSED
)
||
(
cp
->
state
==
IP_VS_SCTP_S_SHUT_ACK_CLI
)
||
(
cp
->
state
==
IP_VS_SCTP_S_SHUT_ACK_SER
))))
{
ip_vs_sync_conn
(
net
,
cp
);
goto
out
;
}
}
/* Keep this block last: TCP and others with pp->num_states <= 1 */
else
if
((
ipvs
->
sync_state
&
IP_VS_STATE_MASTER
)
&&
(((
cp
->
protocol
!=
IPPROTO_TCP
||
cp
->
state
==
IP_VS_TCP_S_ESTABLISHED
)
&&
(
pkts
%
sysctl_sync_period
(
ipvs
)
==
sysctl_sync_threshold
(
ipvs
)))
||
((
cp
->
protocol
==
IPPROTO_TCP
)
&&
(
cp
->
old_state
!=
cp
->
state
)
&&
((
cp
->
state
==
IP_VS_TCP_S_FIN_WAIT
)
||
(
cp
->
state
==
IP_VS_TCP_S_CLOSE
)
||
(
cp
->
state
==
IP_VS_TCP_S_CLOSE_WAIT
)
||
(
cp
->
state
==
IP_VS_TCP_S_TIME_WAIT
)))))
ip_vs_sync_conn
(
net
,
cp
);
out:
cp
->
old_state
=
cp
->
state
;
ip_vs_conn_put
(
cp
);
ip_vs_conn_put
(
cp
);
return
ret
;
return
ret
;
...
...
net/netfilter/ipvs/ip_vs_ctl.c
View file @
9bb862be
...
@@ -1599,6 +1599,10 @@ static int ip_vs_zero_all(struct net *net)
...
@@ -1599,6 +1599,10 @@ static int ip_vs_zero_all(struct net *net)
}
}
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_SYSCTL
static
int
zero
;
static
int
three
=
3
;
static
int
static
int
proc_do_defense_mode
(
ctl_table
*
table
,
int
write
,
proc_do_defense_mode
(
ctl_table
*
table
,
int
write
,
void
__user
*
buffer
,
size_t
*
lenp
,
loff_t
*
ppos
)
void
__user
*
buffer
,
size_t
*
lenp
,
loff_t
*
ppos
)
...
@@ -1632,7 +1636,8 @@ proc_do_sync_threshold(ctl_table *table, int write,
...
@@ -1632,7 +1636,8 @@ proc_do_sync_threshold(ctl_table *table, int write,
memcpy
(
val
,
valp
,
sizeof
(
val
));
memcpy
(
val
,
valp
,
sizeof
(
val
));
rc
=
proc_dointvec
(
table
,
write
,
buffer
,
lenp
,
ppos
);
rc
=
proc_dointvec
(
table
,
write
,
buffer
,
lenp
,
ppos
);
if
(
write
&&
(
valp
[
0
]
<
0
||
valp
[
1
]
<
0
||
valp
[
0
]
>=
valp
[
1
]))
{
if
(
write
&&
(
valp
[
0
]
<
0
||
valp
[
1
]
<
0
||
(
valp
[
0
]
>=
valp
[
1
]
&&
valp
[
1
])))
{
/* Restore the correct value */
/* Restore the correct value */
memcpy
(
valp
,
val
,
sizeof
(
val
));
memcpy
(
valp
,
val
,
sizeof
(
val
));
}
}
...
@@ -1652,9 +1657,24 @@ proc_do_sync_mode(ctl_table *table, int write,
...
@@ -1652,9 +1657,24 @@ proc_do_sync_mode(ctl_table *table, int write,
if
((
*
valp
<
0
)
||
(
*
valp
>
1
))
{
if
((
*
valp
<
0
)
||
(
*
valp
>
1
))
{
/* Restore the correct value */
/* Restore the correct value */
*
valp
=
val
;
*
valp
=
val
;
}
else
{
}
struct
net
*
net
=
current
->
nsproxy
->
net_ns
;
}
ip_vs_sync_switch_mode
(
net
,
val
);
return
rc
;
}
static
int
proc_do_sync_ports
(
ctl_table
*
table
,
int
write
,
void
__user
*
buffer
,
size_t
*
lenp
,
loff_t
*
ppos
)
{
int
*
valp
=
table
->
data
;
int
val
=
*
valp
;
int
rc
;
rc
=
proc_dointvec
(
table
,
write
,
buffer
,
lenp
,
ppos
);
if
(
write
&&
(
*
valp
!=
val
))
{
if
(
*
valp
<
1
||
!
is_power_of_2
(
*
valp
))
{
/* Restore the correct value */
*
valp
=
val
;
}
}
}
}
return
rc
;
return
rc
;
...
@@ -1717,6 +1737,24 @@ static struct ctl_table vs_vars[] = {
...
@@ -1717,6 +1737,24 @@ static struct ctl_table vs_vars[] = {
.
mode
=
0644
,
.
mode
=
0644
,
.
proc_handler
=
&
proc_do_sync_mode
,
.
proc_handler
=
&
proc_do_sync_mode
,
},
},
{
.
procname
=
"sync_ports"
,
.
maxlen
=
sizeof
(
int
),
.
mode
=
0644
,
.
proc_handler
=
&
proc_do_sync_ports
,
},
{
.
procname
=
"sync_qlen_max"
,
.
maxlen
=
sizeof
(
int
),
.
mode
=
0644
,
.
proc_handler
=
proc_dointvec
,
},
{
.
procname
=
"sync_sock_size"
,
.
maxlen
=
sizeof
(
int
),
.
mode
=
0644
,
.
proc_handler
=
proc_dointvec
,
},
{
{
.
procname
=
"cache_bypass"
,
.
procname
=
"cache_bypass"
,
.
maxlen
=
sizeof
(
int
),
.
maxlen
=
sizeof
(
int
),
...
@@ -1742,6 +1780,20 @@ static struct ctl_table vs_vars[] = {
...
@@ -1742,6 +1780,20 @@ static struct ctl_table vs_vars[] = {
.
mode
=
0644
,
.
mode
=
0644
,
.
proc_handler
=
proc_do_sync_threshold
,
.
proc_handler
=
proc_do_sync_threshold
,
},
},
{
.
procname
=
"sync_refresh_period"
,
.
maxlen
=
sizeof
(
int
),
.
mode
=
0644
,
.
proc_handler
=
proc_dointvec_jiffies
,
},
{
.
procname
=
"sync_retries"
,
.
maxlen
=
sizeof
(
int
),
.
mode
=
0644
,
.
proc_handler
=
proc_dointvec_minmax
,
.
extra1
=
&
zero
,
.
extra2
=
&
three
,
},
{
{
.
procname
=
"nat_icmp_send"
,
.
procname
=
"nat_icmp_send"
,
.
maxlen
=
sizeof
(
int
),
.
maxlen
=
sizeof
(
int
),
...
@@ -3655,6 +3707,12 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
...
@@ -3655,6 +3707,12 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_snat_reroute
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_snat_reroute
;
ipvs
->
sysctl_sync_ver
=
1
;
ipvs
->
sysctl_sync_ver
=
1
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_sync_ver
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_sync_ver
;
ipvs
->
sysctl_sync_ports
=
1
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_sync_ports
;
ipvs
->
sysctl_sync_qlen_max
=
nr_free_buffer_pages
()
/
32
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_sync_qlen_max
;
ipvs
->
sysctl_sync_sock_size
=
0
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_sync_sock_size
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_cache_bypass
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_cache_bypass
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_expire_nodest_conn
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_expire_nodest_conn
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_expire_quiescent_template
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_expire_quiescent_template
;
...
@@ -3662,6 +3720,10 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
...
@@ -3662,6 +3720,10 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
ipvs
->
sysctl_sync_threshold
[
1
]
=
DEFAULT_SYNC_PERIOD
;
ipvs
->
sysctl_sync_threshold
[
1
]
=
DEFAULT_SYNC_PERIOD
;
tbl
[
idx
].
data
=
&
ipvs
->
sysctl_sync_threshold
;
tbl
[
idx
].
data
=
&
ipvs
->
sysctl_sync_threshold
;
tbl
[
idx
++
].
maxlen
=
sizeof
(
ipvs
->
sysctl_sync_threshold
);
tbl
[
idx
++
].
maxlen
=
sizeof
(
ipvs
->
sysctl_sync_threshold
);
ipvs
->
sysctl_sync_refresh_period
=
DEFAULT_SYNC_REFRESH_PERIOD
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_sync_refresh_period
;
ipvs
->
sysctl_sync_retries
=
clamp_t
(
int
,
DEFAULT_SYNC_RETRIES
,
0
,
3
);
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_sync_retries
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_nat_icmp_send
;
tbl
[
idx
++
].
data
=
&
ipvs
->
sysctl_nat_icmp_send
;
...
...
net/netfilter/ipvs/ip_vs_dh.c
View file @
9bb862be
...
@@ -149,7 +149,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
...
@@ -149,7 +149,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
/* allocate the DH table for this service */
/* allocate the DH table for this service */
tbl
=
kmalloc
(
sizeof
(
struct
ip_vs_dh_bucket
)
*
IP_VS_DH_TAB_SIZE
,
tbl
=
kmalloc
(
sizeof
(
struct
ip_vs_dh_bucket
)
*
IP_VS_DH_TAB_SIZE
,
GFP_
ATOMIC
);
GFP_
KERNEL
);
if
(
tbl
==
NULL
)
if
(
tbl
==
NULL
)
return
-
ENOMEM
;
return
-
ENOMEM
;
...
...
net/netfilter/ipvs/ip_vs_ftp.c
View file @
9bb862be
...
@@ -485,7 +485,7 @@ static struct pernet_operations ip_vs_ftp_ops = {
...
@@ -485,7 +485,7 @@ static struct pernet_operations ip_vs_ftp_ops = {
.
exit
=
__ip_vs_ftp_exit
,
.
exit
=
__ip_vs_ftp_exit
,
};
};
int
__init
ip_vs_ftp_init
(
void
)
static
int
__init
ip_vs_ftp_init
(
void
)
{
{
int
rv
;
int
rv
;
...
...
net/netfilter/ipvs/ip_vs_lblc.c
View file @
9bb862be
...
@@ -342,7 +342,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
...
@@ -342,7 +342,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
/*
/*
* Allocate the ip_vs_lblc_table for this service
* Allocate the ip_vs_lblc_table for this service
*/
*/
tbl
=
kmalloc
(
sizeof
(
*
tbl
),
GFP_
ATOMIC
);
tbl
=
kmalloc
(
sizeof
(
*
tbl
),
GFP_
KERNEL
);
if
(
tbl
==
NULL
)
if
(
tbl
==
NULL
)
return
-
ENOMEM
;
return
-
ENOMEM
;
...
...
net/netfilter/ipvs/ip_vs_lblcr.c
View file @
9bb862be
...
@@ -511,7 +511,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
...
@@ -511,7 +511,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
/*
/*
* Allocate the ip_vs_lblcr_table for this service
* Allocate the ip_vs_lblcr_table for this service
*/
*/
tbl
=
kmalloc
(
sizeof
(
*
tbl
),
GFP_
ATOMIC
);
tbl
=
kmalloc
(
sizeof
(
*
tbl
),
GFP_
KERNEL
);
if
(
tbl
==
NULL
)
if
(
tbl
==
NULL
)
return
-
ENOMEM
;
return
-
ENOMEM
;
...
...
net/netfilter/ipvs/ip_vs_proto.c
View file @
9bb862be
...
@@ -68,7 +68,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
...
@@ -68,7 +68,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
unsigned
int
hash
=
IP_VS_PROTO_HASH
(
pp
->
protocol
);
unsigned
int
hash
=
IP_VS_PROTO_HASH
(
pp
->
protocol
);
struct
ip_vs_proto_data
*
pd
=
struct
ip_vs_proto_data
*
pd
=
kzalloc
(
sizeof
(
struct
ip_vs_proto_data
),
GFP_
ATOMIC
);
kzalloc
(
sizeof
(
struct
ip_vs_proto_data
),
GFP_
KERNEL
);
if
(
!
pd
)
if
(
!
pd
)
return
-
ENOMEM
;
return
-
ENOMEM
;
...
@@ -156,7 +156,7 @@ EXPORT_SYMBOL(ip_vs_proto_get);
...
@@ -156,7 +156,7 @@ EXPORT_SYMBOL(ip_vs_proto_get);
/*
/*
* get ip_vs_protocol object data by netns and proto
* get ip_vs_protocol object data by netns and proto
*/
*/
struct
ip_vs_proto_data
*
st
atic
st
ruct
ip_vs_proto_data
*
__ipvs_proto_data_get
(
struct
netns_ipvs
*
ipvs
,
unsigned
short
proto
)
__ipvs_proto_data_get
(
struct
netns_ipvs
*
ipvs
,
unsigned
short
proto
)
{
{
struct
ip_vs_proto_data
*
pd
;
struct
ip_vs_proto_data
*
pd
;
...
@@ -199,7 +199,7 @@ void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
...
@@ -199,7 +199,7 @@ void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
int
*
int
*
ip_vs_create_timeout_table
(
int
*
table
,
int
size
)
ip_vs_create_timeout_table
(
int
*
table
,
int
size
)
{
{
return
kmemdup
(
table
,
size
,
GFP_
ATOMIC
);
return
kmemdup
(
table
,
size
,
GFP_
KERNEL
);
}
}
...
...
net/netfilter/ipvs/ip_vs_sh.c
View file @
9bb862be
...
@@ -162,7 +162,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
...
@@ -162,7 +162,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
/* allocate the SH table for this service */
/* allocate the SH table for this service */
tbl
=
kmalloc
(
sizeof
(
struct
ip_vs_sh_bucket
)
*
IP_VS_SH_TAB_SIZE
,
tbl
=
kmalloc
(
sizeof
(
struct
ip_vs_sh_bucket
)
*
IP_VS_SH_TAB_SIZE
,
GFP_
ATOMIC
);
GFP_
KERNEL
);
if
(
tbl
==
NULL
)
if
(
tbl
==
NULL
)
return
-
ENOMEM
;
return
-
ENOMEM
;
...
...
net/netfilter/ipvs/ip_vs_sync.c
View file @
9bb862be
...
@@ -196,6 +196,7 @@ struct ip_vs_sync_thread_data {
...
@@ -196,6 +196,7 @@ struct ip_vs_sync_thread_data {
struct
net
*
net
;
struct
net
*
net
;
struct
socket
*
sock
;
struct
socket
*
sock
;
char
*
buf
;
char
*
buf
;
int
id
;
};
};
/* Version 0 definition of packet sizes */
/* Version 0 definition of packet sizes */
...
@@ -271,13 +272,6 @@ struct ip_vs_sync_buff {
...
@@ -271,13 +272,6 @@ struct ip_vs_sync_buff {
unsigned
char
*
end
;
unsigned
char
*
end
;
};
};
/* multicast addr */
static
struct
sockaddr_in
mcast_addr
=
{
.
sin_family
=
AF_INET
,
.
sin_port
=
cpu_to_be16
(
IP_VS_SYNC_PORT
),
.
sin_addr
.
s_addr
=
cpu_to_be32
(
IP_VS_SYNC_GROUP
),
};
/*
/*
* Copy of struct ip_vs_seq
* Copy of struct ip_vs_seq
* From unaligned network order to aligned host order
* From unaligned network order to aligned host order
...
@@ -300,18 +294,22 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
...
@@ -300,18 +294,22 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
put_unaligned_be32
(
ho
->
previous_delta
,
&
no
->
previous_delta
);
put_unaligned_be32
(
ho
->
previous_delta
,
&
no
->
previous_delta
);
}
}
static
inline
struct
ip_vs_sync_buff
*
sb_dequeue
(
struct
netns_ipvs
*
ipvs
)
static
inline
struct
ip_vs_sync_buff
*
sb_dequeue
(
struct
netns_ipvs
*
ipvs
,
struct
ipvs_master_sync_state
*
ms
)
{
{
struct
ip_vs_sync_buff
*
sb
;
struct
ip_vs_sync_buff
*
sb
;
spin_lock_bh
(
&
ipvs
->
sync_lock
);
spin_lock_bh
(
&
ipvs
->
sync_lock
);
if
(
list_empty
(
&
ipv
s
->
sync_queue
))
{
if
(
list_empty
(
&
m
s
->
sync_queue
))
{
sb
=
NULL
;
sb
=
NULL
;
__set_current_state
(
TASK_INTERRUPTIBLE
);
}
else
{
}
else
{
sb
=
list_entry
(
ipvs
->
sync_queue
.
next
,
sb
=
list_entry
(
ms
->
sync_queue
.
next
,
struct
ip_vs_sync_buff
,
struct
ip_vs_sync_buff
,
list
);
list
);
list_del
(
&
sb
->
list
);
list_del
(
&
sb
->
list
);
ms
->
sync_queue_len
--
;
if
(
!
ms
->
sync_queue_len
)
ms
->
sync_queue_delay
=
0
;
}
}
spin_unlock_bh
(
&
ipvs
->
sync_lock
);
spin_unlock_bh
(
&
ipvs
->
sync_lock
);
...
@@ -334,7 +332,7 @@ ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
...
@@ -334,7 +332,7 @@ ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
kfree
(
sb
);
kfree
(
sb
);
return
NULL
;
return
NULL
;
}
}
sb
->
mesg
->
reserved
=
0
;
/* old nr_conns i.e. must be zeo now */
sb
->
mesg
->
reserved
=
0
;
/* old nr_conns i.e. must be ze
r
o now */
sb
->
mesg
->
version
=
SYNC_PROTO_VER
;
sb
->
mesg
->
version
=
SYNC_PROTO_VER
;
sb
->
mesg
->
syncid
=
ipvs
->
master_syncid
;
sb
->
mesg
->
syncid
=
ipvs
->
master_syncid
;
sb
->
mesg
->
size
=
sizeof
(
struct
ip_vs_sync_mesg
);
sb
->
mesg
->
size
=
sizeof
(
struct
ip_vs_sync_mesg
);
...
@@ -353,14 +351,22 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
...
@@ -353,14 +351,22 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
kfree
(
sb
);
kfree
(
sb
);
}
}
static
inline
void
sb_queue_tail
(
struct
netns_ipvs
*
ipvs
)
static
inline
void
sb_queue_tail
(
struct
netns_ipvs
*
ipvs
,
struct
ipvs_master_sync_state
*
ms
)
{
{
struct
ip_vs_sync_buff
*
sb
=
ipv
s
->
sync_buff
;
struct
ip_vs_sync_buff
*
sb
=
m
s
->
sync_buff
;
spin_lock
(
&
ipvs
->
sync_lock
);
spin_lock
(
&
ipvs
->
sync_lock
);
if
(
ipvs
->
sync_state
&
IP_VS_STATE_MASTER
)
if
(
ipvs
->
sync_state
&
IP_VS_STATE_MASTER
&&
list_add_tail
(
&
sb
->
list
,
&
ipvs
->
sync_queue
);
ms
->
sync_queue_len
<
sysctl_sync_qlen_max
(
ipvs
))
{
else
if
(
!
ms
->
sync_queue_len
)
schedule_delayed_work
(
&
ms
->
master_wakeup_work
,
max
(
IPVS_SYNC_SEND_DELAY
,
1
));
ms
->
sync_queue_len
++
;
list_add_tail
(
&
sb
->
list
,
&
ms
->
sync_queue
);
if
((
++
ms
->
sync_queue_delay
)
==
IPVS_SYNC_WAKEUP_RATE
)
wake_up_process
(
ms
->
master_thread
);
}
else
ip_vs_sync_buff_release
(
sb
);
ip_vs_sync_buff_release
(
sb
);
spin_unlock
(
&
ipvs
->
sync_lock
);
spin_unlock
(
&
ipvs
->
sync_lock
);
}
}
...
@@ -370,49 +376,26 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs)
...
@@ -370,49 +376,26 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs)
* than the specified time or the specified time is zero.
* than the specified time or the specified time is zero.
*/
*/
static
inline
struct
ip_vs_sync_buff
*
static
inline
struct
ip_vs_sync_buff
*
get_curr_sync_buff
(
struct
netns_ipvs
*
ipvs
,
unsigned
long
time
)
get_curr_sync_buff
(
struct
netns_ipvs
*
ipvs
,
struct
ipvs_master_sync_state
*
ms
,
unsigned
long
time
)
{
{
struct
ip_vs_sync_buff
*
sb
;
struct
ip_vs_sync_buff
*
sb
;
spin_lock_bh
(
&
ipvs
->
sync_buff_lock
);
spin_lock_bh
(
&
ipvs
->
sync_buff_lock
);
if
(
ipvs
->
sync_buff
&&
sb
=
ms
->
sync_buff
;
time_after_eq
(
jiffies
-
ipvs
->
sync_buff
->
firstuse
,
time
))
{
if
(
sb
&&
time_after_eq
(
jiffies
-
sb
->
firstuse
,
time
))
{
sb
=
ipvs
->
sync_buff
;
ms
->
sync_buff
=
NULL
;
ipvs
->
sync_buff
=
NULL
;
__set_current_state
(
TASK_RUNNING
)
;
}
else
}
else
sb
=
NULL
;
sb
=
NULL
;
spin_unlock_bh
(
&
ipvs
->
sync_buff_lock
);
spin_unlock_bh
(
&
ipvs
->
sync_buff_lock
);
return
sb
;
return
sb
;
}
}
/*
static
inline
int
* Switch mode from sending version 0 or 1
select_master_thread_id
(
struct
netns_ipvs
*
ipvs
,
struct
ip_vs_conn
*
cp
)
* - must handle sync_buf
*/
void
ip_vs_sync_switch_mode
(
struct
net
*
net
,
int
mode
)
{
{
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
return
((
long
)
cp
>>
(
1
+
ilog2
(
sizeof
(
*
cp
))))
&
ipvs
->
threads_mask
;
if
(
!
(
ipvs
->
sync_state
&
IP_VS_STATE_MASTER
))
return
;
if
(
mode
==
sysctl_sync_ver
(
ipvs
)
||
!
ipvs
->
sync_buff
)
return
;
spin_lock_bh
(
&
ipvs
->
sync_buff_lock
);
/* Buffer empty ? then let buf_create do the job */
if
(
ipvs
->
sync_buff
->
mesg
->
size
<=
sizeof
(
struct
ip_vs_sync_mesg
))
{
kfree
(
ipvs
->
sync_buff
);
ipvs
->
sync_buff
=
NULL
;
}
else
{
spin_lock_bh
(
&
ipvs
->
sync_lock
);
if
(
ipvs
->
sync_state
&
IP_VS_STATE_MASTER
)
list_add_tail
(
&
ipvs
->
sync_buff
->
list
,
&
ipvs
->
sync_queue
);
else
ip_vs_sync_buff_release
(
ipvs
->
sync_buff
);
spin_unlock_bh
(
&
ipvs
->
sync_lock
);
}
spin_unlock_bh
(
&
ipvs
->
sync_buff_lock
);
}
}
/*
/*
...
@@ -442,15 +425,101 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
...
@@ -442,15 +425,101 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
return
sb
;
return
sb
;
}
}
/* Check if conn should be synced.
* pkts: conn packets, use sysctl_sync_threshold to avoid packet check
* - (1) sync_refresh_period: reduce sync rate. Additionally, retry
* sync_retries times with period of sync_refresh_period/8
* - (2) if both sync_refresh_period and sync_period are 0 send sync only
* for state changes or only once when pkts matches sync_threshold
* - (3) templates: rate can be reduced only with sync_refresh_period or
* with (2)
*/
static
int
ip_vs_sync_conn_needed
(
struct
netns_ipvs
*
ipvs
,
struct
ip_vs_conn
*
cp
,
int
pkts
)
{
unsigned
long
orig
=
ACCESS_ONCE
(
cp
->
sync_endtime
);
unsigned
long
now
=
jiffies
;
unsigned
long
n
=
(
now
+
cp
->
timeout
)
&
~
3UL
;
unsigned
int
sync_refresh_period
;
int
sync_period
;
int
force
;
/* Check if we sync in current state */
if
(
unlikely
(
cp
->
flags
&
IP_VS_CONN_F_TEMPLATE
))
force
=
0
;
else
if
(
likely
(
cp
->
protocol
==
IPPROTO_TCP
))
{
if
(
!
((
1
<<
cp
->
state
)
&
((
1
<<
IP_VS_TCP_S_ESTABLISHED
)
|
(
1
<<
IP_VS_TCP_S_FIN_WAIT
)
|
(
1
<<
IP_VS_TCP_S_CLOSE
)
|
(
1
<<
IP_VS_TCP_S_CLOSE_WAIT
)
|
(
1
<<
IP_VS_TCP_S_TIME_WAIT
))))
return
0
;
force
=
cp
->
state
!=
cp
->
old_state
;
if
(
force
&&
cp
->
state
!=
IP_VS_TCP_S_ESTABLISHED
)
goto
set
;
}
else
if
(
unlikely
(
cp
->
protocol
==
IPPROTO_SCTP
))
{
if
(
!
((
1
<<
cp
->
state
)
&
((
1
<<
IP_VS_SCTP_S_ESTABLISHED
)
|
(
1
<<
IP_VS_SCTP_S_CLOSED
)
|
(
1
<<
IP_VS_SCTP_S_SHUT_ACK_CLI
)
|
(
1
<<
IP_VS_SCTP_S_SHUT_ACK_SER
))))
return
0
;
force
=
cp
->
state
!=
cp
->
old_state
;
if
(
force
&&
cp
->
state
!=
IP_VS_SCTP_S_ESTABLISHED
)
goto
set
;
}
else
{
/* UDP or another protocol with single state */
force
=
0
;
}
sync_refresh_period
=
sysctl_sync_refresh_period
(
ipvs
);
if
(
sync_refresh_period
>
0
)
{
long
diff
=
n
-
orig
;
long
min_diff
=
max
(
cp
->
timeout
>>
1
,
10UL
*
HZ
);
/* Avoid sync if difference is below sync_refresh_period
* and below the half timeout.
*/
if
(
abs
(
diff
)
<
min_t
(
long
,
sync_refresh_period
,
min_diff
))
{
int
retries
=
orig
&
3
;
if
(
retries
>=
sysctl_sync_retries
(
ipvs
))
return
0
;
if
(
time_before
(
now
,
orig
-
cp
->
timeout
+
(
sync_refresh_period
>>
3
)))
return
0
;
n
|=
retries
+
1
;
}
}
sync_period
=
sysctl_sync_period
(
ipvs
);
if
(
sync_period
>
0
)
{
if
(
!
(
cp
->
flags
&
IP_VS_CONN_F_TEMPLATE
)
&&
pkts
%
sync_period
!=
sysctl_sync_threshold
(
ipvs
))
return
0
;
}
else
if
(
sync_refresh_period
<=
0
&&
pkts
!=
sysctl_sync_threshold
(
ipvs
))
return
0
;
set:
cp
->
old_state
=
cp
->
state
;
n
=
cmpxchg
(
&
cp
->
sync_endtime
,
orig
,
n
);
return
n
==
orig
||
force
;
}
/*
/*
* Version 0 , could be switched in by sys_ctl.
* Version 0 , could be switched in by sys_ctl.
* Add an ip_vs_conn information into the current sync_buff.
* Add an ip_vs_conn information into the current sync_buff.
*/
*/
void
ip_vs_sync_conn_v0
(
struct
net
*
net
,
struct
ip_vs_conn
*
cp
)
static
void
ip_vs_sync_conn_v0
(
struct
net
*
net
,
struct
ip_vs_conn
*
cp
,
int
pkts
)
{
{
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
struct
ip_vs_sync_mesg_v0
*
m
;
struct
ip_vs_sync_mesg_v0
*
m
;
struct
ip_vs_sync_conn_v0
*
s
;
struct
ip_vs_sync_conn_v0
*
s
;
struct
ip_vs_sync_buff
*
buff
;
struct
ipvs_master_sync_state
*
ms
;
int
id
;
int
len
;
int
len
;
if
(
unlikely
(
cp
->
af
!=
AF_INET
))
if
(
unlikely
(
cp
->
af
!=
AF_INET
))
...
@@ -459,21 +528,41 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
...
@@ -459,21 +528,41 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
if
(
cp
->
flags
&
IP_VS_CONN_F_ONE_PACKET
)
if
(
cp
->
flags
&
IP_VS_CONN_F_ONE_PACKET
)
return
;
return
;
if
(
!
ip_vs_sync_conn_needed
(
ipvs
,
cp
,
pkts
))
return
;
spin_lock
(
&
ipvs
->
sync_buff_lock
);
spin_lock
(
&
ipvs
->
sync_buff_lock
);
if
(
!
ipvs
->
sync_buff
)
{
if
(
!
(
ipvs
->
sync_state
&
IP_VS_STATE_MASTER
))
{
ipvs
->
sync_buff
=
spin_unlock
(
&
ipvs
->
sync_buff_lock
);
ip_vs_sync_buff_create_v0
(
ipvs
);
return
;
if
(
!
ipvs
->
sync_buff
)
{
}
id
=
select_master_thread_id
(
ipvs
,
cp
);
ms
=
&
ipvs
->
ms
[
id
];
buff
=
ms
->
sync_buff
;
if
(
buff
)
{
m
=
(
struct
ip_vs_sync_mesg_v0
*
)
buff
->
mesg
;
/* Send buffer if it is for v1 */
if
(
!
m
->
nr_conns
)
{
sb_queue_tail
(
ipvs
,
ms
);
ms
->
sync_buff
=
NULL
;
buff
=
NULL
;
}
}
if
(
!
buff
)
{
buff
=
ip_vs_sync_buff_create_v0
(
ipvs
);
if
(
!
buff
)
{
spin_unlock
(
&
ipvs
->
sync_buff_lock
);
spin_unlock
(
&
ipvs
->
sync_buff_lock
);
pr_err
(
"ip_vs_sync_buff_create failed.
\n
"
);
pr_err
(
"ip_vs_sync_buff_create failed.
\n
"
);
return
;
return
;
}
}
ms
->
sync_buff
=
buff
;
}
}
len
=
(
cp
->
flags
&
IP_VS_CONN_F_SEQ_MASK
)
?
FULL_CONN_SIZE
:
len
=
(
cp
->
flags
&
IP_VS_CONN_F_SEQ_MASK
)
?
FULL_CONN_SIZE
:
SIMPLE_CONN_SIZE
;
SIMPLE_CONN_SIZE
;
m
=
(
struct
ip_vs_sync_mesg_v0
*
)
ipvs
->
sync_
buff
->
mesg
;
m
=
(
struct
ip_vs_sync_mesg_v0
*
)
buff
->
mesg
;
s
=
(
struct
ip_vs_sync_conn_v0
*
)
ipvs
->
sync_
buff
->
head
;
s
=
(
struct
ip_vs_sync_conn_v0
*
)
buff
->
head
;
/* copy members */
/* copy members */
s
->
reserved
=
0
;
s
->
reserved
=
0
;
...
@@ -494,18 +583,24 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
...
@@ -494,18 +583,24 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
m
->
nr_conns
++
;
m
->
nr_conns
++
;
m
->
size
+=
len
;
m
->
size
+=
len
;
ipvs
->
sync_
buff
->
head
+=
len
;
buff
->
head
+=
len
;
/* check if there is a space for next one */
/* check if there is a space for next one */
if
(
ipvs
->
sync_buff
->
head
+
FULL_CONN_SIZE
>
ipvs
->
sync_
buff
->
end
)
{
if
(
buff
->
head
+
FULL_CONN_SIZE
>
buff
->
end
)
{
sb_queue_tail
(
ipvs
);
sb_queue_tail
(
ipvs
,
ms
);
ipv
s
->
sync_buff
=
NULL
;
m
s
->
sync_buff
=
NULL
;
}
}
spin_unlock
(
&
ipvs
->
sync_buff_lock
);
spin_unlock
(
&
ipvs
->
sync_buff_lock
);
/* synchronize its controller if it has */
/* synchronize its controller if it has */
if
(
cp
->
control
)
cp
=
cp
->
control
;
ip_vs_sync_conn
(
net
,
cp
->
control
);
if
(
cp
)
{
if
(
cp
->
flags
&
IP_VS_CONN_F_TEMPLATE
)
pkts
=
atomic_add_return
(
1
,
&
cp
->
in_pkts
);
else
pkts
=
sysctl_sync_threshold
(
ipvs
);
ip_vs_sync_conn
(
net
,
cp
->
control
,
pkts
);
}
}
}
/*
/*
...
@@ -513,23 +608,29 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
...
@@ -513,23 +608,29 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
* Called by ip_vs_in.
* Called by ip_vs_in.
* Sending Version 1 messages
* Sending Version 1 messages
*/
*/
void
ip_vs_sync_conn
(
struct
net
*
net
,
struct
ip_vs_conn
*
cp
)
void
ip_vs_sync_conn
(
struct
net
*
net
,
struct
ip_vs_conn
*
cp
,
int
pkts
)
{
{
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
struct
ip_vs_sync_mesg
*
m
;
struct
ip_vs_sync_mesg
*
m
;
union
ip_vs_sync_conn
*
s
;
union
ip_vs_sync_conn
*
s
;
struct
ip_vs_sync_buff
*
buff
;
struct
ipvs_master_sync_state
*
ms
;
int
id
;
__u8
*
p
;
__u8
*
p
;
unsigned
int
len
,
pe_name_len
,
pad
;
unsigned
int
len
,
pe_name_len
,
pad
;
/* Handle old version of the protocol */
/* Handle old version of the protocol */
if
(
sysctl_sync_ver
(
ipvs
)
==
0
)
{
if
(
sysctl_sync_ver
(
ipvs
)
==
0
)
{
ip_vs_sync_conn_v0
(
net
,
cp
);
ip_vs_sync_conn_v0
(
net
,
cp
,
pkts
);
return
;
return
;
}
}
/* Do not sync ONE PACKET */
/* Do not sync ONE PACKET */
if
(
cp
->
flags
&
IP_VS_CONN_F_ONE_PACKET
)
if
(
cp
->
flags
&
IP_VS_CONN_F_ONE_PACKET
)
goto
control
;
goto
control
;
sloop:
sloop:
if
(
!
ip_vs_sync_conn_needed
(
ipvs
,
cp
,
pkts
))
goto
control
;
/* Sanity checks */
/* Sanity checks */
pe_name_len
=
0
;
pe_name_len
=
0
;
if
(
cp
->
pe_data_len
)
{
if
(
cp
->
pe_data_len
)
{
...
@@ -541,6 +642,13 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
...
@@ -541,6 +642,13 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
}
}
spin_lock
(
&
ipvs
->
sync_buff_lock
);
spin_lock
(
&
ipvs
->
sync_buff_lock
);
if
(
!
(
ipvs
->
sync_state
&
IP_VS_STATE_MASTER
))
{
spin_unlock
(
&
ipvs
->
sync_buff_lock
);
return
;
}
id
=
select_master_thread_id
(
ipvs
,
cp
);
ms
=
&
ipvs
->
ms
[
id
];
#ifdef CONFIG_IP_VS_IPV6
#ifdef CONFIG_IP_VS_IPV6
if
(
cp
->
af
==
AF_INET6
)
if
(
cp
->
af
==
AF_INET6
)
...
@@ -559,27 +667,32 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
...
@@ -559,27 +667,32 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
/* check if there is a space for this one */
/* check if there is a space for this one */
pad
=
0
;
pad
=
0
;
if
(
ipvs
->
sync_buff
)
{
buff
=
ms
->
sync_buff
;
pad
=
(
4
-
(
size_t
)
ipvs
->
sync_buff
->
head
)
&
3
;
if
(
buff
)
{
if
(
ipvs
->
sync_buff
->
head
+
len
+
pad
>
ipvs
->
sync_buff
->
end
)
{
m
=
buff
->
mesg
;
sb_queue_tail
(
ipvs
);
pad
=
(
4
-
(
size_t
)
buff
->
head
)
&
3
;
ipvs
->
sync_buff
=
NULL
;
/* Send buffer if it is for v0 */
if
(
buff
->
head
+
len
+
pad
>
buff
->
end
||
m
->
reserved
)
{
sb_queue_tail
(
ipvs
,
ms
);
ms
->
sync_buff
=
NULL
;
buff
=
NULL
;
pad
=
0
;
pad
=
0
;
}
}
}
}
if
(
!
ipvs
->
sync_
buff
)
{
if
(
!
buff
)
{
ipvs
->
sync_
buff
=
ip_vs_sync_buff_create
(
ipvs
);
buff
=
ip_vs_sync_buff_create
(
ipvs
);
if
(
!
ipvs
->
sync_
buff
)
{
if
(
!
buff
)
{
spin_unlock
(
&
ipvs
->
sync_buff_lock
);
spin_unlock
(
&
ipvs
->
sync_buff_lock
);
pr_err
(
"ip_vs_sync_buff_create failed.
\n
"
);
pr_err
(
"ip_vs_sync_buff_create failed.
\n
"
);
return
;
return
;
}
}
ms
->
sync_buff
=
buff
;
m
=
buff
->
mesg
;
}
}
m
=
ipvs
->
sync_buff
->
mesg
;
p
=
buff
->
head
;
p
=
ipvs
->
sync_buff
->
head
;
buff
->
head
+=
pad
+
len
;
ipvs
->
sync_buff
->
head
+=
pad
+
len
;
m
->
size
+=
pad
+
len
;
m
->
size
+=
pad
+
len
;
/* Add ev. padding from prev. sync_conn */
/* Add ev. padding from prev. sync_conn */
while
(
pad
--
)
while
(
pad
--
)
...
@@ -644,16 +757,10 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
...
@@ -644,16 +757,10 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
cp
=
cp
->
control
;
cp
=
cp
->
control
;
if
(
!
cp
)
if
(
!
cp
)
return
;
return
;
/*
if
(
cp
->
flags
&
IP_VS_CONN_F_TEMPLATE
)
* Reduce sync rate for templates
pkts
=
atomic_add_return
(
1
,
&
cp
->
in_pkts
);
* i.e only increment in_pkts for Templates.
else
*/
pkts
=
sysctl_sync_threshold
(
ipvs
);
if
(
cp
->
flags
&
IP_VS_CONN_F_TEMPLATE
)
{
int
pkts
=
atomic_add_return
(
1
,
&
cp
->
in_pkts
);
if
(
pkts
%
sysctl_sync_period
(
ipvs
)
!=
1
)
return
;
}
goto
sloop
;
goto
sloop
;
}
}
...
@@ -731,9 +838,32 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
...
@@ -731,9 +838,32 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
else
else
cp
=
ip_vs_ct_in_get
(
param
);
cp
=
ip_vs_ct_in_get
(
param
);
if
(
cp
&&
param
->
pe_data
)
/* Free pe_data */
if
(
cp
)
{
/* Free pe_data */
kfree
(
param
->
pe_data
);
kfree
(
param
->
pe_data
);
if
(
!
cp
)
{
dest
=
cp
->
dest
;
spin_lock
(
&
cp
->
lock
);
if
((
cp
->
flags
^
flags
)
&
IP_VS_CONN_F_INACTIVE
&&
!
(
flags
&
IP_VS_CONN_F_TEMPLATE
)
&&
dest
)
{
if
(
flags
&
IP_VS_CONN_F_INACTIVE
)
{
atomic_dec
(
&
dest
->
activeconns
);
atomic_inc
(
&
dest
->
inactconns
);
}
else
{
atomic_inc
(
&
dest
->
activeconns
);
atomic_dec
(
&
dest
->
inactconns
);
}
}
flags
&=
IP_VS_CONN_F_BACKUP_UPD_MASK
;
flags
|=
cp
->
flags
&
~
IP_VS_CONN_F_BACKUP_UPD_MASK
;
cp
->
flags
=
flags
;
spin_unlock
(
&
cp
->
lock
);
if
(
!
dest
)
{
dest
=
ip_vs_try_bind_dest
(
cp
);
if
(
dest
)
atomic_dec
(
&
dest
->
refcnt
);
}
}
else
{
/*
/*
* Find the appropriate destination for the connection.
* Find the appropriate destination for the connection.
* If it is not found the connection will remain unbound
* If it is not found the connection will remain unbound
...
@@ -742,18 +872,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
...
@@ -742,18 +872,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
dest
=
ip_vs_find_dest
(
net
,
type
,
daddr
,
dport
,
param
->
vaddr
,
dest
=
ip_vs_find_dest
(
net
,
type
,
daddr
,
dport
,
param
->
vaddr
,
param
->
vport
,
protocol
,
fwmark
,
flags
);
param
->
vport
,
protocol
,
fwmark
,
flags
);
/* Set the approprite ativity flag */
if
(
protocol
==
IPPROTO_TCP
)
{
if
(
state
!=
IP_VS_TCP_S_ESTABLISHED
)
flags
|=
IP_VS_CONN_F_INACTIVE
;
else
flags
&=
~
IP_VS_CONN_F_INACTIVE
;
}
else
if
(
protocol
==
IPPROTO_SCTP
)
{
if
(
state
!=
IP_VS_SCTP_S_ESTABLISHED
)
flags
|=
IP_VS_CONN_F_INACTIVE
;
else
flags
&=
~
IP_VS_CONN_F_INACTIVE
;
}
cp
=
ip_vs_conn_new
(
param
,
daddr
,
dport
,
flags
,
dest
,
fwmark
);
cp
=
ip_vs_conn_new
(
param
,
daddr
,
dport
,
flags
,
dest
,
fwmark
);
if
(
dest
)
if
(
dest
)
atomic_dec
(
&
dest
->
refcnt
);
atomic_dec
(
&
dest
->
refcnt
);
...
@@ -763,34 +881,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
...
@@ -763,34 +881,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
IP_VS_DBG
(
2
,
"BACKUP, add new conn. failed
\n
"
);
IP_VS_DBG
(
2
,
"BACKUP, add new conn. failed
\n
"
);
return
;
return
;
}
}
}
else
if
(
!
cp
->
dest
)
{
dest
=
ip_vs_try_bind_dest
(
cp
);
if
(
dest
)
atomic_dec
(
&
dest
->
refcnt
);
}
else
if
((
cp
->
dest
)
&&
(
cp
->
protocol
==
IPPROTO_TCP
)
&&
(
cp
->
state
!=
state
))
{
/* update active/inactive flag for the connection */
dest
=
cp
->
dest
;
if
(
!
(
cp
->
flags
&
IP_VS_CONN_F_INACTIVE
)
&&
(
state
!=
IP_VS_TCP_S_ESTABLISHED
))
{
atomic_dec
(
&
dest
->
activeconns
);
atomic_inc
(
&
dest
->
inactconns
);
cp
->
flags
|=
IP_VS_CONN_F_INACTIVE
;
}
else
if
((
cp
->
flags
&
IP_VS_CONN_F_INACTIVE
)
&&
(
state
==
IP_VS_TCP_S_ESTABLISHED
))
{
atomic_inc
(
&
dest
->
activeconns
);
atomic_dec
(
&
dest
->
inactconns
);
cp
->
flags
&=
~
IP_VS_CONN_F_INACTIVE
;
}
}
else
if
((
cp
->
dest
)
&&
(
cp
->
protocol
==
IPPROTO_SCTP
)
&&
(
cp
->
state
!=
state
))
{
dest
=
cp
->
dest
;
if
(
!
(
cp
->
flags
&
IP_VS_CONN_F_INACTIVE
)
&&
(
state
!=
IP_VS_SCTP_S_ESTABLISHED
))
{
atomic_dec
(
&
dest
->
activeconns
);
atomic_inc
(
&
dest
->
inactconns
);
cp
->
flags
&=
~
IP_VS_CONN_F_INACTIVE
;
}
}
}
if
(
opt
)
if
(
opt
)
...
@@ -1148,6 +1238,28 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
...
@@ -1148,6 +1238,28 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
}
}
/*
* Setup sndbuf (mode=1) or rcvbuf (mode=0)
*/
static
void
set_sock_size
(
struct
sock
*
sk
,
int
mode
,
int
val
)
{
/* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */
/* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */
lock_sock
(
sk
);
if
(
mode
)
{
val
=
clamp_t
(
int
,
val
,
(
SOCK_MIN_SNDBUF
+
1
)
/
2
,
sysctl_wmem_max
);
sk
->
sk_sndbuf
=
val
*
2
;
sk
->
sk_userlocks
|=
SOCK_SNDBUF_LOCK
;
}
else
{
val
=
clamp_t
(
int
,
val
,
(
SOCK_MIN_RCVBUF
+
1
)
/
2
,
sysctl_rmem_max
);
sk
->
sk_rcvbuf
=
val
*
2
;
sk
->
sk_userlocks
|=
SOCK_RCVBUF_LOCK
;
}
release_sock
(
sk
);
}
/*
/*
* Setup loopback of outgoing multicasts on a sending socket
* Setup loopback of outgoing multicasts on a sending socket
*/
*/
...
@@ -1298,9 +1410,15 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
...
@@ -1298,9 +1410,15 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
/*
/*
* Set up sending multicast socket over UDP
* Set up sending multicast socket over UDP
*/
*/
static
struct
socket
*
make_send_sock
(
struct
net
*
net
)
static
struct
socket
*
make_send_sock
(
struct
net
*
net
,
int
id
)
{
{
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
/* multicast addr */
struct
sockaddr_in
mcast_addr
=
{
.
sin_family
=
AF_INET
,
.
sin_port
=
cpu_to_be16
(
IP_VS_SYNC_PORT
+
id
),
.
sin_addr
.
s_addr
=
cpu_to_be32
(
IP_VS_SYNC_GROUP
),
};
struct
socket
*
sock
;
struct
socket
*
sock
;
int
result
;
int
result
;
...
@@ -1324,6 +1442,9 @@ static struct socket *make_send_sock(struct net *net)
...
@@ -1324,6 +1442,9 @@ static struct socket *make_send_sock(struct net *net)
set_mcast_loop
(
sock
->
sk
,
0
);
set_mcast_loop
(
sock
->
sk
,
0
);
set_mcast_ttl
(
sock
->
sk
,
1
);
set_mcast_ttl
(
sock
->
sk
,
1
);
result
=
sysctl_sync_sock_size
(
ipvs
);
if
(
result
>
0
)
set_sock_size
(
sock
->
sk
,
1
,
result
);
result
=
bind_mcastif_addr
(
sock
,
ipvs
->
master_mcast_ifn
);
result
=
bind_mcastif_addr
(
sock
,
ipvs
->
master_mcast_ifn
);
if
(
result
<
0
)
{
if
(
result
<
0
)
{
...
@@ -1349,9 +1470,15 @@ static struct socket *make_send_sock(struct net *net)
...
@@ -1349,9 +1470,15 @@ static struct socket *make_send_sock(struct net *net)
/*
/*
* Set up receiving multicast socket over UDP
* Set up receiving multicast socket over UDP
*/
*/
static
struct
socket
*
make_receive_sock
(
struct
net
*
net
)
static
struct
socket
*
make_receive_sock
(
struct
net
*
net
,
int
id
)
{
{
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
/* multicast addr */
struct
sockaddr_in
mcast_addr
=
{
.
sin_family
=
AF_INET
,
.
sin_port
=
cpu_to_be16
(
IP_VS_SYNC_PORT
+
id
),
.
sin_addr
.
s_addr
=
cpu_to_be32
(
IP_VS_SYNC_GROUP
),
};
struct
socket
*
sock
;
struct
socket
*
sock
;
int
result
;
int
result
;
...
@@ -1369,6 +1496,9 @@ static struct socket *make_receive_sock(struct net *net)
...
@@ -1369,6 +1496,9 @@ static struct socket *make_receive_sock(struct net *net)
sk_change_net
(
sock
->
sk
,
net
);
sk_change_net
(
sock
->
sk
,
net
);
/* it is equivalent to the REUSEADDR option in user-space */
/* it is equivalent to the REUSEADDR option in user-space */
sock
->
sk
->
sk_reuse
=
SK_CAN_REUSE
;
sock
->
sk
->
sk_reuse
=
SK_CAN_REUSE
;
result
=
sysctl_sync_sock_size
(
ipvs
);
if
(
result
>
0
)
set_sock_size
(
sock
->
sk
,
0
,
result
);
result
=
sock
->
ops
->
bind
(
sock
,
(
struct
sockaddr
*
)
&
mcast_addr
,
result
=
sock
->
ops
->
bind
(
sock
,
(
struct
sockaddr
*
)
&
mcast_addr
,
sizeof
(
struct
sockaddr
));
sizeof
(
struct
sockaddr
));
...
@@ -1411,18 +1541,22 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
...
@@ -1411,18 +1541,22 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
return
len
;
return
len
;
}
}
static
void
static
int
ip_vs_send_sync_msg
(
struct
socket
*
sock
,
struct
ip_vs_sync_mesg
*
msg
)
ip_vs_send_sync_msg
(
struct
socket
*
sock
,
struct
ip_vs_sync_mesg
*
msg
)
{
{
int
msize
;
int
msize
;
int
ret
;
msize
=
msg
->
size
;
msize
=
msg
->
size
;
/* Put size in network byte order */
/* Put size in network byte order */
msg
->
size
=
htons
(
msg
->
size
);
msg
->
size
=
htons
(
msg
->
size
);
if
(
ip_vs_send_async
(
sock
,
(
char
*
)
msg
,
msize
)
!=
msize
)
ret
=
ip_vs_send_async
(
sock
,
(
char
*
)
msg
,
msize
);
pr_err
(
"ip_vs_send_async error
\n
"
);
if
(
ret
>=
0
||
ret
==
-
EAGAIN
)
return
ret
;
pr_err
(
"ip_vs_send_async error %d
\n
"
,
ret
);
return
0
;
}
}
static
int
static
int
...
@@ -1438,48 +1572,90 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
...
@@ -1438,48 +1572,90 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
iov
.
iov_base
=
buffer
;
iov
.
iov_base
=
buffer
;
iov
.
iov_len
=
(
size_t
)
buflen
;
iov
.
iov_len
=
(
size_t
)
buflen
;
len
=
kernel_recvmsg
(
sock
,
&
msg
,
&
iov
,
1
,
buflen
,
0
);
len
=
kernel_recvmsg
(
sock
,
&
msg
,
&
iov
,
1
,
buflen
,
MSG_DONTWAIT
);
if
(
len
<
0
)
if
(
len
<
0
)
return
-
1
;
return
len
;
LeaveFunction
(
7
);
LeaveFunction
(
7
);
return
len
;
return
len
;
}
}
/* Wakeup the master thread for sending */
static
void
master_wakeup_work_handler
(
struct
work_struct
*
work
)
{
struct
ipvs_master_sync_state
*
ms
=
container_of
(
work
,
struct
ipvs_master_sync_state
,
master_wakeup_work
.
work
);
struct
netns_ipvs
*
ipvs
=
ms
->
ipvs
;
spin_lock_bh
(
&
ipvs
->
sync_lock
);
if
(
ms
->
sync_queue_len
&&
ms
->
sync_queue_delay
<
IPVS_SYNC_WAKEUP_RATE
)
{
ms
->
sync_queue_delay
=
IPVS_SYNC_WAKEUP_RATE
;
wake_up_process
(
ms
->
master_thread
);
}
spin_unlock_bh
(
&
ipvs
->
sync_lock
);
}
/* Get next buffer to send */
static
inline
struct
ip_vs_sync_buff
*
next_sync_buff
(
struct
netns_ipvs
*
ipvs
,
struct
ipvs_master_sync_state
*
ms
)
{
struct
ip_vs_sync_buff
*
sb
;
sb
=
sb_dequeue
(
ipvs
,
ms
);
if
(
sb
)
return
sb
;
/* Do not delay entries in buffer for more than 2 seconds */
return
get_curr_sync_buff
(
ipvs
,
ms
,
IPVS_SYNC_FLUSH_TIME
);
}
static
int
sync_thread_master
(
void
*
data
)
static
int
sync_thread_master
(
void
*
data
)
{
{
struct
ip_vs_sync_thread_data
*
tinfo
=
data
;
struct
ip_vs_sync_thread_data
*
tinfo
=
data
;
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
tinfo
->
net
);
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
tinfo
->
net
);
struct
ipvs_master_sync_state
*
ms
=
&
ipvs
->
ms
[
tinfo
->
id
];
struct
sock
*
sk
=
tinfo
->
sock
->
sk
;
struct
ip_vs_sync_buff
*
sb
;
struct
ip_vs_sync_buff
*
sb
;
pr_info
(
"sync thread started: state = MASTER, mcast_ifn = %s, "
pr_info
(
"sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d
\n
"
,
"syncid = %d
, id = %d
\n
"
,
ipvs
->
master_mcast_ifn
,
ipvs
->
master_syncid
);
ipvs
->
master_mcast_ifn
,
ipvs
->
master_syncid
,
tinfo
->
id
);
while
(
!
kthread_should_stop
())
{
for
(;;)
{
while
((
sb
=
sb_dequeue
(
ipvs
)))
{
sb
=
next_sync_buff
(
ipvs
,
ms
);
ip_vs_send_sync_msg
(
tinfo
->
sock
,
sb
->
mesg
);
if
(
unlikely
(
kthread_should_stop
()))
ip_vs_sync_buff_release
(
sb
);
break
;
if
(
!
sb
)
{
schedule_timeout
(
IPVS_SYNC_CHECK_PERIOD
);
continue
;
}
}
while
(
ip_vs_send_sync_msg
(
tinfo
->
sock
,
sb
->
mesg
)
<
0
)
{
int
ret
=
0
;
/* check if entries stay in ipvs->sync_buff for 2 seconds */
__wait_event_interruptible
(
*
sk_sleep
(
sk
),
sb
=
get_curr_sync_buff
(
ipvs
,
2
*
HZ
);
sock_writeable
(
sk
)
||
if
(
sb
)
{
kthread_should_stop
(),
ip_vs_send_sync_msg
(
tinfo
->
sock
,
sb
->
mesg
);
ret
);
if
(
unlikely
(
kthread_should_stop
()))
goto
done
;
}
ip_vs_sync_buff_release
(
sb
);
ip_vs_sync_buff_release
(
sb
);
}
}
schedule_timeout_interruptible
(
HZ
);
done:
}
__set_current_state
(
TASK_RUNNING
);
if
(
sb
)
ip_vs_sync_buff_release
(
sb
);
/* clean up the sync_buff queue */
/* clean up the sync_buff queue */
while
((
sb
=
sb_dequeue
(
ipvs
)))
while
((
sb
=
sb_dequeue
(
ipvs
,
ms
)))
ip_vs_sync_buff_release
(
sb
);
ip_vs_sync_buff_release
(
sb
);
__set_current_state
(
TASK_RUNNING
);
/* clean up the current sync_buff */
/* clean up the current sync_buff */
sb
=
get_curr_sync_buff
(
ipvs
,
0
);
sb
=
get_curr_sync_buff
(
ipvs
,
ms
,
0
);
if
(
sb
)
if
(
sb
)
ip_vs_sync_buff_release
(
sb
);
ip_vs_sync_buff_release
(
sb
);
...
@@ -1498,8 +1674,8 @@ static int sync_thread_backup(void *data)
...
@@ -1498,8 +1674,8 @@ static int sync_thread_backup(void *data)
int
len
;
int
len
;
pr_info
(
"sync thread started: state = BACKUP, mcast_ifn = %s, "
pr_info
(
"sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d
\n
"
,
"syncid = %d
, id = %d
\n
"
,
ipvs
->
backup_mcast_ifn
,
ipvs
->
backup_syncid
);
ipvs
->
backup_mcast_ifn
,
ipvs
->
backup_syncid
,
tinfo
->
id
);
while
(
!
kthread_should_stop
())
{
while
(
!
kthread_should_stop
())
{
wait_event_interruptible
(
*
sk_sleep
(
tinfo
->
sock
->
sk
),
wait_event_interruptible
(
*
sk_sleep
(
tinfo
->
sock
->
sk
),
...
@@ -1511,6 +1687,7 @@ static int sync_thread_backup(void *data)
...
@@ -1511,6 +1687,7 @@ static int sync_thread_backup(void *data)
len
=
ip_vs_receive
(
tinfo
->
sock
,
tinfo
->
buf
,
len
=
ip_vs_receive
(
tinfo
->
sock
,
tinfo
->
buf
,
ipvs
->
recv_mesg_maxlen
);
ipvs
->
recv_mesg_maxlen
);
if
(
len
<=
0
)
{
if
(
len
<=
0
)
{
if
(
len
!=
-
EAGAIN
)
pr_err
(
"receiving message error
\n
"
);
pr_err
(
"receiving message error
\n
"
);
break
;
break
;
}
}
...
@@ -1535,86 +1712,140 @@ static int sync_thread_backup(void *data)
...
@@ -1535,86 +1712,140 @@ static int sync_thread_backup(void *data)
int
start_sync_thread
(
struct
net
*
net
,
int
state
,
char
*
mcast_ifn
,
__u8
syncid
)
int
start_sync_thread
(
struct
net
*
net
,
int
state
,
char
*
mcast_ifn
,
__u8
syncid
)
{
{
struct
ip_vs_sync_thread_data
*
tinfo
;
struct
ip_vs_sync_thread_data
*
tinfo
;
struct
task_struct
**
realtask
,
*
task
;
struct
task_struct
**
array
=
NULL
,
*
task
;
struct
socket
*
sock
;
struct
socket
*
sock
;
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
char
*
name
,
*
buf
=
NULL
;
char
*
name
;
int
(
*
threadfn
)(
void
*
data
);
int
(
*
threadfn
)(
void
*
data
);
int
id
,
count
;
int
result
=
-
ENOMEM
;
int
result
=
-
ENOMEM
;
IP_VS_DBG
(
7
,
"%s(): pid %d
\n
"
,
__func__
,
task_pid_nr
(
current
));
IP_VS_DBG
(
7
,
"%s(): pid %d
\n
"
,
__func__
,
task_pid_nr
(
current
));
IP_VS_DBG
(
7
,
"Each ip_vs_sync_conn entry needs %Zd bytes
\n
"
,
IP_VS_DBG
(
7
,
"Each ip_vs_sync_conn entry needs %Zd bytes
\n
"
,
sizeof
(
struct
ip_vs_sync_conn_v0
));
sizeof
(
struct
ip_vs_sync_conn_v0
));
if
(
!
ipvs
->
sync_state
)
{
count
=
clamp
(
sysctl_sync_ports
(
ipvs
),
1
,
IPVS_SYNC_PORTS_MAX
);
ipvs
->
threads_mask
=
count
-
1
;
}
else
count
=
ipvs
->
threads_mask
+
1
;
if
(
state
==
IP_VS_STATE_MASTER
)
{
if
(
state
==
IP_VS_STATE_MASTER
)
{
if
(
ipvs
->
m
aster_thread
)
if
(
ipvs
->
m
s
)
return
-
EEXIST
;
return
-
EEXIST
;
strlcpy
(
ipvs
->
master_mcast_ifn
,
mcast_ifn
,
strlcpy
(
ipvs
->
master_mcast_ifn
,
mcast_ifn
,
sizeof
(
ipvs
->
master_mcast_ifn
));
sizeof
(
ipvs
->
master_mcast_ifn
));
ipvs
->
master_syncid
=
syncid
;
ipvs
->
master_syncid
=
syncid
;
realtask
=
&
ipvs
->
master_thread
;
name
=
"ipvs-m:%d:%d"
;
name
=
"ipvs_master:%d"
;
threadfn
=
sync_thread_master
;
threadfn
=
sync_thread_master
;
sock
=
make_send_sock
(
net
);
}
else
if
(
state
==
IP_VS_STATE_BACKUP
)
{
}
else
if
(
state
==
IP_VS_STATE_BACKUP
)
{
if
(
ipvs
->
backup_thread
)
if
(
ipvs
->
backup_thread
s
)
return
-
EEXIST
;
return
-
EEXIST
;
strlcpy
(
ipvs
->
backup_mcast_ifn
,
mcast_ifn
,
strlcpy
(
ipvs
->
backup_mcast_ifn
,
mcast_ifn
,
sizeof
(
ipvs
->
backup_mcast_ifn
));
sizeof
(
ipvs
->
backup_mcast_ifn
));
ipvs
->
backup_syncid
=
syncid
;
ipvs
->
backup_syncid
=
syncid
;
realtask
=
&
ipvs
->
backup_thread
;
name
=
"ipvs-b:%d:%d"
;
name
=
"ipvs_backup:%d"
;
threadfn
=
sync_thread_backup
;
threadfn
=
sync_thread_backup
;
sock
=
make_receive_sock
(
net
);
}
else
{
}
else
{
return
-
EINVAL
;
return
-
EINVAL
;
}
}
if
(
IS_ERR
(
sock
))
{
if
(
state
==
IP_VS_STATE_MASTER
)
{
result
=
PTR_ERR
(
sock
);
struct
ipvs_master_sync_state
*
ms
;
ipvs
->
ms
=
kzalloc
(
count
*
sizeof
(
ipvs
->
ms
[
0
]),
GFP_KERNEL
);
if
(
!
ipvs
->
ms
)
goto
out
;
goto
out
;
ms
=
ipvs
->
ms
;
for
(
id
=
0
;
id
<
count
;
id
++
,
ms
++
)
{
INIT_LIST_HEAD
(
&
ms
->
sync_queue
);
ms
->
sync_queue_len
=
0
;
ms
->
sync_queue_delay
=
0
;
INIT_DELAYED_WORK
(
&
ms
->
master_wakeup_work
,
master_wakeup_work_handler
);
ms
->
ipvs
=
ipvs
;
}
}
}
else
{
set_sync_mesg_maxlen
(
net
,
state
);
array
=
kzalloc
(
count
*
sizeof
(
struct
task_struct
*
),
if
(
state
==
IP_VS_STATE_BACKUP
)
{
GFP_KERNEL
);
buf
=
kmalloc
(
ipvs
->
recv_mesg_maxlen
,
GFP_KERNEL
);
if
(
!
array
)
if
(
!
buf
)
goto
out
;
goto
outsocket
;
}
}
set_sync_mesg_maxlen
(
net
,
state
);
tinfo
=
NULL
;
for
(
id
=
0
;
id
<
count
;
id
++
)
{
if
(
state
==
IP_VS_STATE_MASTER
)
sock
=
make_send_sock
(
net
,
id
);
else
sock
=
make_receive_sock
(
net
,
id
);
if
(
IS_ERR
(
sock
))
{
result
=
PTR_ERR
(
sock
);
goto
outtinfo
;
}
tinfo
=
kmalloc
(
sizeof
(
*
tinfo
),
GFP_KERNEL
);
tinfo
=
kmalloc
(
sizeof
(
*
tinfo
),
GFP_KERNEL
);
if
(
!
tinfo
)
if
(
!
tinfo
)
goto
outbuf
;
goto
outsocket
;
tinfo
->
net
=
net
;
tinfo
->
net
=
net
;
tinfo
->
sock
=
sock
;
tinfo
->
sock
=
sock
;
tinfo
->
buf
=
buf
;
if
(
state
==
IP_VS_STATE_BACKUP
)
{
tinfo
->
buf
=
kmalloc
(
ipvs
->
recv_mesg_maxlen
,
GFP_KERNEL
);
if
(
!
tinfo
->
buf
)
goto
outtinfo
;
}
tinfo
->
id
=
id
;
task
=
kthread_run
(
threadfn
,
tinfo
,
name
,
ipvs
->
gen
);
task
=
kthread_run
(
threadfn
,
tinfo
,
name
,
ipvs
->
gen
,
id
);
if
(
IS_ERR
(
task
))
{
if
(
IS_ERR
(
task
))
{
result
=
PTR_ERR
(
task
);
result
=
PTR_ERR
(
task
);
goto
outtinfo
;
goto
outtinfo
;
}
}
tinfo
=
NULL
;
if
(
state
==
IP_VS_STATE_MASTER
)
ipvs
->
ms
[
id
].
master_thread
=
task
;
else
array
[
id
]
=
task
;
}
/* mark as active */
/* mark as active */
*
realtask
=
task
;
if
(
state
==
IP_VS_STATE_BACKUP
)
ipvs
->
backup_threads
=
array
;
spin_lock_bh
(
&
ipvs
->
sync_buff_lock
);
ipvs
->
sync_state
|=
state
;
ipvs
->
sync_state
|=
state
;
spin_unlock_bh
(
&
ipvs
->
sync_buff_lock
);
/* increase the module use count */
/* increase the module use count */
ip_vs_use_count_inc
();
ip_vs_use_count_inc
();
return
0
;
return
0
;
outtinfo:
kfree
(
tinfo
);
outbuf:
kfree
(
buf
);
outsocket:
outsocket:
sk_release_kernel
(
sock
->
sk
);
sk_release_kernel
(
sock
->
sk
);
outtinfo:
if
(
tinfo
)
{
sk_release_kernel
(
tinfo
->
sock
->
sk
);
kfree
(
tinfo
->
buf
);
kfree
(
tinfo
);
}
count
=
id
;
while
(
count
--
>
0
)
{
if
(
state
==
IP_VS_STATE_MASTER
)
kthread_stop
(
ipvs
->
ms
[
count
].
master_thread
);
else
kthread_stop
(
array
[
count
]);
}
kfree
(
array
);
out:
out:
if
(
!
(
ipvs
->
sync_state
&
IP_VS_STATE_MASTER
))
{
kfree
(
ipvs
->
ms
);
ipvs
->
ms
=
NULL
;
}
return
result
;
return
result
;
}
}
...
@@ -1622,38 +1853,60 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
...
@@ -1622,38 +1853,60 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
int
stop_sync_thread
(
struct
net
*
net
,
int
state
)
int
stop_sync_thread
(
struct
net
*
net
,
int
state
)
{
{
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
struct
task_struct
**
array
;
int
id
;
int
retc
=
-
EINVAL
;
int
retc
=
-
EINVAL
;
IP_VS_DBG
(
7
,
"%s(): pid %d
\n
"
,
__func__
,
task_pid_nr
(
current
));
IP_VS_DBG
(
7
,
"%s(): pid %d
\n
"
,
__func__
,
task_pid_nr
(
current
));
if
(
state
==
IP_VS_STATE_MASTER
)
{
if
(
state
==
IP_VS_STATE_MASTER
)
{
if
(
!
ipvs
->
m
aster_thread
)
if
(
!
ipvs
->
m
s
)
return
-
ESRCH
;
return
-
ESRCH
;
pr_info
(
"stopping master sync thread %d ...
\n
"
,
task_pid_nr
(
ipvs
->
master_thread
));
/*
/*
* The lock synchronizes with sb_queue_tail(), so that we don't
* The lock synchronizes with sb_queue_tail(), so that we don't
* add sync buffers to the queue, when we are already in
* add sync buffers to the queue, when we are already in
* progress of stopping the master sync daemon.
* progress of stopping the master sync daemon.
*/
*/
spin_lock_bh
(
&
ipvs
->
sync_lock
);
spin_lock_bh
(
&
ipvs
->
sync_buff_lock
);
spin_lock
(
&
ipvs
->
sync_lock
);
ipvs
->
sync_state
&=
~
IP_VS_STATE_MASTER
;
ipvs
->
sync_state
&=
~
IP_VS_STATE_MASTER
;
spin_unlock_bh
(
&
ipvs
->
sync_lock
);
spin_unlock
(
&
ipvs
->
sync_lock
);
retc
=
kthread_stop
(
ipvs
->
master_thread
);
spin_unlock_bh
(
&
ipvs
->
sync_buff_lock
);
ipvs
->
master_thread
=
NULL
;
retc
=
0
;
for
(
id
=
ipvs
->
threads_mask
;
id
>=
0
;
id
--
)
{
struct
ipvs_master_sync_state
*
ms
=
&
ipvs
->
ms
[
id
];
int
ret
;
pr_info
(
"stopping master sync thread %d ...
\n
"
,
task_pid_nr
(
ms
->
master_thread
));
cancel_delayed_work_sync
(
&
ms
->
master_wakeup_work
);
ret
=
kthread_stop
(
ms
->
master_thread
);
if
(
retc
>=
0
)
retc
=
ret
;
}
kfree
(
ipvs
->
ms
);
ipvs
->
ms
=
NULL
;
}
else
if
(
state
==
IP_VS_STATE_BACKUP
)
{
}
else
if
(
state
==
IP_VS_STATE_BACKUP
)
{
if
(
!
ipvs
->
backup_thread
)
if
(
!
ipvs
->
backup_thread
s
)
return
-
ESRCH
;
return
-
ESRCH
;
pr_info
(
"stopping backup sync thread %d ...
\n
"
,
task_pid_nr
(
ipvs
->
backup_thread
));
ipvs
->
sync_state
&=
~
IP_VS_STATE_BACKUP
;
ipvs
->
sync_state
&=
~
IP_VS_STATE_BACKUP
;
retc
=
kthread_stop
(
ipvs
->
backup_thread
);
array
=
ipvs
->
backup_threads
;
ipvs
->
backup_thread
=
NULL
;
retc
=
0
;
for
(
id
=
ipvs
->
threads_mask
;
id
>=
0
;
id
--
)
{
int
ret
;
pr_info
(
"stopping backup sync thread %d ...
\n
"
,
task_pid_nr
(
array
[
id
]));
ret
=
kthread_stop
(
array
[
id
]);
if
(
retc
>=
0
)
retc
=
ret
;
}
kfree
(
array
);
ipvs
->
backup_threads
=
NULL
;
}
}
/* decrease the module use count */
/* decrease the module use count */
...
@@ -1670,13 +1923,8 @@ int __net_init ip_vs_sync_net_init(struct net *net)
...
@@ -1670,13 +1923,8 @@ int __net_init ip_vs_sync_net_init(struct net *net)
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
struct
netns_ipvs
*
ipvs
=
net_ipvs
(
net
);
__mutex_init
(
&
ipvs
->
sync_mutex
,
"ipvs->sync_mutex"
,
&
__ipvs_sync_key
);
__mutex_init
(
&
ipvs
->
sync_mutex
,
"ipvs->sync_mutex"
,
&
__ipvs_sync_key
);
INIT_LIST_HEAD
(
&
ipvs
->
sync_queue
);
spin_lock_init
(
&
ipvs
->
sync_lock
);
spin_lock_init
(
&
ipvs
->
sync_lock
);
spin_lock_init
(
&
ipvs
->
sync_buff_lock
);
spin_lock_init
(
&
ipvs
->
sync_buff_lock
);
ipvs
->
sync_mcast_addr
.
sin_family
=
AF_INET
;
ipvs
->
sync_mcast_addr
.
sin_port
=
cpu_to_be16
(
IP_VS_SYNC_PORT
);
ipvs
->
sync_mcast_addr
.
sin_addr
.
s_addr
=
cpu_to_be32
(
IP_VS_SYNC_GROUP
);
return
0
;
return
0
;
}
}
...
...
net/netfilter/ipvs/ip_vs_wrr.c
View file @
9bb862be
...
@@ -84,7 +84,7 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
...
@@ -84,7 +84,7 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
/*
/*
* Allocate the mark variable for WRR scheduling
* Allocate the mark variable for WRR scheduling
*/
*/
mark
=
kmalloc
(
sizeof
(
struct
ip_vs_wrr_mark
),
GFP_
ATOMIC
);
mark
=
kmalloc
(
sizeof
(
struct
ip_vs_wrr_mark
),
GFP_
KERNEL
);
if
(
mark
==
NULL
)
if
(
mark
==
NULL
)
return
-
ENOMEM
;
return
-
ENOMEM
;
...
...
net/netfilter/nf_conntrack_core.c
View file @
9bb862be
...
@@ -1336,7 +1336,6 @@ static void nf_conntrack_cleanup_init_net(void)
...
@@ -1336,7 +1336,6 @@ static void nf_conntrack_cleanup_init_net(void)
while
(
untrack_refs
()
>
0
)
while
(
untrack_refs
()
>
0
)
schedule
();
schedule
();
nf_conntrack_helper_fini
();
nf_conntrack_proto_fini
();
nf_conntrack_proto_fini
();
#ifdef CONFIG_NF_CONNTRACK_ZONES
#ifdef CONFIG_NF_CONNTRACK_ZONES
nf_ct_extend_unregister
(
&
nf_ct_zone_extend
);
nf_ct_extend_unregister
(
&
nf_ct_zone_extend
);
...
@@ -1354,6 +1353,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
...
@@ -1354,6 +1353,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
}
}
nf_ct_free_hashtable
(
net
->
ct
.
hash
,
net
->
ct
.
htable_size
);
nf_ct_free_hashtable
(
net
->
ct
.
hash
,
net
->
ct
.
htable_size
);
nf_conntrack_helper_fini
(
net
);
nf_conntrack_timeout_fini
(
net
);
nf_conntrack_timeout_fini
(
net
);
nf_conntrack_ecache_fini
(
net
);
nf_conntrack_ecache_fini
(
net
);
nf_conntrack_tstamp_fini
(
net
);
nf_conntrack_tstamp_fini
(
net
);
...
@@ -1504,10 +1504,6 @@ static int nf_conntrack_init_init_net(void)
...
@@ -1504,10 +1504,6 @@ static int nf_conntrack_init_init_net(void)
if
(
ret
<
0
)
if
(
ret
<
0
)
goto
err_proto
;
goto
err_proto
;
ret
=
nf_conntrack_helper_init
();
if
(
ret
<
0
)
goto
err_helper
;
#ifdef CONFIG_NF_CONNTRACK_ZONES
#ifdef CONFIG_NF_CONNTRACK_ZONES
ret
=
nf_ct_extend_register
(
&
nf_ct_zone_extend
);
ret
=
nf_ct_extend_register
(
&
nf_ct_zone_extend
);
if
(
ret
<
0
)
if
(
ret
<
0
)
...
@@ -1525,10 +1521,8 @@ static int nf_conntrack_init_init_net(void)
...
@@ -1525,10 +1521,8 @@ static int nf_conntrack_init_init_net(void)
#ifdef CONFIG_NF_CONNTRACK_ZONES
#ifdef CONFIG_NF_CONNTRACK_ZONES
err_extend:
err_extend:
nf_conntrack_helper_fini
();
#endif
err_helper:
nf_conntrack_proto_fini
();
nf_conntrack_proto_fini
();
#endif
err_proto:
err_proto:
return
ret
;
return
ret
;
}
}
...
@@ -1589,9 +1583,14 @@ static int nf_conntrack_init_net(struct net *net)
...
@@ -1589,9 +1583,14 @@ static int nf_conntrack_init_net(struct net *net)
ret
=
nf_conntrack_timeout_init
(
net
);
ret
=
nf_conntrack_timeout_init
(
net
);
if
(
ret
<
0
)
if
(
ret
<
0
)
goto
err_timeout
;
goto
err_timeout
;
ret
=
nf_conntrack_helper_init
(
net
);
if
(
ret
<
0
)
goto
err_helper
;
return
0
;
return
0
;
err_helper:
nf_conntrack_timeout_fini
(
net
);
err_timeout:
err_timeout:
nf_conntrack_ecache_fini
(
net
);
nf_conntrack_ecache_fini
(
net
);
err_ecache:
err_ecache:
...
...
net/netfilter/nf_conntrack_ecache.c
View file @
9bb862be
...
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
...
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
int
nf_conntrack_register_notifier
(
struct
net
*
net
,
int
nf_conntrack_register_notifier
(
struct
net
*
net
,
struct
nf_ct_event_notifier
*
new
)
struct
nf_ct_event_notifier
*
new
)
{
{
int
ret
=
0
;
int
ret
;
struct
nf_ct_event_notifier
*
notify
;
struct
nf_ct_event_notifier
*
notify
;
mutex_lock
(
&
nf_ct_ecache_mutex
);
mutex_lock
(
&
nf_ct_ecache_mutex
);
...
@@ -95,8 +95,7 @@ int nf_conntrack_register_notifier(struct net *net,
...
@@ -95,8 +95,7 @@ int nf_conntrack_register_notifier(struct net *net,
goto
out_unlock
;
goto
out_unlock
;
}
}
rcu_assign_pointer
(
net
->
ct
.
nf_conntrack_event_cb
,
new
);
rcu_assign_pointer
(
net
->
ct
.
nf_conntrack_event_cb
,
new
);
mutex_unlock
(
&
nf_ct_ecache_mutex
);
ret
=
0
;
return
ret
;
out_unlock:
out_unlock:
mutex_unlock
(
&
nf_ct_ecache_mutex
);
mutex_unlock
(
&
nf_ct_ecache_mutex
);
...
@@ -121,7 +120,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
...
@@ -121,7 +120,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
int
nf_ct_expect_register_notifier
(
struct
net
*
net
,
int
nf_ct_expect_register_notifier
(
struct
net
*
net
,
struct
nf_exp_event_notifier
*
new
)
struct
nf_exp_event_notifier
*
new
)
{
{
int
ret
=
0
;
int
ret
;
struct
nf_exp_event_notifier
*
notify
;
struct
nf_exp_event_notifier
*
notify
;
mutex_lock
(
&
nf_ct_ecache_mutex
);
mutex_lock
(
&
nf_ct_ecache_mutex
);
...
@@ -132,8 +131,7 @@ int nf_ct_expect_register_notifier(struct net *net,
...
@@ -132,8 +131,7 @@ int nf_ct_expect_register_notifier(struct net *net,
goto
out_unlock
;
goto
out_unlock
;
}
}
rcu_assign_pointer
(
net
->
ct
.
nf_expect_event_cb
,
new
);
rcu_assign_pointer
(
net
->
ct
.
nf_expect_event_cb
,
new
);
mutex_unlock
(
&
nf_ct_ecache_mutex
);
ret
=
0
;
return
ret
;
out_unlock:
out_unlock:
mutex_unlock
(
&
nf_ct_ecache_mutex
);
mutex_unlock
(
&
nf_ct_ecache_mutex
);
...
...
net/netfilter/nf_conntrack_helper.c
View file @
9bb862be
...
@@ -34,6 +34,67 @@ static struct hlist_head *nf_ct_helper_hash __read_mostly;
...
@@ -34,6 +34,67 @@ static struct hlist_head *nf_ct_helper_hash __read_mostly;
static
unsigned
int
nf_ct_helper_hsize
__read_mostly
;
static
unsigned
int
nf_ct_helper_hsize
__read_mostly
;
static
unsigned
int
nf_ct_helper_count
__read_mostly
;
static
unsigned
int
nf_ct_helper_count
__read_mostly
;
static
bool
nf_ct_auto_assign_helper
__read_mostly
=
true
;
module_param_named
(
nf_conntrack_helper
,
nf_ct_auto_assign_helper
,
bool
,
0644
);
MODULE_PARM_DESC
(
nf_conntrack_helper
,
"Enable automatic conntrack helper assignment (default 1)"
);
#ifdef CONFIG_SYSCTL
static
struct
ctl_table
helper_sysctl_table
[]
=
{
{
.
procname
=
"nf_conntrack_helper"
,
.
data
=
&
init_net
.
ct
.
sysctl_auto_assign_helper
,
.
maxlen
=
sizeof
(
unsigned
int
),
.
mode
=
0644
,
.
proc_handler
=
proc_dointvec
,
},
{}
};
static
int
nf_conntrack_helper_init_sysctl
(
struct
net
*
net
)
{
struct
ctl_table
*
table
;
table
=
kmemdup
(
helper_sysctl_table
,
sizeof
(
helper_sysctl_table
),
GFP_KERNEL
);
if
(
!
table
)
goto
out
;
table
[
0
].
data
=
&
net
->
ct
.
sysctl_auto_assign_helper
;
net
->
ct
.
helper_sysctl_header
=
register_net_sysctl
(
net
,
"net/netfilter"
,
table
);
if
(
!
net
->
ct
.
helper_sysctl_header
)
{
pr_err
(
"nf_conntrack_helper: can't register to sysctl.
\n
"
);
goto
out_register
;
}
return
0
;
out_register:
kfree
(
table
);
out:
return
-
ENOMEM
;
}
static
void
nf_conntrack_helper_fini_sysctl
(
struct
net
*
net
)
{
struct
ctl_table
*
table
;
table
=
net
->
ct
.
helper_sysctl_header
->
ctl_table_arg
;
unregister_net_sysctl_table
(
net
->
ct
.
helper_sysctl_header
);
kfree
(
table
);
}
#else
static
int
nf_conntrack_helper_init_sysctl
(
struct
net
*
net
)
{
return
0
;
}
static
void
nf_conntrack_helper_fini_sysctl
(
struct
net
*
net
)
{
}
#endif
/* CONFIG_SYSCTL */
/* Stupid hash, but collision free for the default registrations of the
/* Stupid hash, but collision free for the default registrations of the
* helpers currently in the kernel. */
* helpers currently in the kernel. */
...
@@ -118,17 +179,38 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
...
@@ -118,17 +179,38 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
{
{
struct
nf_conntrack_helper
*
helper
=
NULL
;
struct
nf_conntrack_helper
*
helper
=
NULL
;
struct
nf_conn_help
*
help
;
struct
nf_conn_help
*
help
;
struct
net
*
net
=
nf_ct_net
(
ct
);
int
ret
=
0
;
int
ret
=
0
;
/* We already got a helper explicitly attached. The function
* nf_conntrack_alter_reply - in case NAT is in use - asks for looking
* the helper up again. Since now the user is in full control of
* making consistent helper configurations, skip this automatic
* re-lookup, otherwise we'll lose the helper.
*/
if
(
test_bit
(
IPS_HELPER_BIT
,
&
ct
->
status
))
return
0
;
if
(
tmpl
!=
NULL
)
{
if
(
tmpl
!=
NULL
)
{
help
=
nfct_help
(
tmpl
);
help
=
nfct_help
(
tmpl
);
if
(
help
!=
NULL
)
if
(
help
!=
NULL
)
{
helper
=
help
->
helper
;
helper
=
help
->
helper
;
set_bit
(
IPS_HELPER_BIT
,
&
ct
->
status
);
}
}
}
help
=
nfct_help
(
ct
);
help
=
nfct_help
(
ct
);
if
(
helper
==
NULL
)
if
(
net
->
ct
.
sysctl_auto_assign_helper
&&
helper
==
NULL
)
{
helper
=
__nf_ct_helper_find
(
&
ct
->
tuplehash
[
IP_CT_DIR_REPLY
].
tuple
);
helper
=
__nf_ct_helper_find
(
&
ct
->
tuplehash
[
IP_CT_DIR_REPLY
].
tuple
);
if
(
unlikely
(
!
net
->
ct
.
auto_assign_helper_warned
&&
helper
))
{
pr_info
(
"nf_conntrack: automatic helper "
"assignment is deprecated and it will "
"be removed soon. Use the iptables CT target "
"to attach helpers instead.
\n
"
);
net
->
ct
.
auto_assign_helper_warned
=
true
;
}
}
if
(
helper
==
NULL
)
{
if
(
helper
==
NULL
)
{
if
(
help
)
if
(
help
)
RCU_INIT_POINTER
(
help
->
helper
,
NULL
);
RCU_INIT_POINTER
(
help
->
helper
,
NULL
);
...
@@ -315,28 +397,44 @@ static struct nf_ct_ext_type helper_extend __read_mostly = {
...
@@ -315,28 +397,44 @@ static struct nf_ct_ext_type helper_extend __read_mostly = {
.
id
=
NF_CT_EXT_HELPER
,
.
id
=
NF_CT_EXT_HELPER
,
};
};
int
nf_conntrack_helper_init
(
void
)
int
nf_conntrack_helper_init
(
struct
net
*
net
)
{
{
int
err
;
int
err
;
net
->
ct
.
auto_assign_helper_warned
=
false
;
net
->
ct
.
sysctl_auto_assign_helper
=
nf_ct_auto_assign_helper
;
if
(
net_eq
(
net
,
&
init_net
))
{
nf_ct_helper_hsize
=
1
;
/* gets rounded up to use one page */
nf_ct_helper_hsize
=
1
;
/* gets rounded up to use one page */
nf_ct_helper_hash
=
nf_ct_alloc_hashtable
(
&
nf_ct_helper_hsize
,
0
);
nf_ct_helper_hash
=
nf_ct_alloc_hashtable
(
&
nf_ct_helper_hsize
,
0
);
if
(
!
nf_ct_helper_hash
)
if
(
!
nf_ct_helper_hash
)
return
-
ENOMEM
;
return
-
ENOMEM
;
err
=
nf_ct_extend_register
(
&
helper_extend
);
err
=
nf_ct_extend_register
(
&
helper_extend
);
if
(
err
<
0
)
if
(
err
<
0
)
goto
err1
;
goto
err1
;
}
err
=
nf_conntrack_helper_init_sysctl
(
net
);
if
(
err
<
0
)
goto
out_sysctl
;
return
0
;
return
0
;
out_sysctl:
if
(
net_eq
(
net
,
&
init_net
))
nf_ct_extend_unregister
(
&
helper_extend
);
err1:
err1:
nf_ct_free_hashtable
(
nf_ct_helper_hash
,
nf_ct_helper_hsize
);
nf_ct_free_hashtable
(
nf_ct_helper_hash
,
nf_ct_helper_hsize
);
return
err
;
return
err
;
}
}
void
nf_conntrack_helper_fini
(
void
)
void
nf_conntrack_helper_fini
(
struct
net
*
net
)
{
{
nf_conntrack_helper_fini_sysctl
(
net
);
if
(
net_eq
(
net
,
&
init_net
))
{
nf_ct_extend_unregister
(
&
helper_extend
);
nf_ct_extend_unregister
(
&
helper_extend
);
nf_ct_free_hashtable
(
nf_ct_helper_hash
,
nf_ct_helper_hsize
);
nf_ct_free_hashtable
(
nf_ct_helper_hash
,
nf_ct_helper_hsize
);
}
}
}
net/netfilter/nf_conntrack_netlink.c
View file @
9bb862be
...
@@ -2080,7 +2080,15 @@ static int
...
@@ -2080,7 +2080,15 @@ static int
ctnetlink_change_expect
(
struct
nf_conntrack_expect
*
x
,
ctnetlink_change_expect
(
struct
nf_conntrack_expect
*
x
,
const
struct
nlattr
*
const
cda
[])
const
struct
nlattr
*
const
cda
[])
{
{
return
-
EOPNOTSUPP
;
if
(
cda
[
CTA_EXPECT_TIMEOUT
])
{
if
(
!
del_timer
(
&
x
->
timeout
))
return
-
ETIME
;
x
->
timeout
.
expires
=
jiffies
+
ntohl
(
nla_get_be32
(
cda
[
CTA_EXPECT_TIMEOUT
]))
*
HZ
;
add_timer
(
&
x
->
timeout
);
}
return
0
;
}
}
static
const
struct
nla_policy
exp_nat_nla_policy
[
CTA_EXPECT_NAT_MAX
+
1
]
=
{
static
const
struct
nla_policy
exp_nat_nla_policy
[
CTA_EXPECT_NAT_MAX
+
1
]
=
{
...
...
security/selinux/nlmsgtab.c
View file @
9bb862be
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
#include <linux/netlink.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/rtnetlink.h>
#include <linux/if.h>
#include <linux/if.h>
#include <linux/netfilter_ipv4/ip_queue.h>
#include <linux/inet_diag.h>
#include <linux/inet_diag.h>
#include <linux/xfrm.h>
#include <linux/xfrm.h>
#include <linux/audit.h>
#include <linux/audit.h>
...
@@ -70,12 +69,6 @@ static struct nlmsg_perm nlmsg_route_perms[] =
...
@@ -70,12 +69,6 @@ static struct nlmsg_perm nlmsg_route_perms[] =
{
RTM_SETDCB
,
NETLINK_ROUTE_SOCKET__NLMSG_WRITE
},
{
RTM_SETDCB
,
NETLINK_ROUTE_SOCKET__NLMSG_WRITE
},
};
};
static
struct
nlmsg_perm
nlmsg_firewall_perms
[]
=
{
{
IPQM_MODE
,
NETLINK_FIREWALL_SOCKET__NLMSG_WRITE
},
{
IPQM_VERDICT
,
NETLINK_FIREWALL_SOCKET__NLMSG_WRITE
},
};
static
struct
nlmsg_perm
nlmsg_tcpdiag_perms
[]
=
static
struct
nlmsg_perm
nlmsg_tcpdiag_perms
[]
=
{
{
{
TCPDIAG_GETSOCK
,
NETLINK_TCPDIAG_SOCKET__NLMSG_READ
},
{
TCPDIAG_GETSOCK
,
NETLINK_TCPDIAG_SOCKET__NLMSG_READ
},
...
@@ -145,12 +138,6 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
...
@@ -145,12 +138,6 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
sizeof
(
nlmsg_route_perms
));
sizeof
(
nlmsg_route_perms
));
break
;
break
;
case
SECCLASS_NETLINK_FIREWALL_SOCKET
:
case
SECCLASS_NETLINK_IP6FW_SOCKET
:
err
=
nlmsg_perm
(
nlmsg_type
,
perm
,
nlmsg_firewall_perms
,
sizeof
(
nlmsg_firewall_perms
));
break
;
case
SECCLASS_NETLINK_TCPDIAG_SOCKET
:
case
SECCLASS_NETLINK_TCPDIAG_SOCKET
:
err
=
nlmsg_perm
(
nlmsg_type
,
perm
,
nlmsg_tcpdiag_perms
,
err
=
nlmsg_perm
(
nlmsg_type
,
perm
,
nlmsg_tcpdiag_perms
,
sizeof
(
nlmsg_tcpdiag_perms
));
sizeof
(
nlmsg_tcpdiag_perms
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment