Commit 9bb862be authored by David S. Miller

Merge branch 'master' of git://1984.lsi.us.es/net-next

parents b44907e6 d16cf20e
What: ip_queue
Date: finally removed in kernel v3.5.0
Contact: Pablo Neira Ayuso <pablo@netfilter.org>
Description:
ip_queue has been replaced by nfnetlink_queue, which provides a
more advanced queueing mechanism to user-space. The ip_queue
module was announced as obsolete years ago.
Users:
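For migration, the replacement interface is consumed from user-space through libnetfilter_queue rather than libipq. The sketch below is illustrative only and not part of this commit: it assumes libnetfilter_queue is installed, binds queue number 0 (the queue an "iptables ... -j NFQUEUE --queue-num 0" rule would feed), and accepts every packet.

/*
 * Minimal nfnetlink_queue consumer -- illustrative sketch, not part of
 * this commit.  Build (assumption): gcc nfq.c -lnetfilter_queue
 */
#include <stdint.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/netfilter.h>		/* NF_ACCEPT */
#include <libnetfilter_queue/libnetfilter_queue.h>

static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
	      struct nfq_data *nfa, void *data)
{
	struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
	uint32_t id = ph ? ntohl(ph->packet_id) : 0;

	/* The verdict call replaces the old IPQM_VERDICT message. */
	return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
}

int main(void)
{
	char buf[4096] __attribute__((aligned));
	struct nfq_handle *h = nfq_open();
	struct nfq_q_handle *qh;
	int fd, rv;

	if (!h)
		exit(1);
	nfq_unbind_pf(h, AF_INET);	/* harmless if nothing was bound */
	nfq_bind_pf(h, AF_INET);
	qh = nfq_create_queue(h, 0, &cb, NULL);	/* queue-num 0 assumed */
	if (!qh)
		exit(1);
	/* NFQNL_COPY_PACKET plays the role of the old IPQ_COPY_PACKET. */
	nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);
	fd = nfq_fd(h);
	while ((rv = recv(fd, buf, sizeof(buf), 0)) >= 0)
		nfq_handle_packet(h, buf, rv);
	nfq_destroy_queue(qh);
	nfq_close(h);
	return 0;
}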
......@@ -1301,13 +1301,22 @@ bridge-nf-call-ip6tables - BOOLEAN
bridge-nf-filter-vlan-tagged - BOOLEAN
1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables.
0 : disable this.
Default: 1
Default: 0
bridge-nf-filter-pppoe-tagged - BOOLEAN
1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables.
0 : disable this.
Default: 1
Default: 0
bridge-nf-pass-vlan-input-dev - BOOLEAN
1: if bridge-nf-filter-vlan-tagged is enabled, try to find a vlan
interface on the bridge and set the netfilter input device to the vlan.
This allows use of e.g. "iptables -i br0.1" and makes the REDIRECT
target work with vlan-on-top-of-bridge interfaces. When no matching
vlan interface is found, or this switch is off, the input device is
set to the bridge interface.
0: disable bridge netfilter vlan interface lookup.
Default: 0
proc/sys/net/sctp/* Variables:
......
......@@ -89,6 +89,7 @@
#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */
#define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */
/* Initial bits allowed in backup server */
#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \
IP_VS_CONN_F_NOOUTPUT | \
IP_VS_CONN_F_INACTIVE | \
......@@ -97,6 +98,10 @@
IP_VS_CONN_F_TEMPLATE \
)
/* Bits allowed to update in backup server */
#define IP_VS_CONN_F_BACKUP_UPD_MASK (IP_VS_CONN_F_INACTIVE | \
IP_VS_CONN_F_SEQ_MASK)
/* Flags that are not sent to backup server start from bit 16 */
#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */
......
......@@ -83,6 +83,10 @@ enum ip_conntrack_status {
/* Conntrack is a fake untracked entry */
IPS_UNTRACKED_BIT = 12,
IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
/* Conntrack got a helper explicitly attached via CT target. */
IPS_HELPER_BIT = 13,
IPS_HELPER = (1 << IPS_HELPER_BIT),
};
/* Connection tracking event types */
......
header-y += ip_queue.h
header-y += ip_tables.h
header-y += ipt_CLUSTERIP.h
header-y += ipt_ECN.h
......
/*
* This is a module which is used for queueing IPv4 packets and
* communicating with userspace via netlink.
*
* (C) 2000 James Morris, this code is GPL.
*/
#ifndef _IP_QUEUE_H
#define _IP_QUEUE_H
#ifdef __KERNEL__
#ifdef DEBUG_IPQ
#define QDEBUG(x...) printk(KERN_DEBUG ## x)
#else
#define QDEBUG(x...)
#endif /* DEBUG_IPQ */
#else
#include <net/if.h>
#endif /* ! __KERNEL__ */
/* Messages sent from kernel */
typedef struct ipq_packet_msg {
unsigned long packet_id; /* ID of queued packet */
unsigned long mark; /* Netfilter mark value */
long timestamp_sec; /* Packet arrival time (seconds) */
long timestamp_usec; /* Packet arrival time (+useconds) */
unsigned int hook; /* Netfilter hook we rode in on */
char indev_name[IFNAMSIZ]; /* Name of incoming interface */
char outdev_name[IFNAMSIZ]; /* Name of outgoing interface */
__be16 hw_protocol; /* Hardware protocol (network order) */
unsigned short hw_type; /* Hardware type */
unsigned char hw_addrlen; /* Hardware address length */
unsigned char hw_addr[8]; /* Hardware address */
size_t data_len; /* Length of packet data */
unsigned char payload[0]; /* Optional packet data */
} ipq_packet_msg_t;
/* Messages sent from userspace */
typedef struct ipq_mode_msg {
unsigned char value; /* Requested mode */
size_t range; /* Optional range of packet requested */
} ipq_mode_msg_t;
typedef struct ipq_verdict_msg {
unsigned int value; /* Verdict to hand to netfilter */
unsigned long id; /* Packet ID for this verdict */
size_t data_len; /* Length of replacement data */
unsigned char payload[0]; /* Optional replacement packet */
} ipq_verdict_msg_t;
typedef struct ipq_peer_msg {
union {
ipq_verdict_msg_t verdict;
ipq_mode_msg_t mode;
} msg;
} ipq_peer_msg_t;
/* Packet delivery modes */
enum {
IPQ_COPY_NONE, /* Initial mode, packets are dropped */
IPQ_COPY_META, /* Copy metadata */
IPQ_COPY_PACKET /* Copy metadata + packet (range) */
};
#define IPQ_COPY_MAX IPQ_COPY_PACKET
/* Types of messages */
#define IPQM_BASE 0x10 /* standard netlink messages below this */
#define IPQM_MODE (IPQM_BASE + 1) /* Mode request from peer */
#define IPQM_VERDICT (IPQM_BASE + 2) /* Verdict from peer */
#define IPQM_PACKET (IPQM_BASE + 3) /* Packet from kernel */
#define IPQM_MAX (IPQM_BASE + 4)
#endif /*_IP_QUEUE_H*/
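For contrast with the replacement shown earlier, user-space consumed the message format defined in this (now removed) header through libipq. A hedged sketch of the classic read/verdict loop, assuming the libipq development headers are available; for reference only, not for new code:

/*
 * Legacy libipq read/verdict loop -- sketch of the interface being
 * removed; see libnetfilter_queue for the supported replacement.
 */
#include <stdlib.h>
#include <sys/socket.h>
#include <linux/netfilter.h>		/* NF_ACCEPT */
#include <libipq.h>

int main(void)
{
	unsigned char buf[4096];
	struct ipq_handle *h = ipq_create_handle(0, PF_INET);

	/* ipq_set_mode() emits an IPQM_MODE message (see above). */
	if (!h || ipq_set_mode(h, IPQ_COPY_PACKET, sizeof(buf)) < 0)
		exit(1);
	for (;;) {
		if (ipq_read(h, buf, sizeof(buf), 0) <= 0)
			break;
		if (ipq_message_type(buf) == IPQM_PACKET) {
			ipq_packet_msg_t *m = ipq_get_packet(buf);

			/* packet_id keys the IPQM_VERDICT reply */
			ipq_set_verdict(h, m->packet_id, NF_ACCEPT, 0, NULL);
		}
	}
	ipq_destroy_handle(h);
	return 0;
}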
......@@ -7,7 +7,7 @@
#define NETLINK_ROUTE 0 /* Routing/device hook */
#define NETLINK_UNUSED 1 /* Unused number */
#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */
#define NETLINK_FIREWALL 3 /* Firewalling hook */
#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */
#define NETLINK_SOCK_DIAG 4 /* socket monitoring */
#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
#define NETLINK_XFRM 6 /* ipsec */
......
......@@ -504,6 +504,7 @@ struct ip_vs_conn {
* state transition triggered
* synchronization
*/
unsigned long sync_endtime; /* jiffies + sent_retries */
/* Control members */
struct ip_vs_conn *control; /* Master control connection */
......@@ -783,6 +784,16 @@ struct ip_vs_app {
void (*timeout_change)(struct ip_vs_app *app, int flags);
};
struct ipvs_master_sync_state {
struct list_head sync_queue;
struct ip_vs_sync_buff *sync_buff;
int sync_queue_len;
unsigned int sync_queue_delay;
struct task_struct *master_thread;
struct delayed_work master_wakeup_work;
struct netns_ipvs *ipvs;
};
/* IPVS in network namespace */
struct netns_ipvs {
int gen; /* Generation */
......@@ -869,10 +880,15 @@ struct netns_ipvs {
#endif
int sysctl_snat_reroute;
int sysctl_sync_ver;
int sysctl_sync_ports;
int sysctl_sync_qlen_max;
int sysctl_sync_sock_size;
int sysctl_cache_bypass;
int sysctl_expire_nodest_conn;
int sysctl_expire_quiescent_template;
int sysctl_sync_threshold[2];
unsigned int sysctl_sync_refresh_period;
int sysctl_sync_retries;
int sysctl_nat_icmp_send;
/* ip_vs_lblc */
......@@ -888,13 +904,11 @@ struct netns_ipvs {
spinlock_t est_lock;
struct timer_list est_timer; /* Estimation timer */
/* ip_vs_sync */
struct list_head sync_queue;
spinlock_t sync_lock;
struct ip_vs_sync_buff *sync_buff;
struct ipvs_master_sync_state *ms;
spinlock_t sync_buff_lock;
struct sockaddr_in sync_mcast_addr;
struct task_struct *master_thread;
struct task_struct *backup_thread;
struct task_struct **backup_threads;
int threads_mask;
int send_mesg_maxlen;
int recv_mesg_maxlen;
volatile int sync_state;
......@@ -911,6 +925,14 @@ struct netns_ipvs {
#define DEFAULT_SYNC_THRESHOLD 3
#define DEFAULT_SYNC_PERIOD 50
#define DEFAULT_SYNC_VER 1
#define DEFAULT_SYNC_REFRESH_PERIOD (0U * HZ)
#define DEFAULT_SYNC_RETRIES 0
#define IPVS_SYNC_WAKEUP_RATE 8
#define IPVS_SYNC_QLEN_MAX (IPVS_SYNC_WAKEUP_RATE * 4)
#define IPVS_SYNC_SEND_DELAY (HZ / 50)
#define IPVS_SYNC_CHECK_PERIOD HZ
#define IPVS_SYNC_FLUSH_TIME (HZ * 2)
#define IPVS_SYNC_PORTS_MAX (1 << 6)
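As a worked example of these constants, assuming HZ == 1000 (HZ is build-dependent):

/* Worked example, assuming HZ == 1000:
 *   IPVS_SYNC_SEND_DELAY   = HZ / 50 = 20 jiffies = 20 ms batch delay
 *   IPVS_SYNC_CHECK_PERIOD = HZ      = 1 s
 *   IPVS_SYNC_FLUSH_TIME   = HZ * 2  = 2 s
 *   IPVS_SYNC_QLEN_MAX     = 8 * 4   = 32 queued buffers (the master
 *                            thread is woken every 8th buffer)
 *   IPVS_SYNC_PORTS_MAX    = 1 << 6  = 64 sync threads at most
 */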
#ifdef CONFIG_SYSCTL
......@@ -921,7 +943,17 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sync_threshold[1];
return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]);
}
static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
{
return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period);
}
static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sync_retries;
}
static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
......@@ -929,6 +961,21 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
return ipvs->sysctl_sync_ver;
}
static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
{
return ACCESS_ONCE(ipvs->sysctl_sync_ports);
}
static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sync_qlen_max;
}
static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sync_sock_size;
}
#else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
......@@ -941,11 +988,36 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
return DEFAULT_SYNC_PERIOD;
}
static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
{
return DEFAULT_SYNC_REFRESH_PERIOD;
}
static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
{
return DEFAULT_SYNC_RETRIES & 3;
}
static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
{
return DEFAULT_SYNC_VER;
}
static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
{
return 1;
}
static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
{
return IPVS_SYNC_QLEN_MAX;
}
static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
{
return 0;
}
#endif
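The ACCESS_ONCE() reads in the accessors above matter because these tunables can be rewritten through sysctl at any moment while the sync code is iterating connections; the volatile cast forces exactly one load, so a value cannot change between its test and its use. An illustrative, non-kernel sketch of the pattern (pick_sync_slot is a hypothetical helper):

/* Illustrative only -- ACCESS_ONCE() of this era expands to a volatile
 * cast, forcing the compiler to emit exactly one load: */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

static int pick_sync_slot(int *sysctl_sync_ports, unsigned long hash)
{
	int ports = ACCESS_ONCE(*sysctl_sync_ports);	/* single read */

	/* 'ports' cannot change between the read and the mask, even if
	 * the sysctl is rewritten concurrently; ports is a power of 2. */
	return hash & (ports - 1);
}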
/*
......@@ -1185,7 +1257,6 @@ extern void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
extern struct ip_vs_stats ip_vs_stats;
extern int sysctl_ip_vs_sync_ver;
extern void ip_vs_sync_switch_mode(struct net *net, int mode);
extern struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);
......@@ -1219,7 +1290,7 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
__u8 syncid);
extern int stop_sync_thread(struct net *net, int state);
extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
/*
......
......@@ -321,14 +321,8 @@ extern unsigned int nf_conntrack_max;
extern unsigned int nf_conntrack_hash_rnd;
void init_nf_conntrack_hash_rnd(void);
#define NF_CT_STAT_INC(net, count) \
__this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_INC_ATOMIC(net, count) \
do { \
local_bh_disable(); \
__this_cpu_inc((net)->ct.stat->count); \
local_bh_enable(); \
} while (0)
#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
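This simplification relies on this_cpu_inc() supplying by itself the protection the removed local_bh_disable()/local_bh_enable() pair used to provide. For reference (explanatory comment, not kernel source):

/* __this_cpu_inc(): no internal protection; the caller must already
 *                   prevent migration (e.g. bottom halves disabled).
 * this_cpu_inc():   safe from any context -- a single per-cpu incl
 *                   instruction on x86, or preemption disabled around
 *                   the update on other architectures. */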
#define MODULE_ALIAS_NFCT_HELPER(helper) \
MODULE_ALIAS("nfct-helper-" helper)
......
......@@ -60,8 +60,8 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
}
extern int nf_conntrack_helper_init(void);
extern void nf_conntrack_helper_fini(void);
extern int nf_conntrack_helper_init(struct net *net);
extern void nf_conntrack_helper_fini(struct net *net);
extern int nf_conntrack_broadcast_help(struct sk_buff *skb,
unsigned int protoff,
......
......@@ -26,11 +26,14 @@ struct netns_ct {
int sysctl_tstamp;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
int sysctl_auto_assign_helper;
bool auto_assign_helper_warned;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
struct ctl_table_header *acct_sysctl_header;
struct ctl_table_header *tstamp_sysctl_header;
struct ctl_table_header *event_sysctl_header;
struct ctl_table_header *helper_sysctl_header;
#endif
char *slabname;
};
......
......@@ -54,12 +54,14 @@ static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_arptables __read_mostly = 1;
static int brnf_filter_vlan_tagged __read_mostly = 0;
static int brnf_filter_pppoe_tagged __read_mostly = 0;
static int brnf_pass_vlan_indev __read_mostly = 0;
#else
#define brnf_call_iptables 1
#define brnf_call_ip6tables 1
#define brnf_call_arptables 1
#define brnf_filter_vlan_tagged 0
#define brnf_filter_pppoe_tagged 0
#define brnf_pass_vlan_indev 0
#endif
#define IS_IP(skb) \
......@@ -503,6 +505,19 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
return 0;
}
static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev)
{
struct net_device *vlan, *br;
br = bridge_parent(dev);
if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb))
return br;
vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK);
return vlan ? vlan : br;
}
/* Some common code for IPv4/IPv6 */
static struct net_device *setup_pre_routing(struct sk_buff *skb)
{
......@@ -515,7 +530,7 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
nf_bridge->physindev = skb->dev;
skb->dev = bridge_parent(skb->dev);
skb->dev = brnf_get_logical_dev(skb, skb->dev);
if (skb->protocol == htons(ETH_P_8021Q))
nf_bridge->mask |= BRNF_8021Q;
else if (skb->protocol == htons(ETH_P_PPP_SES))
......@@ -774,7 +789,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
else
skb->protocol = htons(ETH_P_IPV6);
NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent,
NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, in), parent,
br_nf_forward_finish);
return NF_STOLEN;
......@@ -1002,6 +1017,13 @@ static ctl_table brnf_table[] = {
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{
.procname = "bridge-nf-pass-vlan-input-dev",
.data = &brnf_pass_vlan_indev,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = brnf_sysctl_call_tables,
},
{ }
};
#endif
......
......@@ -259,7 +259,9 @@ static struct lock_class_key af_callback_keys[AF_MAX];
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
......
......@@ -66,6 +66,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
# just filtering instance of ARP tables for now
obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
/*
* This is a module which is used for queueing IPv4 packets and
* communicating with userspace via netlink.
*
* (C) 2000-2002 James Morris <jmorris@intercode.com.au>
* (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_queue.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/security.h>
#include <linux/net.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/route.h>
#include <net/netfilter/nf_queue.h>
#include <net/ip.h>
#define IPQ_QMAX_DEFAULT 1024
#define IPQ_PROC_FS_NAME "ip_queue"
#define NET_IPQ_QMAX 2088
#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
static DEFINE_SPINLOCK(queue_lock);
static int peer_pid __read_mostly;
static unsigned int copy_range __read_mostly;
static unsigned int queue_total;
static unsigned int queue_dropped = 0;
static unsigned int queue_user_dropped = 0;
static struct sock *ipqnl __read_mostly;
static LIST_HEAD(queue_list);
static DEFINE_MUTEX(ipqnl_mutex);
static inline void
__ipq_enqueue_entry(struct nf_queue_entry *entry)
{
list_add_tail(&entry->list, &queue_list);
queue_total++;
}
static inline int
__ipq_set_mode(unsigned char mode, unsigned int range)
{
int status = 0;
switch(mode) {
case IPQ_COPY_NONE:
case IPQ_COPY_META:
copy_mode = mode;
copy_range = 0;
break;
case IPQ_COPY_PACKET:
if (range > 0xFFFF)
range = 0xFFFF;
copy_range = range;
copy_mode = mode;
break;
default:
status = -EINVAL;
}
return status;
}
static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
static inline void
__ipq_reset(void)
{
peer_pid = 0;
net_disable_timestamp();
__ipq_set_mode(IPQ_COPY_NONE, 0);
__ipq_flush(NULL, 0);
}
static struct nf_queue_entry *
ipq_find_dequeue_entry(unsigned long id)
{
struct nf_queue_entry *entry = NULL, *i;
spin_lock_bh(&queue_lock);
list_for_each_entry(i, &queue_list, list) {
if ((unsigned long)i == id) {
entry = i;
break;
}
}
if (entry) {
list_del(&entry->list);
queue_total--;
}
spin_unlock_bh(&queue_lock);
return entry;
}
static void
__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
{
struct nf_queue_entry *entry, *next;
list_for_each_entry_safe(entry, next, &queue_list, list) {
if (!cmpfn || cmpfn(entry, data)) {
list_del(&entry->list);
queue_total--;
nf_reinject(entry, NF_DROP);
}
}
}
static void
ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
{
spin_lock_bh(&queue_lock);
__ipq_flush(cmpfn, data);
spin_unlock_bh(&queue_lock);
}
static struct sk_buff *
ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
{
sk_buff_data_t old_tail;
size_t size = 0;
size_t data_len = 0;
struct sk_buff *skb;
struct ipq_packet_msg *pmsg;
struct nlmsghdr *nlh;
struct timeval tv;
switch (ACCESS_ONCE(copy_mode)) {
case IPQ_COPY_META:
case IPQ_COPY_NONE:
size = NLMSG_SPACE(sizeof(*pmsg));
break;
case IPQ_COPY_PACKET:
if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
(*errp = skb_checksum_help(entry->skb)))
return NULL;
data_len = ACCESS_ONCE(copy_range);
if (data_len == 0 || data_len > entry->skb->len)
data_len = entry->skb->len;
size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
break;
default:
*errp = -EINVAL;
return NULL;
}
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
goto nlmsg_failure;
old_tail = skb->tail;
nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
pmsg = NLMSG_DATA(nlh);
memset(pmsg, 0, sizeof(*pmsg));
pmsg->packet_id = (unsigned long )entry;
pmsg->data_len = data_len;
tv = ktime_to_timeval(entry->skb->tstamp);
pmsg->timestamp_sec = tv.tv_sec;
pmsg->timestamp_usec = tv.tv_usec;
pmsg->mark = entry->skb->mark;
pmsg->hook = entry->hook;
pmsg->hw_protocol = entry->skb->protocol;
if (entry->indev)
strcpy(pmsg->indev_name, entry->indev->name);
else
pmsg->indev_name[0] = '\0';
if (entry->outdev)
strcpy(pmsg->outdev_name, entry->outdev->name);
else
pmsg->outdev_name[0] = '\0';
if (entry->indev && entry->skb->dev &&
entry->skb->mac_header != entry->skb->network_header) {
pmsg->hw_type = entry->skb->dev->type;
pmsg->hw_addrlen = dev_parse_header(entry->skb,
pmsg->hw_addr);
}
if (data_len)
if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
BUG();
nlh->nlmsg_len = skb->tail - old_tail;
return skb;
nlmsg_failure:
kfree_skb(skb);
*errp = -EINVAL;
printk(KERN_ERR "ip_queue: error creating packet message\n");
return NULL;
}
static int
ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
{
int status = -EINVAL;
struct sk_buff *nskb;
if (copy_mode == IPQ_COPY_NONE)
return -EAGAIN;
nskb = ipq_build_packet_message(entry, &status);
if (nskb == NULL)
return status;
spin_lock_bh(&queue_lock);
if (!peer_pid)
goto err_out_free_nskb;
if (queue_total >= queue_maxlen) {
queue_dropped++;
status = -ENOSPC;
if (net_ratelimit())
printk (KERN_WARNING "ip_queue: full at %d entries, "
"dropping packets(s). Dropped: %d\n", queue_total,
queue_dropped);
goto err_out_free_nskb;
}
/* netlink_unicast will either free the nskb or attach it to a socket */
status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
if (status < 0) {
queue_user_dropped++;
goto err_out_unlock;
}
__ipq_enqueue_entry(entry);
spin_unlock_bh(&queue_lock);
return status;
err_out_free_nskb:
kfree_skb(nskb);
err_out_unlock:
spin_unlock_bh(&queue_lock);
return status;
}
static int
ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
{
int diff;
struct iphdr *user_iph = (struct iphdr *)v->payload;
struct sk_buff *nskb;
if (v->data_len < sizeof(*user_iph))
return 0;
diff = v->data_len - e->skb->len;
if (diff < 0) {
if (pskb_trim(e->skb, v->data_len))
return -ENOMEM;
} else if (diff > 0) {
if (v->data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
diff, GFP_ATOMIC);
if (!nskb) {
printk(KERN_WARNING "ip_queue: error "
"in mangle, dropping packet\n");
return -ENOMEM;
}
kfree_skb(e->skb);
e->skb = nskb;
}
skb_put(e->skb, diff);
}
if (!skb_make_writable(e->skb, v->data_len))
return -ENOMEM;
skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
e->skb->ip_summed = CHECKSUM_NONE;
return 0;
}
static int
ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
{
struct nf_queue_entry *entry;
if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
return -EINVAL;
entry = ipq_find_dequeue_entry(vmsg->id);
if (entry == NULL)
return -ENOENT;
else {
int verdict = vmsg->value;
if (vmsg->data_len && vmsg->data_len == len)
if (ipq_mangle_ipv4(vmsg, entry) < 0)
verdict = NF_DROP;
nf_reinject(entry, verdict);
return 0;
}
}
static int
ipq_set_mode(unsigned char mode, unsigned int range)
{
int status;
spin_lock_bh(&queue_lock);
status = __ipq_set_mode(mode, range);
spin_unlock_bh(&queue_lock);
return status;
}
static int
ipq_receive_peer(struct ipq_peer_msg *pmsg,
unsigned char type, unsigned int len)
{
int status = 0;
if (len < sizeof(*pmsg))
return -EINVAL;
switch (type) {
case IPQM_MODE:
status = ipq_set_mode(pmsg->msg.mode.value,
pmsg->msg.mode.range);
break;
case IPQM_VERDICT:
status = ipq_set_verdict(&pmsg->msg.verdict,
len - sizeof(*pmsg));
break;
default:
status = -EINVAL;
}
return status;
}
static int
dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
{
if (entry->indev)
if (entry->indev->ifindex == ifindex)
return 1;
if (entry->outdev)
if (entry->outdev->ifindex == ifindex)
return 1;
#ifdef CONFIG_BRIDGE_NETFILTER
if (entry->skb->nf_bridge) {
if (entry->skb->nf_bridge->physindev &&
entry->skb->nf_bridge->physindev->ifindex == ifindex)
return 1;
if (entry->skb->nf_bridge->physoutdev &&
entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
return 1;
}
#endif
return 0;
}
static void
ipq_dev_drop(int ifindex)
{
ipq_flush(dev_cmp, ifindex);
}
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
static inline void
__ipq_rcv_skb(struct sk_buff *skb)
{
int status, type, pid, flags;
unsigned int nlmsglen, skblen;
struct nlmsghdr *nlh;
bool enable_timestamp = false;
skblen = skb->len;
if (skblen < sizeof(*nlh))
return;
nlh = nlmsg_hdr(skb);
nlmsglen = nlh->nlmsg_len;
if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
return;
pid = nlh->nlmsg_pid;
flags = nlh->nlmsg_flags;
if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
RCV_SKB_FAIL(-EINVAL);
if (flags & MSG_TRUNC)
RCV_SKB_FAIL(-ECOMM);
type = nlh->nlmsg_type;
if (type < NLMSG_NOOP || type >= IPQM_MAX)
RCV_SKB_FAIL(-EINVAL);
if (type <= IPQM_BASE)
return;
if (!capable(CAP_NET_ADMIN))
RCV_SKB_FAIL(-EPERM);
spin_lock_bh(&queue_lock);
if (peer_pid) {
if (peer_pid != pid) {
spin_unlock_bh(&queue_lock);
RCV_SKB_FAIL(-EBUSY);
}
} else {
enable_timestamp = true;
peer_pid = pid;
}
spin_unlock_bh(&queue_lock);
if (enable_timestamp)
net_enable_timestamp();
status = ipq_receive_peer(NLMSG_DATA(nlh), type,
nlmsglen - NLMSG_LENGTH(0));
if (status < 0)
RCV_SKB_FAIL(status);
if (flags & NLM_F_ACK)
netlink_ack(skb, nlh, 0);
}
static void
ipq_rcv_skb(struct sk_buff *skb)
{
mutex_lock(&ipqnl_mutex);
__ipq_rcv_skb(skb);
mutex_unlock(&ipqnl_mutex);
}
static int
ipq_rcv_dev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
/* Drop any packets associated with the downed device */
if (event == NETDEV_DOWN)
ipq_dev_drop(dev->ifindex);
return NOTIFY_DONE;
}
static struct notifier_block ipq_dev_notifier = {
.notifier_call = ipq_rcv_dev_event,
};
static int
ipq_rcv_nl_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct netlink_notify *n = ptr;
if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
spin_lock_bh(&queue_lock);
if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
__ipq_reset();
spin_unlock_bh(&queue_lock);
}
return NOTIFY_DONE;
}
static struct notifier_block ipq_nl_notifier = {
.notifier_call = ipq_rcv_nl_event,
};
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *ipq_sysctl_header;
static ctl_table ipq_table[] = {
{
.procname = NET_IPQ_QMAX_NAME,
.data = &queue_maxlen,
.maxlen = sizeof(queue_maxlen),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ }
};
#endif
#ifdef CONFIG_PROC_FS
static int ip_queue_show(struct seq_file *m, void *v)
{
spin_lock_bh(&queue_lock);
seq_printf(m,
"Peer PID : %d\n"
"Copy mode : %hu\n"
"Copy range : %u\n"
"Queue length : %u\n"
"Queue max. length : %u\n"
"Queue dropped : %u\n"
"Netlink dropped : %u\n",
peer_pid,
copy_mode,
copy_range,
queue_total,
queue_maxlen,
queue_dropped,
queue_user_dropped);
spin_unlock_bh(&queue_lock);
return 0;
}
static int ip_queue_open(struct inode *inode, struct file *file)
{
return single_open(file, ip_queue_show, NULL);
}
static const struct file_operations ip_queue_proc_fops = {
.open = ip_queue_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.owner = THIS_MODULE,
};
#endif
static const struct nf_queue_handler nfqh = {
.name = "ip_queue",
.outfn = &ipq_enqueue_packet,
};
static int __init ip_queue_init(void)
{
int status = -ENOMEM;
struct proc_dir_entry *proc __maybe_unused;
netlink_register_notifier(&ipq_nl_notifier);
ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
ipq_rcv_skb, NULL, THIS_MODULE);
if (ipqnl == NULL) {
printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
goto cleanup_netlink_notifier;
}
#ifdef CONFIG_PROC_FS
proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
&ip_queue_proc_fops);
if (!proc) {
printk(KERN_ERR "ip_queue: failed to create proc entry\n");
goto cleanup_ipqnl;
}
#endif
register_netdevice_notifier(&ipq_dev_notifier);
#ifdef CONFIG_SYSCTL
ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv4", ipq_table);
#endif
status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
if (status < 0) {
printk(KERN_ERR "ip_queue: failed to register queue handler\n");
goto cleanup_sysctl;
}
return status;
cleanup_sysctl:
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(ipq_sysctl_header);
#endif
unregister_netdevice_notifier(&ipq_dev_notifier);
proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
cleanup_ipqnl: __maybe_unused
netlink_kernel_release(ipqnl);
mutex_lock(&ipqnl_mutex);
mutex_unlock(&ipqnl_mutex);
cleanup_netlink_notifier:
netlink_unregister_notifier(&ipq_nl_notifier);
return status;
}
static void __exit ip_queue_fini(void)
{
nf_unregister_queue_handlers(&nfqh);
ipq_flush(NULL, 0);
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(ipq_sysctl_header);
#endif
unregister_netdevice_notifier(&ipq_dev_notifier);
proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
netlink_kernel_release(ipqnl);
mutex_lock(&ipqnl_mutex);
mutex_unlock(&ipqnl_mutex);
netlink_unregister_notifier(&ipq_nl_notifier);
}
MODULE_DESCRIPTION("IPv4 packet queue handler");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);
module_init(ip_queue_init);
module_exit(ip_queue_fini);
......@@ -25,28 +25,6 @@ config NF_CONNTRACK_IPV6
To compile it as a module, choose M here. If unsure, say N.
config IP6_NF_QUEUE
tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
depends on INET && IPV6 && NETFILTER
depends on NETFILTER_ADVANCED
---help---
This option adds a queue handler to the kernel for IPv6
packets which enables users to receive the filtered packets
with QUEUE target using libipq.
This option enables the old IPv6-only "ip6_queue" implementation
which has been obsoleted by the new "nfnetlink_queue" code (see
CONFIG_NETFILTER_NETLINK_QUEUE).
(C) Fernando Anton 2001
IPv64 Project - Work based on IPv64 draft by Arturo Azcorra.
Universidad Carlos III de Madrid
Universidad Politecnica de Alcala de Henares
email: <fanton@it.uc3m.es>.
To compile it as a module, choose M here. If unsure, say N.
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
......
......@@ -6,7 +6,6 @@
obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
......
/*
* This is a module which is used for queueing IPv6 packets and
* communicating with userspace via netlink.
*
* (C) 2001 Fernando Anton, this code is GPL.
* IPv64 Project - Work based on IPv64 draft by Arturo Azcorra.
* Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
* Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
* email: fanton@it.uc3m.es
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/ipv6.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/netfilter/nf_queue.h>
#include <linux/netfilter_ipv4/ip_queue.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#define IPQ_QMAX_DEFAULT 1024
#define IPQ_PROC_FS_NAME "ip6_queue"
#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
static DEFINE_SPINLOCK(queue_lock);
static int peer_pid __read_mostly;
static unsigned int copy_range __read_mostly;
static unsigned int queue_total;
static unsigned int queue_dropped = 0;
static unsigned int queue_user_dropped = 0;
static struct sock *ipqnl __read_mostly;
static LIST_HEAD(queue_list);
static DEFINE_MUTEX(ipqnl_mutex);
static inline void
__ipq_enqueue_entry(struct nf_queue_entry *entry)
{
list_add_tail(&entry->list, &queue_list);
queue_total++;
}
static inline int
__ipq_set_mode(unsigned char mode, unsigned int range)
{
int status = 0;
switch(mode) {
case IPQ_COPY_NONE:
case IPQ_COPY_META:
copy_mode = mode;
copy_range = 0;
break;
case IPQ_COPY_PACKET:
if (range > 0xFFFF)
range = 0xFFFF;
copy_range = range;
copy_mode = mode;
break;
default:
status = -EINVAL;
}
return status;
}
static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
static inline void
__ipq_reset(void)
{
peer_pid = 0;
net_disable_timestamp();
__ipq_set_mode(IPQ_COPY_NONE, 0);
__ipq_flush(NULL, 0);
}
static struct nf_queue_entry *
ipq_find_dequeue_entry(unsigned long id)
{
struct nf_queue_entry *entry = NULL, *i;
spin_lock_bh(&queue_lock);
list_for_each_entry(i, &queue_list, list) {
if ((unsigned long)i == id) {
entry = i;
break;
}
}
if (entry) {
list_del(&entry->list);
queue_total--;
}
spin_unlock_bh(&queue_lock);
return entry;
}
static void
__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
{
struct nf_queue_entry *entry, *next;
list_for_each_entry_safe(entry, next, &queue_list, list) {
if (!cmpfn || cmpfn(entry, data)) {
list_del(&entry->list);
queue_total--;
nf_reinject(entry, NF_DROP);
}
}
}
static void
ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
{
spin_lock_bh(&queue_lock);
__ipq_flush(cmpfn, data);
spin_unlock_bh(&queue_lock);
}
static struct sk_buff *
ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
{
sk_buff_data_t old_tail;
size_t size = 0;
size_t data_len = 0;
struct sk_buff *skb;
struct ipq_packet_msg *pmsg;
struct nlmsghdr *nlh;
struct timeval tv;
switch (ACCESS_ONCE(copy_mode)) {
case IPQ_COPY_META:
case IPQ_COPY_NONE:
size = NLMSG_SPACE(sizeof(*pmsg));
break;
case IPQ_COPY_PACKET:
if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
(*errp = skb_checksum_help(entry->skb)))
return NULL;
data_len = ACCESS_ONCE(copy_range);
if (data_len == 0 || data_len > entry->skb->len)
data_len = entry->skb->len;
size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
break;
default:
*errp = -EINVAL;
return NULL;
}
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
goto nlmsg_failure;
old_tail = skb->tail;
nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
pmsg = NLMSG_DATA(nlh);
memset(pmsg, 0, sizeof(*pmsg));
pmsg->packet_id = (unsigned long )entry;
pmsg->data_len = data_len;
tv = ktime_to_timeval(entry->skb->tstamp);
pmsg->timestamp_sec = tv.tv_sec;
pmsg->timestamp_usec = tv.tv_usec;
pmsg->mark = entry->skb->mark;
pmsg->hook = entry->hook;
pmsg->hw_protocol = entry->skb->protocol;
if (entry->indev)
strcpy(pmsg->indev_name, entry->indev->name);
else
pmsg->indev_name[0] = '\0';
if (entry->outdev)
strcpy(pmsg->outdev_name, entry->outdev->name);
else
pmsg->outdev_name[0] = '\0';
if (entry->indev && entry->skb->dev &&
entry->skb->mac_header != entry->skb->network_header) {
pmsg->hw_type = entry->skb->dev->type;
pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
}
if (data_len)
if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
BUG();
nlh->nlmsg_len = skb->tail - old_tail;
return skb;
nlmsg_failure:
kfree_skb(skb);
*errp = -EINVAL;
printk(KERN_ERR "ip6_queue: error creating packet message\n");
return NULL;
}
static int
ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
{
int status = -EINVAL;
struct sk_buff *nskb;
if (copy_mode == IPQ_COPY_NONE)
return -EAGAIN;
nskb = ipq_build_packet_message(entry, &status);
if (nskb == NULL)
return status;
spin_lock_bh(&queue_lock);
if (!peer_pid)
goto err_out_free_nskb;
if (queue_total >= queue_maxlen) {
queue_dropped++;
status = -ENOSPC;
if (net_ratelimit())
printk (KERN_WARNING "ip6_queue: fill at %d entries, "
"dropping packet(s). Dropped: %d\n", queue_total,
queue_dropped);
goto err_out_free_nskb;
}
/* netlink_unicast will either free the nskb or attach it to a socket */
status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
if (status < 0) {
queue_user_dropped++;
goto err_out_unlock;
}
__ipq_enqueue_entry(entry);
spin_unlock_bh(&queue_lock);
return status;
err_out_free_nskb:
kfree_skb(nskb);
err_out_unlock:
spin_unlock_bh(&queue_lock);
return status;
}
static int
ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
{
int diff;
struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
struct sk_buff *nskb;
if (v->data_len < sizeof(*user_iph))
return 0;
diff = v->data_len - e->skb->len;
if (diff < 0) {
if (pskb_trim(e->skb, v->data_len))
return -ENOMEM;
} else if (diff > 0) {
if (v->data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
diff, GFP_ATOMIC);
if (!nskb) {
printk(KERN_WARNING "ip6_queue: OOM "
"in mangle, dropping packet\n");
return -ENOMEM;
}
kfree_skb(e->skb);
e->skb = nskb;
}
skb_put(e->skb, diff);
}
if (!skb_make_writable(e->skb, v->data_len))
return -ENOMEM;
skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
e->skb->ip_summed = CHECKSUM_NONE;
return 0;
}
static int
ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
{
struct nf_queue_entry *entry;
if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
return -EINVAL;
entry = ipq_find_dequeue_entry(vmsg->id);
if (entry == NULL)
return -ENOENT;
else {
int verdict = vmsg->value;
if (vmsg->data_len && vmsg->data_len == len)
if (ipq_mangle_ipv6(vmsg, entry) < 0)
verdict = NF_DROP;
nf_reinject(entry, verdict);
return 0;
}
}
static int
ipq_set_mode(unsigned char mode, unsigned int range)
{
int status;
spin_lock_bh(&queue_lock);
status = __ipq_set_mode(mode, range);
spin_unlock_bh(&queue_lock);
return status;
}
static int
ipq_receive_peer(struct ipq_peer_msg *pmsg,
unsigned char type, unsigned int len)
{
int status = 0;
if (len < sizeof(*pmsg))
return -EINVAL;
switch (type) {
case IPQM_MODE:
status = ipq_set_mode(pmsg->msg.mode.value,
pmsg->msg.mode.range);
break;
case IPQM_VERDICT:
status = ipq_set_verdict(&pmsg->msg.verdict,
len - sizeof(*pmsg));
break;
default:
status = -EINVAL;
}
return status;
}
static int
dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
{
if (entry->indev)
if (entry->indev->ifindex == ifindex)
return 1;
if (entry->outdev)
if (entry->outdev->ifindex == ifindex)
return 1;
#ifdef CONFIG_BRIDGE_NETFILTER
if (entry->skb->nf_bridge) {
if (entry->skb->nf_bridge->physindev &&
entry->skb->nf_bridge->physindev->ifindex == ifindex)
return 1;
if (entry->skb->nf_bridge->physoutdev &&
entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
return 1;
}
#endif
return 0;
}
static void
ipq_dev_drop(int ifindex)
{
ipq_flush(dev_cmp, ifindex);
}
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
static inline void
__ipq_rcv_skb(struct sk_buff *skb)
{
int status, type, pid, flags;
unsigned int nlmsglen, skblen;
struct nlmsghdr *nlh;
bool enable_timestamp = false;
skblen = skb->len;
if (skblen < sizeof(*nlh))
return;
nlh = nlmsg_hdr(skb);
nlmsglen = nlh->nlmsg_len;
if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
return;
pid = nlh->nlmsg_pid;
flags = nlh->nlmsg_flags;
if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
RCV_SKB_FAIL(-EINVAL);
if (flags & MSG_TRUNC)
RCV_SKB_FAIL(-ECOMM);
type = nlh->nlmsg_type;
if (type < NLMSG_NOOP || type >= IPQM_MAX)
RCV_SKB_FAIL(-EINVAL);
if (type <= IPQM_BASE)
return;
if (!capable(CAP_NET_ADMIN))
RCV_SKB_FAIL(-EPERM);
spin_lock_bh(&queue_lock);
if (peer_pid) {
if (peer_pid != pid) {
spin_unlock_bh(&queue_lock);
RCV_SKB_FAIL(-EBUSY);
}
} else {
enable_timestamp = true;
peer_pid = pid;
}
spin_unlock_bh(&queue_lock);
if (enable_timestamp)
net_enable_timestamp();
status = ipq_receive_peer(NLMSG_DATA(nlh), type,
nlmsglen - NLMSG_LENGTH(0));
if (status < 0)
RCV_SKB_FAIL(status);
if (flags & NLM_F_ACK)
netlink_ack(skb, nlh, 0);
}
static void
ipq_rcv_skb(struct sk_buff *skb)
{
mutex_lock(&ipqnl_mutex);
__ipq_rcv_skb(skb);
mutex_unlock(&ipqnl_mutex);
}
static int
ipq_rcv_dev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
/* Drop any packets associated with the downed device */
if (event == NETDEV_DOWN)
ipq_dev_drop(dev->ifindex);
return NOTIFY_DONE;
}
static struct notifier_block ipq_dev_notifier = {
.notifier_call = ipq_rcv_dev_event,
};
static int
ipq_rcv_nl_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct netlink_notify *n = ptr;
if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
spin_lock_bh(&queue_lock);
if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
__ipq_reset();
spin_unlock_bh(&queue_lock);
}
return NOTIFY_DONE;
}
static struct notifier_block ipq_nl_notifier = {
.notifier_call = ipq_rcv_nl_event,
};
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *ipq_sysctl_header;
static ctl_table ipq_table[] = {
{
.procname = NET_IPQ_QMAX_NAME,
.data = &queue_maxlen,
.maxlen = sizeof(queue_maxlen),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ }
};
#endif
#ifdef CONFIG_PROC_FS
static int ip6_queue_show(struct seq_file *m, void *v)
{
spin_lock_bh(&queue_lock);
seq_printf(m,
"Peer PID : %d\n"
"Copy mode : %hu\n"
"Copy range : %u\n"
"Queue length : %u\n"
"Queue max. length : %u\n"
"Queue dropped : %u\n"
"Netfilter dropped : %u\n",
peer_pid,
copy_mode,
copy_range,
queue_total,
queue_maxlen,
queue_dropped,
queue_user_dropped);
spin_unlock_bh(&queue_lock);
return 0;
}
static int ip6_queue_open(struct inode *inode, struct file *file)
{
return single_open(file, ip6_queue_show, NULL);
}
static const struct file_operations ip6_queue_proc_fops = {
.open = ip6_queue_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.owner = THIS_MODULE,
};
#endif
static const struct nf_queue_handler nfqh = {
.name = "ip6_queue",
.outfn = &ipq_enqueue_packet,
};
static int __init ip6_queue_init(void)
{
int status = -ENOMEM;
struct proc_dir_entry *proc __maybe_unused;
netlink_register_notifier(&ipq_nl_notifier);
ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
ipq_rcv_skb, NULL, THIS_MODULE);
if (ipqnl == NULL) {
printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
goto cleanup_netlink_notifier;
}
#ifdef CONFIG_PROC_FS
proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
&ip6_queue_proc_fops);
if (!proc) {
printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
goto cleanup_ipqnl;
}
#endif
register_netdevice_notifier(&ipq_dev_notifier);
#ifdef CONFIG_SYSCTL
ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv6", ipq_table);
#endif
status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);
if (status < 0) {
printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
goto cleanup_sysctl;
}
return status;
cleanup_sysctl:
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(ipq_sysctl_header);
#endif
unregister_netdevice_notifier(&ipq_dev_notifier);
proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
cleanup_ipqnl: __maybe_unused
netlink_kernel_release(ipqnl);
mutex_lock(&ipqnl_mutex);
mutex_unlock(&ipqnl_mutex);
cleanup_netlink_notifier:
netlink_unregister_notifier(&ipq_nl_notifier);
return status;
}
static void __exit ip6_queue_fini(void)
{
nf_unregister_queue_handlers(&nfqh);
ipq_flush(NULL, 0);
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(ipq_sysctl_header);
#endif
unregister_netdevice_notifier(&ipq_dev_notifier);
proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
netlink_kernel_release(ipqnl);
mutex_lock(&ipqnl_mutex);
mutex_unlock(&ipqnl_mutex);
netlink_unregister_notifier(&ipq_nl_notifier);
}
MODULE_DESCRIPTION("IPv6 packet queue handler");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW);
module_init(ip6_queue_init);
module_exit(ip6_queue_fini);
......@@ -548,6 +548,7 @@ static inline void
ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
{
unsigned int conn_flags;
__u32 flags;
/* if dest is NULL, then return directly */
if (!dest)
......@@ -559,17 +560,19 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
conn_flags = atomic_read(&dest->conn_flags);
if (cp->protocol != IPPROTO_UDP)
conn_flags &= ~IP_VS_CONN_F_ONE_PACKET;
flags = cp->flags;
/* Bind with the destination and its corresponding transmitter */
if (cp->flags & IP_VS_CONN_F_SYNC) {
if (flags & IP_VS_CONN_F_SYNC) {
/* if the connection is not template and is created
* by sync, preserve the activity flag.
*/
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE))
if (!(flags & IP_VS_CONN_F_TEMPLATE))
conn_flags &= ~IP_VS_CONN_F_INACTIVE;
/* connections inherit forwarding method from dest */
cp->flags &= ~IP_VS_CONN_F_FWD_MASK;
flags &= ~(IP_VS_CONN_F_FWD_MASK | IP_VS_CONN_F_NOOUTPUT);
}
cp->flags |= conn_flags;
flags |= conn_flags;
cp->flags = flags;
cp->dest = dest;
IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
......@@ -584,12 +587,12 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
atomic_read(&dest->refcnt));
/* Update the connection counters */
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
/* It is a normal connection, so increase the inactive
connection counter because it is in TCP SYNRECV
state (inactive) or other protocol inactive state */
if ((cp->flags & IP_VS_CONN_F_SYNC) &&
(!(cp->flags & IP_VS_CONN_F_INACTIVE)))
if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
/* It is a normal connection, so modify the counters
* according to the flags, later the protocol can
* update them on state change
*/
if (!(flags & IP_VS_CONN_F_INACTIVE))
atomic_inc(&dest->activeconns);
else
atomic_inc(&dest->inactconns);
......@@ -613,14 +616,40 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
{
struct ip_vs_dest *dest;
if ((cp) && (!cp->dest)) {
dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark, cp->flags);
if (dest) {
struct ip_vs_proto_data *pd;
spin_lock(&cp->lock);
if (cp->dest) {
spin_unlock(&cp->lock);
return dest;
}
/* Applications work depending on the forwarding method
* but better to reassign them always when binding dest */
if (cp->app)
ip_vs_unbind_app(cp);
ip_vs_bind_dest(cp, dest);
spin_unlock(&cp->lock);
/* Update its packet transmitter */
cp->packet_xmit = NULL;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
ip_vs_bind_xmit_v6(cp);
else
#endif
ip_vs_bind_xmit(cp);
pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol);
if (pd && atomic_read(&pd->appcnt))
ip_vs_bind_app(cp, pd->pp);
}
return dest;
} else
return NULL;
}
......@@ -743,7 +772,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
static void ip_vs_conn_expire(unsigned long data)
{
struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
struct net *net = ip_vs_conn_net(cp);
struct netns_ipvs *ipvs = net_ipvs(net);
cp->timeout = 60*HZ;
......@@ -808,6 +838,9 @@ static void ip_vs_conn_expire(unsigned long data)
atomic_read(&cp->refcnt)-1,
atomic_read(&cp->n_control));
if (ipvs->sync_state & IP_VS_STATE_MASTER)
ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
ip_vs_conn_put(cp);
}
......@@ -881,6 +914,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
/* Set its state and timeout */
cp->state = 0;
cp->timeout = 3*HZ;
cp->sync_endtime = jiffies & ~3UL;
/* Bind its packet transmitter */
#ifdef CONFIG_IP_VS_IPV6
......
......@@ -1613,34 +1613,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
else
pkts = atomic_add_return(1, &cp->in_pkts);
if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
(pkts % sysctl_sync_period(ipvs)
== sysctl_sync_threshold(ipvs))) ||
(cp->old_state != cp->state &&
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
ip_vs_sync_conn(net, cp);
goto out;
}
}
/* Keep this block last: TCP and others with pp->num_states <= 1 */
else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
(pkts % sysctl_sync_period(ipvs)
== sysctl_sync_threshold(ipvs))) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
(cp->state == IP_VS_TCP_S_CLOSE) ||
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
(cp->state == IP_VS_TCP_S_TIME_WAIT)))))
ip_vs_sync_conn(net, cp);
out:
cp->old_state = cp->state;
if (ipvs->sync_state & IP_VS_STATE_MASTER)
ip_vs_sync_conn(net, cp, pkts);
ip_vs_conn_put(cp);
return ret;
......
......@@ -1599,6 +1599,10 @@ static int ip_vs_zero_all(struct net *net)
}
#ifdef CONFIG_SYSCTL
static int zero;
static int three = 3;
static int
proc_do_defense_mode(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
......@@ -1632,7 +1636,8 @@ proc_do_sync_threshold(ctl_table *table, int write,
memcpy(val, valp, sizeof(val));
rc = proc_dointvec(table, write, buffer, lenp, ppos);
if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
if (write && (valp[0] < 0 || valp[1] < 0 ||
(valp[0] >= valp[1] && valp[1]))) {
/* Restore the correct value */
memcpy(valp, val, sizeof(val));
}
......@@ -1652,9 +1657,24 @@ proc_do_sync_mode(ctl_table *table, int write,
if ((*valp < 0) || (*valp > 1)) {
/* Restore the correct value */
*valp = val;
} else {
struct net *net = current->nsproxy->net_ns;
ip_vs_sync_switch_mode(net, val);
}
}
return rc;
}
static int
proc_do_sync_ports(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = table->data;
int val = *valp;
int rc;
rc = proc_dointvec(table, write, buffer, lenp, ppos);
if (write && (*valp != val)) {
if (*valp < 1 || !is_power_of_2(*valp)) {
/* Restore the correct value */
*valp = val;
}
}
return rc;
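The power-of-2 restriction exists so a sync thread can be chosen with a cheap mask, threads_mask == sync_ports - 1, rather than a modulo. For reference, is_power_of_2() comes from <linux/log2.h> and uses the classic one-bit-set test:

/* For reference, from <linux/log2.h>: a power of two has exactly one
 * bit set, so n & (n - 1) clears it and yields zero. */
static inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
	return (n != 0 && ((n & (n - 1)) == 0));
}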
......@@ -1717,6 +1737,24 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = &proc_do_sync_mode,
},
{
.procname = "sync_ports",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_do_sync_ports,
},
{
.procname = "sync_qlen_max",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sync_sock_size",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "cache_bypass",
.maxlen = sizeof(int),
......@@ -1742,6 +1780,20 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_do_sync_threshold,
},
{
.procname = "sync_refresh_period",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "sync_retries",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &three,
},
{
.procname = "nat_icmp_send",
.maxlen = sizeof(int),
......@@ -3655,6 +3707,12 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
tbl[idx++].data = &ipvs->sysctl_snat_reroute;
ipvs->sysctl_sync_ver = 1;
tbl[idx++].data = &ipvs->sysctl_sync_ver;
ipvs->sysctl_sync_ports = 1;
tbl[idx++].data = &ipvs->sysctl_sync_ports;
ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
ipvs->sysctl_sync_sock_size = 0;
tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
tbl[idx++].data = &ipvs->sysctl_cache_bypass;
tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
......@@ -3662,6 +3720,10 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
tbl[idx].data = &ipvs->sysctl_sync_threshold;
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
tbl[idx++].data = &ipvs->sysctl_sync_retries;
tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
......
......@@ -149,7 +149,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
/* allocate the DH table for this service */
tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
GFP_ATOMIC);
GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
......
......@@ -485,7 +485,7 @@ static struct pernet_operations ip_vs_ftp_ops = {
.exit = __ip_vs_ftp_exit,
};
int __init ip_vs_ftp_init(void)
static int __init ip_vs_ftp_init(void)
{
int rv;
......
......@@ -342,7 +342,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
/*
* Allocate the ip_vs_lblc_table for this service
*/
tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
......
......@@ -511,7 +511,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
/*
* Allocate the ip_vs_lblcr_table for this service
*/
tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
......
......@@ -68,7 +68,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
struct netns_ipvs *ipvs = net_ipvs(net);
unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
struct ip_vs_proto_data *pd =
kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);
if (!pd)
return -ENOMEM;
......@@ -156,7 +156,7 @@ EXPORT_SYMBOL(ip_vs_proto_get);
/*
* get ip_vs_protocol object data by netns and proto
*/
struct ip_vs_proto_data *
static struct ip_vs_proto_data *
__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
{
struct ip_vs_proto_data *pd;
......@@ -199,7 +199,7 @@ void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
int *
ip_vs_create_timeout_table(int *table, int size)
{
return kmemdup(table, size, GFP_ATOMIC);
return kmemdup(table, size, GFP_KERNEL);
}
......
......@@ -162,7 +162,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
/* allocate the SH table for this service */
tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
GFP_ATOMIC);
GFP_KERNEL);
if (tbl == NULL)
return -ENOMEM;
......
......@@ -196,6 +196,7 @@ struct ip_vs_sync_thread_data {
struct net *net;
struct socket *sock;
char *buf;
int id;
};
/* Version 0 definition of packet sizes */
......@@ -271,13 +272,6 @@ struct ip_vs_sync_buff {
unsigned char *end;
};
/* multicast addr */
static struct sockaddr_in mcast_addr = {
.sin_family = AF_INET,
.sin_port = cpu_to_be16(IP_VS_SYNC_PORT),
.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
};
/*
* Copy of struct ip_vs_seq
* From unaligned network order to aligned host order
......@@ -300,18 +294,22 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
put_unaligned_be32(ho->previous_delta, &no->previous_delta);
}
static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
static inline struct ip_vs_sync_buff *
sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
{
struct ip_vs_sync_buff *sb;
spin_lock_bh(&ipvs->sync_lock);
if (list_empty(&ipvs->sync_queue)) {
if (list_empty(&ms->sync_queue)) {
sb = NULL;
__set_current_state(TASK_INTERRUPTIBLE);
} else {
sb = list_entry(ipvs->sync_queue.next,
struct ip_vs_sync_buff,
sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,
list);
list_del(&sb->list);
ms->sync_queue_len--;
if (!ms->sync_queue_len)
ms->sync_queue_delay = 0;
}
spin_unlock_bh(&ipvs->sync_lock);
......@@ -334,7 +332,7 @@ ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
kfree(sb);
return NULL;
}
sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */
sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */
sb->mesg->version = SYNC_PROTO_VER;
sb->mesg->syncid = ipvs->master_syncid;
sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
......@@ -353,14 +351,22 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
kfree(sb);
}
static inline void sb_queue_tail(struct netns_ipvs *ipvs)
static inline void sb_queue_tail(struct netns_ipvs *ipvs,
struct ipvs_master_sync_state *ms)
{
struct ip_vs_sync_buff *sb = ipvs->sync_buff;
struct ip_vs_sync_buff *sb = ms->sync_buff;
spin_lock(&ipvs->sync_lock);
if (ipvs->sync_state & IP_VS_STATE_MASTER)
list_add_tail(&sb->list, &ipvs->sync_queue);
else
if (ipvs->sync_state & IP_VS_STATE_MASTER &&
ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
if (!ms->sync_queue_len)
schedule_delayed_work(&ms->master_wakeup_work,
max(IPVS_SYNC_SEND_DELAY, 1));
ms->sync_queue_len++;
list_add_tail(&sb->list, &ms->sync_queue);
if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
wake_up_process(ms->master_thread);
} else
ip_vs_sync_buff_release(sb);
spin_unlock(&ipvs->sync_lock);
}
......@@ -370,49 +376,26 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs)
* than the specified time or the specified time is zero.
*/
static inline struct ip_vs_sync_buff *
get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,
unsigned long time)
{
struct ip_vs_sync_buff *sb;
spin_lock_bh(&ipvs->sync_buff_lock);
if (ipvs->sync_buff &&
time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) {
sb = ipvs->sync_buff;
ipvs->sync_buff = NULL;
sb = ms->sync_buff;
if (sb && time_after_eq(jiffies - sb->firstuse, time)) {
ms->sync_buff = NULL;
__set_current_state(TASK_RUNNING);
} else
sb = NULL;
spin_unlock_bh(&ipvs->sync_buff_lock);
return sb;
}
/*
* Switch mode from sending version 0 or 1
* - must handle sync_buf
*/
void ip_vs_sync_switch_mode(struct net *net, int mode)
static inline int
select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)
{
struct netns_ipvs *ipvs = net_ipvs(net);
if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
return;
if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
return;
spin_lock_bh(&ipvs->sync_buff_lock);
/* Buffer empty ? then let buf_create do the job */
if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
kfree(ipvs->sync_buff);
ipvs->sync_buff = NULL;
} else {
spin_lock_bh(&ipvs->sync_lock);
if (ipvs->sync_state & IP_VS_STATE_MASTER)
list_add_tail(&ipvs->sync_buff->list,
&ipvs->sync_queue);
else
ip_vs_sync_buff_release(ipvs->sync_buff);
spin_unlock_bh(&ipvs->sync_lock);
}
spin_unlock_bh(&ipvs->sync_buff_lock);
return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;
}
/*
......@@ -442,15 +425,101 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
return sb;
}
/* Check if conn should be synced.
* pkts: conn packets, use sysctl_sync_threshold to avoid packet check
* - (1) sync_refresh_period: reduce sync rate. Additionally, retry
* sync_retries times with period of sync_refresh_period/8
* - (2) if both sync_refresh_period and sync_period are 0 send sync only
* for state changes or only once when pkts matches sync_threshold
* - (3) templates: rate can be reduced only with sync_refresh_period or
* with (2)
*/
static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
struct ip_vs_conn *cp, int pkts)
{
unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
unsigned long now = jiffies;
unsigned long n = (now + cp->timeout) & ~3UL;
unsigned int sync_refresh_period;
int sync_period;
int force;
/* Check if we sync in current state */
if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))
force = 0;
else if (likely(cp->protocol == IPPROTO_TCP)) {
if (!((1 << cp->state) &
((1 << IP_VS_TCP_S_ESTABLISHED) |
(1 << IP_VS_TCP_S_FIN_WAIT) |
(1 << IP_VS_TCP_S_CLOSE) |
(1 << IP_VS_TCP_S_CLOSE_WAIT) |
(1 << IP_VS_TCP_S_TIME_WAIT))))
return 0;
force = cp->state != cp->old_state;
if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)
goto set;
} else if (unlikely(cp->protocol == IPPROTO_SCTP)) {
if (!((1 << cp->state) &
((1 << IP_VS_SCTP_S_ESTABLISHED) |
(1 << IP_VS_SCTP_S_CLOSED) |
(1 << IP_VS_SCTP_S_SHUT_ACK_CLI) |
(1 << IP_VS_SCTP_S_SHUT_ACK_SER))))
return 0;
force = cp->state != cp->old_state;
if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)
goto set;
} else {
/* UDP or another protocol with single state */
force = 0;
}
sync_refresh_period = sysctl_sync_refresh_period(ipvs);
if (sync_refresh_period > 0) {
long diff = n - orig;
long min_diff = max(cp->timeout >> 1, 10UL * HZ);
/* Avoid sync if the difference is below sync_refresh_period
* and below half of the timeout.
*/
if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {
int retries = orig & 3;
if (retries >= sysctl_sync_retries(ipvs))
return 0;
if (time_before(now, orig - cp->timeout +
(sync_refresh_period >> 3)))
return 0;
n |= retries + 1;
}
}
sync_period = sysctl_sync_period(ipvs);
if (sync_period > 0) {
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&
pkts % sync_period != sysctl_sync_threshold(ipvs))
return 0;
} else if (sync_refresh_period <= 0 &&
pkts != sysctl_sync_threshold(ipvs))
return 0;
set:
cp->old_state = cp->state;
n = cmpxchg(&cp->sync_endtime, orig, n);
return n == orig || force;
}
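The refresh logic above steals the two low bits of cp->sync_endtime for a retry counter; they are free because the deadline is always rounded down with "& ~3UL". A userspace sketch of that packing, using plain variables instead of jiffies and cmpxchg (in the kernel the counter is bounded by sysctl_sync_retries, at most 3):

#include <stdio.h>

/* deadline is always a multiple of 4, so bits 0-1 can hold retries */
static unsigned long pack(unsigned long deadline, unsigned int retries)
{
	return (deadline & ~3UL) | (retries & 3);
}

int main(void)
{
	unsigned long v = pack(1000, 0);
	int i;

	for (i = 0; i < 3; i++) {
		unsigned int retries = v & 3;		/* orig & 3 */
		unsigned long deadline = v & ~3UL;

		printf("deadline=%lu retries=%u\n", deadline, retries);
		v = pack(deadline, retries + 1);	/* n |= retries + 1 */
	}
	return 0;
}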
/*
* Version 0, can be switched in by sysctl.
* Add an ip_vs_conn information into the current sync_buff.
*/
void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
int pkts)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg_v0 *m;
struct ip_vs_sync_conn_v0 *s;
struct ip_vs_sync_buff *buff;
struct ipvs_master_sync_state *ms;
int id;
int len;
if (unlikely(cp->af != AF_INET))
......@@ -459,21 +528,41 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
return;
if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
return;
spin_lock(&ipvs->sync_buff_lock);
if (!ipvs->sync_buff) {
ipvs->sync_buff =
ip_vs_sync_buff_create_v0(ipvs);
if (!ipvs->sync_buff) {
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
spin_unlock(&ipvs->sync_buff_lock);
return;
}
id = select_master_thread_id(ipvs, cp);
ms = &ipvs->ms[id];
buff = ms->sync_buff;
if (buff) {
m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
/* Send buffer if it is for v1 */
if (!m->nr_conns) {
sb_queue_tail(ipvs, ms);
ms->sync_buff = NULL;
buff = NULL;
}
}
if (!buff) {
buff = ip_vs_sync_buff_create_v0(ipvs);
if (!buff) {
spin_unlock(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
ms->sync_buff = buff;
}
len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
SIMPLE_CONN_SIZE;
m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;
s = (struct ip_vs_sync_conn_v0 *) buff->head;
/* copy members */
s->reserved = 0;
......@@ -494,18 +583,24 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
m->nr_conns++;
m->size += len;
ipvs->sync_buff->head += len;
buff->head += len;
/* check if there is space for the next one */
if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
sb_queue_tail(ipvs);
ipvs->sync_buff = NULL;
if (buff->head + FULL_CONN_SIZE > buff->end) {
sb_queue_tail(ipvs, ms);
ms->sync_buff = NULL;
}
spin_unlock(&ipvs->sync_buff_lock);
/* synchronize its controller, if it has one */
if (cp->control)
ip_vs_sync_conn(net, cp->control);
cp = cp->control;
if (cp) {
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
pkts = atomic_add_return(1, &cp->in_pkts);
else
pkts = sysctl_sync_threshold(ipvs);
ip_vs_sync_conn(net, cp->control, pkts);
}
}
/*
......@@ -513,23 +608,29 @@ void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
* Called by ip_vs_in.
* Sending Version 1 messages
*/
void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg *m;
union ip_vs_sync_conn *s;
struct ip_vs_sync_buff *buff;
struct ipvs_master_sync_state *ms;
int id;
__u8 *p;
unsigned int len, pe_name_len, pad;
/* Handle old version of the protocol */
if (sysctl_sync_ver(ipvs) == 0) {
ip_vs_sync_conn_v0(net, cp);
ip_vs_sync_conn_v0(net, cp, pkts);
return;
}
/* Do not sync ONE PACKET */
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
goto control;
sloop:
if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
goto control;
/* Sanity checks */
pe_name_len = 0;
if (cp->pe_data_len) {
......@@ -541,6 +642,13 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
}
spin_lock(&ipvs->sync_buff_lock);
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
spin_unlock(&ipvs->sync_buff_lock);
return;
}
id = select_master_thread_id(ipvs, cp);
ms = &ipvs->ms[id];
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
......@@ -559,27 +667,32 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
/* check if there is space for this one */
pad = 0;
if (ipvs->sync_buff) {
pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
sb_queue_tail(ipvs);
ipvs->sync_buff = NULL;
buff = ms->sync_buff;
if (buff) {
m = buff->mesg;
pad = (4 - (size_t) buff->head) & 3;
/* Send buffer if it is for v0 */
if (buff->head + len + pad > buff->end || m->reserved) {
sb_queue_tail(ipvs, ms);
ms->sync_buff = NULL;
buff = NULL;
pad = 0;
}
}
if (!ipvs->sync_buff) {
ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
if (!ipvs->sync_buff) {
if (!buff) {
buff = ip_vs_sync_buff_create(ipvs);
if (!buff) {
spin_unlock(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
ms->sync_buff = buff;
m = buff->mesg;
}
m = ipvs->sync_buff->mesg;
p = ipvs->sync_buff->head;
ipvs->sync_buff->head += pad + len;
p = buff->head;
buff->head += pad + len;
m->size += pad + len;
/* Add any padding left over from the previous sync_conn */
while (pad--)
......@@ -644,16 +757,10 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
cp = cp->control;
if (!cp)
return;
/*
* Reduce sync rate for templates
* i.e only increment in_pkts for Templates.
*/
if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
int pkts = atomic_add_return(1, &cp->in_pkts);
if (pkts % sysctl_sync_period(ipvs) != 1)
return;
}
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
pkts = atomic_add_return(1, &cp->in_pkts);
else
pkts = sysctl_sync_threshold(ipvs);
goto sloop;
}
......@@ -731,9 +838,32 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
else
cp = ip_vs_ct_in_get(param);
if (cp && param->pe_data) /* Free pe_data */
if (cp) {
/* Free pe_data */
kfree(param->pe_data);
if (!cp) {
dest = cp->dest;
spin_lock(&cp->lock);
if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
!(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
if (flags & IP_VS_CONN_F_INACTIVE) {
atomic_dec(&dest->activeconns);
atomic_inc(&dest->inactconns);
} else {
atomic_inc(&dest->activeconns);
atomic_dec(&dest->inactconns);
}
}
flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
cp->flags = flags;
spin_unlock(&cp->lock);
if (!dest) {
dest = ip_vs_try_bind_dest(cp);
if (dest)
atomic_dec(&dest->refcnt);
}
} else {
/*
* Find the appropriate destination for the connection.
* If it is not found the connection will remain unbound
......@@ -742,18 +872,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
param->vport, protocol, fwmark, flags);
/* Set the appropriate activity flag */
if (protocol == IPPROTO_TCP) {
if (state != IP_VS_TCP_S_ESTABLISHED)
flags |= IP_VS_CONN_F_INACTIVE;
else
flags &= ~IP_VS_CONN_F_INACTIVE;
} else if (protocol == IPPROTO_SCTP) {
if (state != IP_VS_SCTP_S_ESTABLISHED)
flags |= IP_VS_CONN_F_INACTIVE;
else
flags &= ~IP_VS_CONN_F_INACTIVE;
}
cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
if (dest)
atomic_dec(&dest->refcnt);
......@@ -763,34 +881,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
return;
}
} else if (!cp->dest) {
dest = ip_vs_try_bind_dest(cp);
if (dest)
atomic_dec(&dest->refcnt);
} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
(cp->state != state)) {
/* update active/inactive flag for the connection */
dest = cp->dest;
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
(state != IP_VS_TCP_S_ESTABLISHED)) {
atomic_dec(&dest->activeconns);
atomic_inc(&dest->inactconns);
cp->flags |= IP_VS_CONN_F_INACTIVE;
} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
(state == IP_VS_TCP_S_ESTABLISHED)) {
atomic_inc(&dest->activeconns);
atomic_dec(&dest->inactconns);
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
}
} else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
(cp->state != state)) {
dest = cp->dest;
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
(state != IP_VS_SCTP_S_ESTABLISHED)) {
atomic_dec(&dest->activeconns);
atomic_inc(&dest->inactconns);
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
}
}
if (opt)
......@@ -1148,6 +1238,28 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
}
/*
* Setup sndbuf (mode=1) or rcvbuf (mode=0)
*/
static void set_sock_size(struct sock *sk, int mode, int val)
{
/* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */
/* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */
lock_sock(sk);
if (mode) {
val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
sysctl_wmem_max);
sk->sk_sndbuf = val * 2;
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
} else {
val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
sysctl_rmem_max);
sk->sk_rcvbuf = val * 2;
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
release_sock(sk);
}
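set_sock_size() mirrors what the commented-out setsockopt() calls would do from userspace, including the kernel's internal doubling of the requested value. A hedged userspace equivalent of the mode=1 (sndbuf) path:

#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int val = 212992, eff = 0;
	socklen_t len = sizeof(eff);

	/* use SO_RCVBUF instead for the mode=0 (rcvbuf) path */
	setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
	getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &eff, &len);
	printf("requested %d, effective %d\n", val, eff);	/* ~2 * val */
	return 0;
}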
/*
* Setup loopback of outgoing multicasts on a sending socket
*/
......@@ -1298,9 +1410,15 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
/*
* Set up sending multicast socket over UDP
*/
static struct socket *make_send_sock(struct net *net)
static struct socket *make_send_sock(struct net *net, int id)
{
struct netns_ipvs *ipvs = net_ipvs(net);
/* multicast addr */
struct sockaddr_in mcast_addr = {
.sin_family = AF_INET,
.sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
};
struct socket *sock;
int result;
......@@ -1324,6 +1442,9 @@ static struct socket *make_send_sock(struct net *net)
set_mcast_loop(sock->sk, 0);
set_mcast_ttl(sock->sk, 1);
result = sysctl_sync_sock_size(ipvs);
if (result > 0)
set_sock_size(sock->sk, 1, result);
result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
if (result < 0) {
......@@ -1349,9 +1470,15 @@ static struct socket *make_send_sock(struct net *net)
/*
* Set up receiving multicast socket over UDP
*/
static struct socket *make_receive_sock(struct net *net)
static struct socket *make_receive_sock(struct net *net, int id)
{
struct netns_ipvs *ipvs = net_ipvs(net);
/* multicast addr */
struct sockaddr_in mcast_addr = {
.sin_family = AF_INET,
.sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),
.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
};
struct socket *sock;
int result;
......@@ -1369,6 +1496,9 @@ static struct socket *make_receive_sock(struct net *net)
sk_change_net(sock->sk, net);
/* it is equivalent to the REUSEADDR option in user-space */
sock->sk->sk_reuse = SK_CAN_REUSE;
result = sysctl_sync_sock_size(ipvs);
if (result > 0)
set_sock_size(sock->sk, 0, result);
result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
sizeof(struct sockaddr));
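With sync_ports > 1, each thread id gets its own UDP port (IP_VS_SYNC_PORT + id) on the same multicast group. A hypothetical diagnostic listener for id 0, assuming the kernel defaults 224.0.0.81 (IP_VS_SYNC_GROUP) and 8848 (IP_VS_SYNC_PORT):

#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

int main(void)
{
	int id = 0;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in addr;
	struct ip_mreq mreq;
	char buf[1500];

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(8848 + id);	/* IP_VS_SYNC_PORT + id */
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	bind(fd, (struct sockaddr *)&addr, sizeof(addr));

	mreq.imr_multiaddr.s_addr = inet_addr("224.0.0.81");
	mreq.imr_interface.s_addr = htonl(INADDR_ANY);
	setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));

	printf("got %zd-byte sync message\n",
	       recv(fd, buf, sizeof(buf), 0));
	return 0;
}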
......@@ -1411,18 +1541,22 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
return len;
}
static void
static int
ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
{
int msize;
int ret;
msize = msg->size;
/* Put size in network byte order */
msg->size = htons(msg->size);
if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
pr_err("ip_vs_send_async error\n");
ret = ip_vs_send_async(sock, (char *)msg, msize);
if (ret >= 0 || ret == -EAGAIN)
return ret;
pr_err("ip_vs_send_async error %d\n", ret);
return 0;
}
static int
......@@ -1438,48 +1572,90 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
iov.iov_base = buffer;
iov.iov_len = (size_t)buflen;
len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0);
len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);
if (len < 0)
return -1;
return len;
LeaveFunction(7);
return len;
}
/* Wakeup the master thread for sending */
static void master_wakeup_work_handler(struct work_struct *work)
{
struct ipvs_master_sync_state *ms =
container_of(work, struct ipvs_master_sync_state,
master_wakeup_work.work);
struct netns_ipvs *ipvs = ms->ipvs;
spin_lock_bh(&ipvs->sync_lock);
if (ms->sync_queue_len &&
ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
wake_up_process(ms->master_thread);
}
spin_unlock_bh(&ipvs->sync_lock);
}
/* Get next buffer to send */
static inline struct ip_vs_sync_buff *
next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
{
struct ip_vs_sync_buff *sb;
sb = sb_dequeue(ipvs, ms);
if (sb)
return sb;
/* Do not delay entries in buffer for more than 2 seconds */
return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);
}
static int sync_thread_master(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
struct sock *sk = tinfo->sock->sk;
struct ip_vs_sync_buff *sb;
pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d\n",
ipvs->master_mcast_ifn, ipvs->master_syncid);
"syncid = %d, id = %d\n",
ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);
while (!kthread_should_stop()) {
while ((sb = sb_dequeue(ipvs))) {
ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
for (;;) {
sb = next_sync_buff(ipvs, ms);
if (unlikely(kthread_should_stop()))
break;
if (!sb) {
schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
continue;
}
while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
int ret = 0;
/* check if entries stay in ipvs->sync_buff for 2 seconds */
sb = get_curr_sync_buff(ipvs, 2 * HZ);
if (sb) {
ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
__wait_event_interruptible(*sk_sleep(sk),
sock_writeable(sk) ||
kthread_should_stop(),
ret);
if (unlikely(kthread_should_stop()))
goto done;
}
ip_vs_sync_buff_release(sb);
}
schedule_timeout_interruptible(HZ);
}
done:
__set_current_state(TASK_RUNNING);
if (sb)
ip_vs_sync_buff_release(sb);
/* clean up the sync_buff queue */
while ((sb = sb_dequeue(ipvs)))
while ((sb = sb_dequeue(ipvs, ms)))
ip_vs_sync_buff_release(sb);
__set_current_state(TASK_RUNNING);
/* clean up the current sync_buff */
sb = get_curr_sync_buff(ipvs, 0);
sb = get_curr_sync_buff(ipvs, ms, 0);
if (sb)
ip_vs_sync_buff_release(sb);
......@@ -1498,8 +1674,8 @@ static int sync_thread_backup(void *data)
int len;
pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d\n",
ipvs->backup_mcast_ifn, ipvs->backup_syncid);
"syncid = %d, id = %d\n",
ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);
while (!kthread_should_stop()) {
wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
......@@ -1511,6 +1687,7 @@ static int sync_thread_backup(void *data)
len = ip_vs_receive(tinfo->sock, tinfo->buf,
ipvs->recv_mesg_maxlen);
if (len <= 0) {
if (len != -EAGAIN)
pr_err("receiving message error\n");
break;
}
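The backup thread now reads with MSG_DONTWAIT, so -EAGAIN simply means "no datagram yet" rather than a failure. A userspace analogue of that tolerant receive (names are illustrative):

#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>

/* >0: bytes read, 0: nothing queued yet, -1: real error */
static int try_recv(int fd, char *buf, size_t len)
{
	ssize_t n = recv(fd, buf, len, MSG_DONTWAIT);

	if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
		return 0;	/* caller sleeps on the socket and retries */
	return (int)n;
}

int main(void)
{
	char buf[1500];
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	printf("try_recv: %d\n", try_recv(fd, buf, sizeof(buf)));
	return 0;
}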
......@@ -1535,86 +1712,140 @@ static int sync_thread_backup(void *data)
int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
{
struct ip_vs_sync_thread_data *tinfo;
struct task_struct **realtask, *task;
struct task_struct **array = NULL, *task;
struct socket *sock;
struct netns_ipvs *ipvs = net_ipvs(net);
char *name, *buf = NULL;
char *name;
int (*threadfn)(void *data);
int id, count;
int result = -ENOMEM;
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
sizeof(struct ip_vs_sync_conn_v0));
if (!ipvs->sync_state) {
count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
ipvs->threads_mask = count - 1;
} else
count = ipvs->threads_mask + 1;
if (state == IP_VS_STATE_MASTER) {
if (ipvs->master_thread)
if (ipvs->ms)
return -EEXIST;
strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
sizeof(ipvs->master_mcast_ifn));
ipvs->master_syncid = syncid;
realtask = &ipvs->master_thread;
name = "ipvs_master:%d";
name = "ipvs-m:%d:%d";
threadfn = sync_thread_master;
sock = make_send_sock(net);
} else if (state == IP_VS_STATE_BACKUP) {
if (ipvs->backup_thread)
if (ipvs->backup_threads)
return -EEXIST;
strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
sizeof(ipvs->backup_mcast_ifn));
ipvs->backup_syncid = syncid;
realtask = &ipvs->backup_thread;
name = "ipvs_backup:%d";
name = "ipvs-b:%d:%d";
threadfn = sync_thread_backup;
sock = make_receive_sock(net);
} else {
return -EINVAL;
}
if (IS_ERR(sock)) {
result = PTR_ERR(sock);
if (state == IP_VS_STATE_MASTER) {
struct ipvs_master_sync_state *ms;
ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);
if (!ipvs->ms)
goto out;
ms = ipvs->ms;
for (id = 0; id < count; id++, ms++) {
INIT_LIST_HEAD(&ms->sync_queue);
ms->sync_queue_len = 0;
ms->sync_queue_delay = 0;
INIT_DELAYED_WORK(&ms->master_wakeup_work,
master_wakeup_work_handler);
ms->ipvs = ipvs;
}
set_sync_mesg_maxlen(net, state);
if (state == IP_VS_STATE_BACKUP) {
buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
if (!buf)
goto outsocket;
} else {
array = kzalloc(count * sizeof(struct task_struct *),
GFP_KERNEL);
if (!array)
goto out;
}
set_sync_mesg_maxlen(net, state);
tinfo = NULL;
for (id = 0; id < count; id++) {
if (state == IP_VS_STATE_MASTER)
sock = make_send_sock(net, id);
else
sock = make_receive_sock(net, id);
if (IS_ERR(sock)) {
result = PTR_ERR(sock);
goto outtinfo;
}
tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
if (!tinfo)
goto outbuf;
goto outsocket;
tinfo->net = net;
tinfo->sock = sock;
tinfo->buf = buf;
if (state == IP_VS_STATE_BACKUP) {
tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,
GFP_KERNEL);
if (!tinfo->buf)
goto outtinfo;
}
tinfo->id = id;
task = kthread_run(threadfn, tinfo, name, ipvs->gen);
task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
if (IS_ERR(task)) {
result = PTR_ERR(task);
goto outtinfo;
}
tinfo = NULL;
if (state == IP_VS_STATE_MASTER)
ipvs->ms[id].master_thread = task;
else
array[id] = task;
}
/* mark as active */
*realtask = task;
if (state == IP_VS_STATE_BACKUP)
ipvs->backup_threads = array;
spin_lock_bh(&ipvs->sync_buff_lock);
ipvs->sync_state |= state;
spin_unlock_bh(&ipvs->sync_buff_lock);
/* increase the module use count */
ip_vs_use_count_inc();
return 0;
outtinfo:
kfree(tinfo);
outbuf:
kfree(buf);
outsocket:
sk_release_kernel(sock->sk);
outtinfo:
if (tinfo) {
sk_release_kernel(tinfo->sock->sk);
kfree(tinfo->buf);
kfree(tinfo);
}
count = id;
while (count-- > 0) {
if (state == IP_VS_STATE_MASTER)
kthread_stop(ipvs->ms[count].master_thread);
else
kthread_stop(array[count]);
}
kfree(array);
out:
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
kfree(ipvs->ms);
ipvs->ms = NULL;
}
return result;
}
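For context, this multi-thread path is driven from userspace roughly as follows (sysctl name per this series; exact paths may vary by configuration):

    echo 4 > /proc/sys/net/ipv4/vs/sync_ports
    ipvsadm --start-daemon master --mcast-interface eth0 --syncid 1

start_sync_thread() then clamps sync_ports to IPVS_SYNC_PORTS_MAX, spawns one "ipvs-m:<syncid>:<id>" kthread per port, and tears all of them down again if any of them fails to start.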
......@@ -1622,38 +1853,60 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
int stop_sync_thread(struct net *net, int state)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct task_struct **array;
int id;
int retc = -EINVAL;
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
if (state == IP_VS_STATE_MASTER) {
if (!ipvs->master_thread)
if (!ipvs->ms)
return -ESRCH;
pr_info("stopping master sync thread %d ...\n",
task_pid_nr(ipvs->master_thread));
/*
* The lock synchronizes with sb_queue_tail(), so that we don't
* add sync buffers to the queue while we are already in the
* process of stopping the master sync daemon.
*/
spin_lock_bh(&ipvs->sync_lock);
spin_lock_bh(&ipvs->sync_buff_lock);
spin_lock(&ipvs->sync_lock);
ipvs->sync_state &= ~IP_VS_STATE_MASTER;
spin_unlock_bh(&ipvs->sync_lock);
retc = kthread_stop(ipvs->master_thread);
ipvs->master_thread = NULL;
spin_unlock(&ipvs->sync_lock);
spin_unlock_bh(&ipvs->sync_buff_lock);
retc = 0;
for (id = ipvs->threads_mask; id >= 0; id--) {
struct ipvs_master_sync_state *ms = &ipvs->ms[id];
int ret;
pr_info("stopping master sync thread %d ...\n",
task_pid_nr(ms->master_thread));
cancel_delayed_work_sync(&ms->master_wakeup_work);
ret = kthread_stop(ms->master_thread);
if (retc >= 0)
retc = ret;
}
kfree(ipvs->ms);
ipvs->ms = NULL;
} else if (state == IP_VS_STATE_BACKUP) {
if (!ipvs->backup_thread)
if (!ipvs->backup_threads)
return -ESRCH;
pr_info("stopping backup sync thread %d ...\n",
task_pid_nr(ipvs->backup_thread));
ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
retc = kthread_stop(ipvs->backup_thread);
ipvs->backup_thread = NULL;
array = ipvs->backup_threads;
retc = 0;
for (id = ipvs->threads_mask; id >= 0; id--) {
int ret;
pr_info("stopping backup sync thread %d ...\n",
task_pid_nr(array[id]));
ret = kthread_stop(array[id]);
if (retc >= 0)
retc = ret;
}
kfree(array);
ipvs->backup_threads = NULL;
}
/* decrease the module use count */
......@@ -1670,13 +1923,8 @@ int __net_init ip_vs_sync_net_init(struct net *net)
struct netns_ipvs *ipvs = net_ipvs(net);
__mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
INIT_LIST_HEAD(&ipvs->sync_queue);
spin_lock_init(&ipvs->sync_lock);
spin_lock_init(&ipvs->sync_buff_lock);
ipvs->sync_mcast_addr.sin_family = AF_INET;
ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
return 0;
}
......
......@@ -84,7 +84,7 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
/*
* Allocate the mark variable for WRR scheduling
*/
mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC);
mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_KERNEL);
if (mark == NULL)
return -ENOMEM;
......
......@@ -1336,7 +1336,6 @@ static void nf_conntrack_cleanup_init_net(void)
while (untrack_refs() > 0)
schedule();
nf_conntrack_helper_fini();
nf_conntrack_proto_fini();
#ifdef CONFIG_NF_CONNTRACK_ZONES
nf_ct_extend_unregister(&nf_ct_zone_extend);
......@@ -1354,6 +1353,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
}
nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
nf_conntrack_helper_fini(net);
nf_conntrack_timeout_fini(net);
nf_conntrack_ecache_fini(net);
nf_conntrack_tstamp_fini(net);
......@@ -1504,10 +1504,6 @@ static int nf_conntrack_init_init_net(void)
if (ret < 0)
goto err_proto;
ret = nf_conntrack_helper_init();
if (ret < 0)
goto err_helper;
#ifdef CONFIG_NF_CONNTRACK_ZONES
ret = nf_ct_extend_register(&nf_ct_zone_extend);
if (ret < 0)
......@@ -1525,10 +1521,8 @@ static int nf_conntrack_init_init_net(void)
#ifdef CONFIG_NF_CONNTRACK_ZONES
err_extend:
nf_conntrack_helper_fini();
#endif
err_helper:
nf_conntrack_proto_fini();
#endif
err_proto:
return ret;
}
......@@ -1589,9 +1583,14 @@ static int nf_conntrack_init_net(struct net *net)
ret = nf_conntrack_timeout_init(net);
if (ret < 0)
goto err_timeout;
ret = nf_conntrack_helper_init(net);
if (ret < 0)
goto err_helper;
return 0;
err_helper:
nf_conntrack_timeout_fini(net);
err_timeout:
nf_conntrack_ecache_fini(net);
err_ecache:
......
......@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
int nf_conntrack_register_notifier(struct net *net,
struct nf_ct_event_notifier *new)
{
int ret = 0;
int ret;
struct nf_ct_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
......@@ -95,8 +95,7 @@ int nf_conntrack_register_notifier(struct net *net,
goto out_unlock;
}
rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
mutex_unlock(&nf_ct_ecache_mutex);
return ret;
ret = 0;
out_unlock:
mutex_unlock(&nf_ct_ecache_mutex);
......@@ -121,7 +120,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
int nf_ct_expect_register_notifier(struct net *net,
struct nf_exp_event_notifier *new)
{
int ret = 0;
int ret;
struct nf_exp_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
......@@ -132,8 +131,7 @@ int nf_ct_expect_register_notifier(struct net *net,
goto out_unlock;
}
rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
mutex_unlock(&nf_ct_ecache_mutex);
return ret;
ret = 0;
out_unlock:
mutex_unlock(&nf_ct_ecache_mutex);
......
......@@ -34,6 +34,67 @@ static struct hlist_head *nf_ct_helper_hash __read_mostly;
static unsigned int nf_ct_helper_hsize __read_mostly;
static unsigned int nf_ct_helper_count __read_mostly;
static bool nf_ct_auto_assign_helper __read_mostly = true;
module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644);
MODULE_PARM_DESC(nf_conntrack_helper,
"Enable automatic conntrack helper assignment (default 1)");
#ifdef CONFIG_SYSCTL
static struct ctl_table helper_sysctl_table[] = {
{
.procname = "nf_conntrack_helper",
.data = &init_net.ct.sysctl_auto_assign_helper,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{}
};
static int nf_conntrack_helper_init_sysctl(struct net *net)
{
struct ctl_table *table;
table = kmemdup(helper_sysctl_table, sizeof(helper_sysctl_table),
GFP_KERNEL);
if (!table)
goto out;
table[0].data = &net->ct.sysctl_auto_assign_helper;
net->ct.helper_sysctl_header =
register_net_sysctl(net, "net/netfilter", table);
if (!net->ct.helper_sysctl_header) {
pr_err("nf_conntrack_helper: can't register to sysctl.\n");
goto out_register;
}
return 0;
out_register:
kfree(table);
out:
return -ENOMEM;
}
static void nf_conntrack_helper_fini_sysctl(struct net *net)
{
struct ctl_table *table;
table = net->ct.helper_sysctl_header->ctl_table_arg;
unregister_net_sysctl_table(net->ct.helper_sysctl_header);
kfree(table);
}
#else
static int nf_conntrack_helper_init_sysctl(struct net *net)
{
return 0;
}
static void nf_conntrack_helper_fini_sysctl(struct net *net)
{
}
#endif /* CONFIG_SYSCTL */
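Per the "net/netfilter" prefix and the procname above, the knob lands at /proc/sys/net/netfilter/nf_conntrack_helper. A small hedged userspace reader:

#include <stdio.h>

int main(void)
{
	int enabled = -1;
	FILE *f = fopen("/proc/sys/net/netfilter/nf_conntrack_helper", "r");

	if (!f)
		return 1;	/* e.g. kernel built without CONFIG_SYSCTL */
	if (fscanf(f, "%d", &enabled) == 1)
		printf("auto helper assignment: %s\n",
		       enabled ? "on" : "off");
	fclose(f);
	return 0;
}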
/* Stupid hash, but collision free for the default registrations of the
* helpers currently in the kernel. */
......@@ -118,17 +179,38 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
{
struct nf_conntrack_helper *helper = NULL;
struct nf_conn_help *help;
struct net *net = nf_ct_net(ct);
int ret = 0;
/* A helper was already attached explicitly. In the NAT case,
* nf_conntrack_alter_reply() asks to look the helper up again.
* Since the user is now in full control of keeping helper
* configurations consistent, skip this automatic re-lookup;
* otherwise we would lose the helper.
*/
if (test_bit(IPS_HELPER_BIT, &ct->status))
return 0;
if (tmpl != NULL) {
help = nfct_help(tmpl);
if (help != NULL)
if (help != NULL) {
helper = help->helper;
set_bit(IPS_HELPER_BIT, &ct->status);
}
}
help = nfct_help(ct);
if (helper == NULL)
if (net->ct.sysctl_auto_assign_helper && helper == NULL) {
helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
if (unlikely(!net->ct.auto_assign_helper_warned && helper)) {
pr_info("nf_conntrack: automatic helper "
"assignment is deprecated and will "
"be removed soon. Use the iptables CT target "
"to attach helpers instead.\n");
net->ct.auto_assign_helper_warned = true;
}
}
if (helper == NULL) {
if (help)
RCU_INIT_POINTER(help->helper, NULL);
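The deprecation message above points at explicit helper attachment; for illustration only, a typical raw-table rule of that style is:

    iptables -t raw -A PREROUTING -p tcp --dport 21 -j CT --helper ftp

The CT target sets IPS_HELPER on the conntrack entry, so the early return at the top of __nf_ct_try_assign_helper() preserves the user's choice across nf_conntrack_alter_reply().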
......@@ -315,28 +397,44 @@ static struct nf_ct_ext_type helper_extend __read_mostly = {
.id = NF_CT_EXT_HELPER,
};
int nf_conntrack_helper_init(void)
int nf_conntrack_helper_init(struct net *net)
{
int err;
net->ct.auto_assign_helper_warned = false;
net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
if (net_eq(net, &init_net)) {
nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
nf_ct_helper_hash =
nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
if (!nf_ct_helper_hash)
return -ENOMEM;
err = nf_ct_extend_register(&helper_extend);
if (err < 0)
goto err1;
}
err = nf_conntrack_helper_init_sysctl(net);
if (err < 0)
goto out_sysctl;
return 0;
out_sysctl:
if (net_eq(net, &init_net))
nf_ct_extend_unregister(&helper_extend);
err1:
nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
return err;
}
void nf_conntrack_helper_fini(void)
void nf_conntrack_helper_fini(struct net *net)
{
nf_conntrack_helper_fini_sysctl(net);
if (net_eq(net, &init_net)) {
nf_ct_extend_unregister(&helper_extend);
nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
}
}
......@@ -2080,7 +2080,15 @@ static int
ctnetlink_change_expect(struct nf_conntrack_expect *x,
const struct nlattr * const cda[])
{
return -EOPNOTSUPP;
if (cda[CTA_EXPECT_TIMEOUT]) {
if (!del_timer(&x->timeout))
return -ETIME;
x->timeout.expires = jiffies +
ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ;
add_timer(&x->timeout);
}
return 0;
}
static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = {
......
......@@ -14,7 +14,6 @@
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/if.h>
#include <linux/netfilter_ipv4/ip_queue.h>
#include <linux/inet_diag.h>
#include <linux/xfrm.h>
#include <linux/audit.h>
......@@ -70,12 +69,6 @@ static struct nlmsg_perm nlmsg_route_perms[] =
{ RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
};
static struct nlmsg_perm nlmsg_firewall_perms[] =
{
{ IPQM_MODE, NETLINK_FIREWALL_SOCKET__NLMSG_WRITE },
{ IPQM_VERDICT, NETLINK_FIREWALL_SOCKET__NLMSG_WRITE },
};
static struct nlmsg_perm nlmsg_tcpdiag_perms[] =
{
{ TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
......@@ -145,12 +138,6 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
sizeof(nlmsg_route_perms));
break;
case SECCLASS_NETLINK_FIREWALL_SOCKET:
case SECCLASS_NETLINK_IP6FW_SOCKET:
err = nlmsg_perm(nlmsg_type, perm, nlmsg_firewall_perms,
sizeof(nlmsg_firewall_perms));
break;
case SECCLASS_NETLINK_TCPDIAG_SOCKET:
err = nlmsg_perm(nlmsg_type, perm, nlmsg_tcpdiag_perms,
sizeof(nlmsg_tcpdiag_perms));
......