Commit f169af2c authored by David S. Miller

Merge branch 'netlink-mmap-remove'

Florian Westphal says:

====================
netlink: remove mmapped netlink support

As discussed during netconf 2016 in Seville, this series removes
CONFIG_NETLINK_MMAP.

Close to three years after it was merged, it still has several problems
that do not appear to be fixable.

No official netfilter libmnl release contains support for mmap-backed
netlink sockets.  No openvswitch release makes use of it either.

To use the mmap interface, userspace not only has to probe for mmap netlink
support, it also has to implement a recv()-based socket receive path in
order to handle messages that exceed the size of an rx ring frame
(NL_MMAP_STATUS_COPY).
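
To illustrate, here is a minimal sketch of the dual-path receive loop the
ring interface forces on userspace, loosely following the example in the
old Documentation/networking/netlink_mmap.txt.  The protocol choice
(NETLINK_USERSOCK), the ring geometry and the error handling are made up
for brevity; this is not code from this series:

    /*
     * Illustrative only: mapped rx ring combined with a recv() fallback
     * for NL_MMAP_STATUS_COPY frames.
     */
    #include <errno.h>
    #include <poll.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/mman.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    #ifndef SOL_NETLINK
    #define SOL_NETLINK 270
    #endif

    int main(void)
    {
        unsigned long page_size = sysconf(_SC_PAGESIZE);
        struct nl_mmap_req req = {
            .nm_block_size = 16 * page_size,
            .nm_block_nr   = 64,
            .nm_frame_size = 16384,
            .nm_frame_nr   = 64 * 16 * page_size / 16384,
        };
        unsigned int frame_offset = 0, ring_size;
        char copybuf[65536];
        char *rx_ring;
        int fd;

        fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_USERSOCK);
        if (fd < 0)
            return 1;

        /* Probe for mmap support: fails on kernels built without
         * CONFIG_NETLINK_MMAP (and on everything after this series),
         * so a plain recv() loop has to exist anyway. */
        if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0) {
            perror("NETLINK_RX_RING unsupported, would fall back to recv()");
            return 1;
        }

        ring_size = req.nm_block_nr * req.nm_block_size;
        rx_ring = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (rx_ring == MAP_FAILED)
            return 1;

        for (;;) {
            struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLERR };

            if (poll(&pfd, 1, -1) < 0 && errno != EINTR)
                break;

            for (;;) {
                struct nl_mmap_hdr *hdr = (void *)(rx_ring + frame_offset);
                struct nlmsghdr *nlh;
                ssize_t len;

                if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
                    /* message fits into the ring frame */
                    nlh = (void *)((char *)hdr + NLMSG_ALIGN(sizeof(*hdr)));
                    len = hdr->nm_len;
                } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
                    /* too big for a ring frame: must use the socket path */
                    len = recv(fd, copybuf, sizeof(copybuf), MSG_DONTWAIT);
                    if (len <= 0)
                        break;
                    nlh = (void *)copybuf;
                } else {
                    break;      /* no more frames to process */
                }

                if (len > 0)
                    printf("type %u, %zd bytes\n",
                           (unsigned int)nlh->nlmsg_type, len);

                /* hand the frame back to the kernel and advance */
                hdr->nm_status = NL_MMAP_STATUS_UNUSED;
                frame_offset = (frame_offset + req.nm_frame_size) % ring_size;
            }
        }
        return 0;
    }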

So if there are odd programs out there that attempt to use mmap netlink,
they should continue to work, since they already need a socket-based code
path to work properly.

The actual revert (the first patch) lists the problems in its changelog.
The follow-up patches remove a couple of helpers that are no longer needed
after the revert.

I did a few tests comparing the mmap and socket-based interfaces on a
4.4-based kernel on an i7-4790 box; there are no performance advantages:

loopback, single nfqueue, queueing in -t filter INPUT:
traffic generated by 8 * ping -q -f localhost:
socket backend:
real    0m27.325s
user    0m3.993s
sys     0m23.292s

with mmap ring backend:
real    0m29.054s
user    0m4.924s
sys     0m24.127s

with a single unidirectional TCP stream, loopback MTU set to 1500
(nc localhost discard < /dev/zero > /dev/null):

socket interface:
time nfqdump -b $((8 * 1024 * 1024 * 1024)) -w /dev/null
real    0m15.960s
user    0m1.756s
sys     0m11.143s

mmap ring:
real    0m16.441s
user    0m3.040s
sys     0m13.687s

With the socket interface, nfqdump[1] with the --gso option (i.e. the MTU
is exceeded; no kernel-side segmentation and checksum fixups) completes in
about 5s.

I also tested dumping a conntrack table with 1m entries.
On my box this takes about 2.4 seconds with both the mmap and the socket
backend (a minimal sketch of such a socket-based dumper follows the
timings below):

time LD_PRELOAD=../../src/.libs/libmnl.so ./nfct-dump-sk > /dev/null
mnl_cb_run: Success
messages: 1000000
real    0m2.485s
user    0m1.085s
sys     0m1.400s

time LD_PRELOAD=../../src/.libs/libmnl.so ./nfct-dump-mmap > /dev/null
messages: 1000000
real    0m2.451s
user    0m1.124s
sys     0m1.328s
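
For reference, the socket-backed dumper is essentially the stock libmnl
conntrack dump pattern.  A minimal sketch along those lines (illustrative
only, not the actual nfct-dump-sk source; the AF_UNSPEC dump of all
families is an assumption, and it merely counts the dumped entries) would
be:

    /* Count conntrack entries via a plain (socket-based) netlink dump
     * using libmnl; build with -lmnl. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>
    #include <sys/socket.h>
    #include <libmnl/libmnl.h>
    #include <linux/netfilter/nfnetlink.h>
    #include <linux/netfilter/nfnetlink_conntrack.h>

    static unsigned int msgs;

    static int count_cb(const struct nlmsghdr *nlh, void *data)
    {
        msgs++;                 /* one netlink message per conntrack entry */
        return MNL_CB_OK;
    }

    int main(void)
    {
        char buf[MNL_SOCKET_BUFFER_SIZE];
        struct mnl_socket *nl;
        struct nlmsghdr *nlh;
        struct nfgenmsg *nfh;
        unsigned int seq, portid;
        ssize_t ret;

        nl = mnl_socket_open(NETLINK_NETFILTER);
        if (!nl || mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
            perror("mnl_socket");
            exit(EXIT_FAILURE);
        }
        portid = mnl_socket_get_portid(nl);

        /* ctnetlink table dump request */
        nlh = mnl_nlmsg_put_header(buf);
        nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET;
        nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
        nlh->nlmsg_seq = seq = time(NULL);

        nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
        nfh->nfgen_family = AF_UNSPEC;   /* dump all address families */
        nfh->version = NFNETLINK_V0;
        nfh->res_id = 0;

        if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
            perror("mnl_socket_sendto");
            exit(EXIT_FAILURE);
        }

        /* plain recv() loop; this is the code path the mmap ring was
         * meant to replace */
        ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
        while (ret > 0) {
            ret = mnl_cb_run(buf, ret, seq, portid, count_cb, NULL);
            if (ret <= MNL_CB_STOP)
                break;
            ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
        }
        if (ret == -1)
            perror("mnl_cb_run");

        printf("messages: %u\n", msgs);
        mnl_socket_close(nl);
        return 0;
    }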

[1] https://git.breakpoint.cc/cgit/fw/nfqdump.git/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 7e6e18fb c5b0db32
......@@ -34,8 +34,6 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
int nfnetlink_has_listeners(struct net *net, unsigned int group);
struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
u32 dst_portid, gfp_t gfp_mask);
int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
unsigned int group, int echo, gfp_t flags);
int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error);
......
......@@ -69,16 +69,6 @@ extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group)
extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
extern int netlink_has_listeners(struct sock *sk, unsigned int group);
extern struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
unsigned int ldiff, u32 dst_portid,
gfp_t gfp_mask);
static inline struct sk_buff *
netlink_alloc_skb(struct sock *ssk, unsigned int size, u32 dst_portid,
gfp_t gfp_mask)
{
return __netlink_alloc_skb(ssk, size, 0, dst_portid, gfp_mask);
}
extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
__u32 group, gfp_t allocation);
......
......@@ -83,7 +83,6 @@ struct genl_family {
* @attrs: netlink attributes
* @_net: network namespace
* @user_ptr: user pointers
* @dst_sk: destination socket
*/
struct genl_info {
u32 snd_seq;
......@@ -94,7 +93,6 @@ struct genl_info {
struct nlattr ** attrs;
possible_net_t _net;
void * user_ptr[2];
struct sock * dst_sk;
};
static inline struct net *genl_info_net(struct genl_info *info)
......@@ -188,8 +186,6 @@ int genl_unregister_family(struct genl_family *family);
void genl_notify(struct genl_family *family, struct sk_buff *skb,
struct genl_info *info, u32 group, gfp_t flags);
struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info,
gfp_t flags);
void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
struct genl_family *family, int flags, u8 cmd);
......
......@@ -107,8 +107,10 @@ struct nlmsgerr {
#define NETLINK_PKTINFO 3
#define NETLINK_BROADCAST_ERROR 4
#define NETLINK_NO_ENOBUFS 5
#ifndef __KERNEL__
#define NETLINK_RX_RING 6
#define NETLINK_TX_RING 7
#endif
#define NETLINK_LISTEN_ALL_NSID 8
#define NETLINK_LIST_MEMBERSHIPS 9
#define NETLINK_CAP_ACK 10
......@@ -134,6 +136,7 @@ struct nl_mmap_hdr {
__u32 nm_gid;
};
#ifndef __KERNEL__
enum nl_mmap_status {
NL_MMAP_STATUS_UNUSED,
NL_MMAP_STATUS_RESERVED,
......@@ -145,6 +148,7 @@ enum nl_mmap_status {
#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO
#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
#endif
#define NET_MAJOR 36 /* Major 36 is reserved for networking */
......
......@@ -48,6 +48,8 @@ enum {
#define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */
#define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */
#ifndef __KERNEL__
#define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */
#endif
#endif
......@@ -127,13 +127,6 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group)
}
EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
u32 dst_portid, gfp_t gfp_mask)
{
return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask);
}
EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb);
int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
unsigned int group, int echo, gfp_t flags)
{
......
......@@ -330,14 +330,13 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
* message. WARNING: has to be <= 128k due to slab restrictions */
n = max(inst_size, pkt_size);
skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC);
skb = alloc_skb(n, GFP_ATOMIC);
if (!skb) {
if (n > pkt_size) {
/* try to allocate only as much as we need for current
* packet */
skb = nfnetlink_alloc_skb(net, pkt_size,
peer_portid, GFP_ATOMIC);
skb = alloc_skb(pkt_size, GFP_ATOMIC);
}
}
......
......@@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
__be32 **packet_id_ptr)
{
size_t size;
size_t data_len = 0, cap_len = 0, rem_len = 0;
size_t data_len = 0, cap_len = 0;
unsigned int hlen = 0;
struct sk_buff *skb;
struct nlattr *nla;
......@@ -361,7 +361,6 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
hlen = min_t(unsigned int, hlen, data_len);
size += sizeof(struct nlattr) + hlen;
cap_len = entskb->len;
rem_len = data_len - hlen;
break;
}
......@@ -386,8 +385,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
size += nla_total_size(seclen);
}
skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid,
GFP_ATOMIC);
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb) {
skb_tx_error(entskb);
return NULL;
......
......@@ -2,15 +2,6 @@
# Netlink Sockets
#
config NETLINK_MMAP
bool "NETLINK: mmaped IO"
---help---
This option enables support for memory mapped netlink IO. This
reduces overhead by avoiding copying data between kernel- and
userspace.
If unsure, say N.
config NETLINK_DIAG
tristate "NETLINK: socket monitoring interface"
default n
......
......@@ -44,12 +44,6 @@ struct netlink_sock {
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
struct module *module;
#ifdef CONFIG_NETLINK_MMAP
struct mutex pg_vec_lock;
struct netlink_ring rx_ring;
struct netlink_ring tx_ring;
atomic_t mapped;
#endif /* CONFIG_NETLINK_MMAP */
struct rhash_head node;
struct rcu_head rcu;
......@@ -60,15 +54,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
return container_of(sk, struct netlink_sock, sk);
}
static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
{
#ifdef CONFIG_NETLINK_MMAP
return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
#else
return false;
#endif /* CONFIG_NETLINK_MMAP */
}
struct netlink_table {
struct rhashtable hash;
struct hlist_head mc_list;
......
......@@ -8,41 +8,6 @@
#include "af_netlink.h"
#ifdef CONFIG_NETLINK_MMAP
static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
struct sk_buff *nlskb)
{
struct netlink_diag_ring ndr;
ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
ndr.ndr_block_nr = ring->pg_vec_len;
ndr.ndr_frame_size = ring->frame_size;
ndr.ndr_frame_nr = ring->frame_max + 1;
return nla_put(nlskb, nl_type, sizeof(ndr), &ndr);
}
static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
{
struct netlink_sock *nlk = nlk_sk(sk);
int ret;
mutex_lock(&nlk->pg_vec_lock);
ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
if (!ret)
ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING,
nlskb);
mutex_unlock(&nlk->pg_vec_lock);
return ret;
}
#else
static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
{
return 0;
}
#endif
static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
{
struct netlink_sock *nlk = nlk_sk(sk);
......@@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
goto out_nlmsg_trim;
if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) &&
sk_diag_put_rings_cfg(sk, skb))
goto out_nlmsg_trim;
nlmsg_end(skb, nlh);
return 0;
......
......@@ -462,26 +462,6 @@ int genl_unregister_family(struct genl_family *family)
}
EXPORT_SYMBOL(genl_unregister_family);
/**
* genlmsg_new_unicast - Allocate generic netlink message for unicast
* @payload: size of the message payload
* @info: information on destination
* @flags: the type of memory to allocate
*
* Allocates a new sk_buff large enough to cover the specified payload
* plus required Netlink headers. Will check receiving socket for
* memory mapped i/o capability and use it if enabled. Will fall back
* to non-mapped skb if message size exceeds the frame size of the ring.
*/
struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info,
gfp_t flags)
{
size_t len = nlmsg_total_size(genlmsg_total_size(payload));
return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags);
}
EXPORT_SYMBOL_GPL(genlmsg_new_unicast);
/**
* genlmsg_put - Add generic netlink header to netlink message
* @skb: socket buffer holding the message
......@@ -642,7 +622,6 @@ static int genl_family_rcv_msg(struct genl_family *family,
info.genlhdr = nlmsg_data(nlh);
info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
info.attrs = attrbuf;
info.dst_sk = skb->sk;
genl_info_net_set(&info, net);
memset(&info.user_ptr, 0, sizeof(info.user_ptr));
......
......@@ -422,10 +422,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
struct sk_buff *nskb = NULL;
struct sk_buff *user_skb = NULL; /* to be queued to userspace */
struct nlattr *nla;
struct genl_info info = {
.dst_sk = ovs_dp_get_net(dp)->genl_sock,
.snd_portid = upcall_info->portid,
};
size_t len;
unsigned int hlen;
int err, dp_ifindex;
......@@ -466,7 +462,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
hlen = skb->len;
len = upcall_msg_size(upcall_info, hlen);
user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
user_skb = genlmsg_new(len, GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
goto out;
......@@ -876,7 +872,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
return NULL;
len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
skb = genlmsg_new_unicast(len, info, GFP_KERNEL);
skb = genlmsg_new(len, GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
......@@ -1481,9 +1477,9 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
return -EMSGSIZE;
}
static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
static struct sk_buff *ovs_dp_cmd_alloc_info(void)
{
return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
}
/* Called with rcu_read_lock or ovs_mutex. */
......@@ -1536,7 +1532,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
goto err;
reply = ovs_dp_cmd_alloc_info(info);
reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
......@@ -1657,7 +1653,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
reply = ovs_dp_cmd_alloc_info(info);
reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
......@@ -1690,7 +1686,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
reply = ovs_dp_cmd_alloc_info(info);
reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
......@@ -1723,7 +1719,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
reply = ovs_dp_cmd_alloc_info(info);
reply = ovs_dp_cmd_alloc_info();
if (!reply)
return -ENOMEM;
......
......@@ -1104,7 +1104,6 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
req_nlh = (struct nlmsghdr *)skb->data;
msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN;
msg.cmd = req_userhdr->cmd;
msg.dst_sk = info->dst_sk;
msg.net = genl_info_net(info);
if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) {
......