Commit 15eac2a7, authored by Pravin B Shelar, committed by Jesse Gross

openvswitch: Increase maximum number of datapath ports.

Use hash table to store ports of datapath. Allow 64K ports per switch.
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
parent 46df7b81
...@@ -266,7 +266,7 @@ static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) ...@@ -266,7 +266,7 @@ static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
if (unlikely(!skb)) if (unlikely(!skb))
return -ENOMEM; return -ENOMEM;
vport = rcu_dereference(dp->ports[out_port]); vport = ovs_vport_rcu(dp, out_port);
if (unlikely(!vport)) { if (unlikely(!vport)) {
kfree_skb(skb); kfree_skb(skb);
return -ENODEV; return -ENODEV;
......
...@@ -116,7 +116,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex) ...@@ -116,7 +116,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
/* Must be called with rcu_read_lock or RTNL lock. */ /* Must be called with rcu_read_lock or RTNL lock. */
const char *ovs_dp_name(const struct datapath *dp) const char *ovs_dp_name(const struct datapath *dp)
{ {
struct vport *vport = rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]); struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
return vport->ops->get_name(vport); return vport->ops->get_name(vport);
} }
...@@ -127,7 +127,7 @@ static int get_dpifindex(struct datapath *dp) ...@@ -127,7 +127,7 @@ static int get_dpifindex(struct datapath *dp)
rcu_read_lock(); rcu_read_lock();
local = rcu_dereference(dp->ports[OVSP_LOCAL]); local = ovs_vport_rcu(dp, OVSP_LOCAL);
if (local) if (local)
ifindex = local->ops->get_ifindex(local); ifindex = local->ops->get_ifindex(local);
else else
...@@ -145,9 +145,30 @@ static void destroy_dp_rcu(struct rcu_head *rcu) ...@@ -145,9 +145,30 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
free_percpu(dp->stats_percpu); free_percpu(dp->stats_percpu);
release_net(ovs_dp_get_net(dp)); release_net(ovs_dp_get_net(dp));
kfree(dp->ports);
kfree(dp); kfree(dp);
} }
/*
 * Pick the bucket of dp->ports that holds (or would hold) port_no.
 *
 * The bucket index is port_no masked with DP_VPORT_HASH_BUCKETS - 1; the
 * mask acts as a modulo only while the bucket count is a power of two.
 */
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	const unsigned int bucket_idx = port_no & (DP_VPORT_HASH_BUCKETS - 1);

	return dp->ports + bucket_idx;
}
/*
 * Search dp's port hash table for the vport whose port_no matches.
 *
 * Walks the single bucket selected by vport_hash_bucket() using the RCU
 * hlist iterator and returns the first matching vport, or NULL when the
 * bucket contains no vport with that number.
 */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct hlist_head *bucket = vport_hash_bucket(dp, port_no);
	struct hlist_node *pos;
	struct vport *candidate;

	hlist_for_each_entry_rcu(candidate, pos, bucket, dp_hash_node) {
		if (candidate->port_no != port_no)
			continue;
		return candidate;
	}

	return NULL;
}
/* Called with RTNL lock and genl_lock. */ /* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms) static struct vport *new_vport(const struct vport_parms *parms)
{ {
...@@ -156,9 +177,9 @@ static struct vport *new_vport(const struct vport_parms *parms) ...@@ -156,9 +177,9 @@ static struct vport *new_vport(const struct vport_parms *parms)
vport = ovs_vport_add(parms); vport = ovs_vport_add(parms);
if (!IS_ERR(vport)) { if (!IS_ERR(vport)) {
struct datapath *dp = parms->dp; struct datapath *dp = parms->dp;
struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
rcu_assign_pointer(dp->ports[parms->port_no], vport); hlist_add_head_rcu(&vport->dp_hash_node, head);
list_add(&vport->node, &dp->port_list);
} }
return vport; return vport;
...@@ -170,8 +191,7 @@ void ovs_dp_detach_port(struct vport *p) ...@@ -170,8 +191,7 @@ void ovs_dp_detach_port(struct vport *p)
ASSERT_RTNL(); ASSERT_RTNL();
/* First drop references to device. */ /* First drop references to device. */
list_del(&p->node); hlist_del_rcu(&p->dp_hash_node);
rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
/* Then destroy it. */ /* Then destroy it. */
ovs_vport_del(p); ovs_vport_del(p);
...@@ -1248,7 +1268,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -1248,7 +1268,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp; struct datapath *dp;
struct vport *vport; struct vport *vport;
struct ovs_net *ovs_net; struct ovs_net *ovs_net;
int err; int err, i;
err = -EINVAL; err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
...@@ -1261,7 +1281,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -1261,7 +1281,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (dp == NULL) if (dp == NULL)
goto err_unlock_rtnl; goto err_unlock_rtnl;
INIT_LIST_HEAD(&dp->port_list);
ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
/* Allocate table. */ /* Allocate table. */
...@@ -1276,6 +1295,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -1276,6 +1295,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_table; goto err_destroy_table;
} }
dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
GFP_KERNEL);
if (!dp->ports) {
err = -ENOMEM;
goto err_destroy_percpu;
}
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&dp->ports[i]);
/* Set up our datapath device. */ /* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]); parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
parms.type = OVS_VPORT_TYPE_INTERNAL; parms.type = OVS_VPORT_TYPE_INTERNAL;
...@@ -1290,7 +1319,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -1290,7 +1319,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (err == -EBUSY) if (err == -EBUSY)
err = -EEXIST; err = -EEXIST;
goto err_destroy_percpu; goto err_destroy_ports_array;
} }
reply = ovs_dp_cmd_build_info(dp, info->snd_pid, reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
...@@ -1309,7 +1338,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -1309,7 +1338,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
return 0; return 0;
err_destroy_local_port: err_destroy_local_port:
ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL])); ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
err_destroy_ports_array:
kfree(dp->ports);
err_destroy_percpu: err_destroy_percpu:
free_percpu(dp->stats_percpu); free_percpu(dp->stats_percpu);
err_destroy_table: err_destroy_table:
...@@ -1326,15 +1357,21 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -1326,15 +1357,21 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
/* Called with genl_mutex. */ /* Called with genl_mutex. */
static void __dp_destroy(struct datapath *dp) static void __dp_destroy(struct datapath *dp)
{ {
struct vport *vport, *next_vport; int i;
rtnl_lock(); rtnl_lock();
list_for_each_entry_safe(vport, next_vport, &dp->port_list, node)
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
struct vport *vport;
struct hlist_node *node, *n;
hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node)
if (vport->port_no != OVSP_LOCAL) if (vport->port_no != OVSP_LOCAL)
ovs_dp_detach_port(vport); ovs_dp_detach_port(vport);
}
list_del(&dp->list_node); list_del(&dp->list_node);
ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL])); ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
/* rtnl_unlock() will wait until all the references to devices that /* rtnl_unlock() will wait until all the references to devices that
* are pending unregistration have been dropped. We do it here to * are pending unregistration have been dropped. We do it here to
...@@ -1566,7 +1603,7 @@ static struct vport *lookup_vport(struct net *net, ...@@ -1566,7 +1603,7 @@ static struct vport *lookup_vport(struct net *net,
if (!dp) if (!dp)
return ERR_PTR(-ENODEV); return ERR_PTR(-ENODEV);
vport = rcu_dereference_rtnl(dp->ports[port_no]); vport = ovs_vport_rtnl_rcu(dp, port_no);
if (!vport) if (!vport)
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
return vport; return vport;
...@@ -1603,7 +1640,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -1603,7 +1640,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (port_no >= DP_MAX_PORTS) if (port_no >= DP_MAX_PORTS)
goto exit_unlock; goto exit_unlock;
vport = rtnl_dereference(dp->ports[port_no]); vport = ovs_vport_rtnl_rcu(dp, port_no);
err = -EBUSY; err = -EBUSY;
if (vport) if (vport)
goto exit_unlock; goto exit_unlock;
...@@ -1613,7 +1650,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -1613,7 +1650,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
err = -EFBIG; err = -EFBIG;
goto exit_unlock; goto exit_unlock;
} }
vport = rtnl_dereference(dp->ports[port_no]); vport = ovs_vport_rtnl(dp, port_no);
if (!vport) if (!vport)
break; break;
} }
...@@ -1755,32 +1792,39 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -1755,32 +1792,39 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{ {
struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
struct datapath *dp; struct datapath *dp;
u32 port_no; int bucket = cb->args[0], skip = cb->args[1];
int retval; int i, j = 0;
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp) if (!dp)
return -ENODEV; return -ENODEV;
rcu_read_lock(); rcu_read_lock();
for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) { for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
struct vport *vport; struct vport *vport;
struct hlist_node *n;
vport = rcu_dereference(dp->ports[port_no]); j = 0;
if (!vport) hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) {
continue; if (j >= skip &&
ovs_vport_cmd_fill_info(vport, skb,
if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh->nlmsg_seq,
NLM_F_MULTI,
OVS_VPORT_CMD_NEW) < 0) OVS_VPORT_CMD_NEW) < 0)
break; goto out;
j++;
} }
skip = 0;
}
out:
rcu_read_unlock(); rcu_read_unlock();
cb->args[0] = port_no; cb->args[0] = i;
retval = skb->len; cb->args[1] = j;
return retval; return skb->len;
} }
static struct genl_ops dp_vport_genl_ops[] = { static struct genl_ops dp_vport_genl_ops[] = {
......
...@@ -29,7 +29,9 @@ ...@@ -29,7 +29,9 @@
#include "flow.h" #include "flow.h"
#include "vport.h" #include "vport.h"
#define DP_MAX_PORTS 1024 #define DP_MAX_PORTS USHRT_MAX
#define DP_VPORT_HASH_BUCKETS 1024
#define SAMPLE_ACTION_DEPTH 3 #define SAMPLE_ACTION_DEPTH 3
/** /**
...@@ -57,10 +59,8 @@ struct dp_stats_percpu { ...@@ -57,10 +59,8 @@ struct dp_stats_percpu {
* @list_node: Element in global 'dps' list. * @list_node: Element in global 'dps' list.
* @n_flows: Number of flows currently in flow table. * @n_flows: Number of flows currently in flow table.
* @table: Current flow table. Protected by genl_lock and RCU. * @table: Current flow table. Protected by genl_lock and RCU.
* @ports: Map from port number to &struct vport. %OVSP_LOCAL port * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
* always exists, other ports may be %NULL. Protected by RTNL and RCU. * RTNL and RCU.
* @port_list: List of all ports in @ports in arbitrary order. RTNL required
* to iterate or modify.
* @stats_percpu: Per-CPU datapath statistics. * @stats_percpu: Per-CPU datapath statistics.
* @net: Reference to net namespace. * @net: Reference to net namespace.
* *
...@@ -75,8 +75,7 @@ struct datapath { ...@@ -75,8 +75,7 @@ struct datapath {
struct flow_table __rcu *table; struct flow_table __rcu *table;
/* Switch ports. */ /* Switch ports. */
struct vport __rcu *ports[DP_MAX_PORTS]; struct hlist_head *ports;
struct list_head port_list;
/* Stats. */ /* Stats. */
struct dp_stats_percpu __percpu *stats_percpu; struct dp_stats_percpu __percpu *stats_percpu;
...@@ -87,6 +86,26 @@ struct datapath { ...@@ -87,6 +86,26 @@ struct datapath {
#endif #endif
}; };
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
/*
 * Look up port_no in dp for callers that rely on the RCU read lock.
 *
 * Fires WARN_ON_ONCE() when rcu_read_lock_held() reports false, then
 * performs the lookup regardless. Returns the vport or NULL if absent.
 */
static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
{
	struct vport *found;

	WARN_ON_ONCE(!rcu_read_lock_held());
	found = ovs_lookup_vport(dp, port_no);

	return found;
}
/*
 * Look up port_no in dp for callers that hold either the RCU read lock
 * or the RTNL lock.
 *
 * Fires WARN_ON_ONCE() only when neither rcu_read_lock_held() nor
 * rtnl_is_locked() reports true, then performs the lookup regardless.
 * Returns the vport or NULL if absent.
 */
static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no)
{
	struct vport *found;

	WARN_ON_ONCE(!(rcu_read_lock_held() || rtnl_is_locked()));
	found = ovs_lookup_vport(dp, port_no);

	return found;
}
/*
 * Look up port_no in dp for callers that hold the RTNL lock.
 *
 * Runs ASSERT_RTNL() before the lookup. Returns the vport or NULL if
 * absent.
 */
static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no)
{
	struct vport *found;

	ASSERT_RTNL();
	found = ovs_lookup_vport(dp, port_no);

	return found;
}
/** /**
* struct ovs_skb_cb - OVS data in skb CB * struct ovs_skb_cb - OVS data in skb CB
* @flow: The flow associated with this packet. May be %NULL if no flow. * @flow: The flow associated with this packet. May be %NULL if no flow.
......
...@@ -203,10 +203,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions) ...@@ -203,10 +203,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
int actions_len = nla_len(actions); int actions_len = nla_len(actions);
struct sw_flow_actions *sfa; struct sw_flow_actions *sfa;
/* At least DP_MAX_PORTS actions are required to be able to flood a if (actions_len > MAX_ACTIONS_BUFSIZE)
* packet to every port. Factor of 2 allows for setting VLAN tags,
* etc. */
if (actions_len > 2 * DP_MAX_PORTS * nla_total_size(4))
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL); sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL);
...@@ -1000,7 +997,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, ...@@ -1000,7 +997,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
swkey->phy.in_port = in_port; swkey->phy.in_port = in_port;
attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
} else { } else {
swkey->phy.in_port = USHRT_MAX; swkey->phy.in_port = DP_MAX_PORTS;
} }
/* Data attributes. */ /* Data attributes. */
...@@ -1143,7 +1140,7 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, ...@@ -1143,7 +1140,7 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
const struct nlattr *nla; const struct nlattr *nla;
int rem; int rem;
*in_port = USHRT_MAX; *in_port = DP_MAX_PORTS;
*priority = 0; *priority = 0;
nla_for_each_nested(nla, attr, rem) { nla_for_each_nested(nla, attr, rem) {
...@@ -1180,7 +1177,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) ...@@ -1180,7 +1177,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
goto nla_put_failure; goto nla_put_failure;
if (swkey->phy.in_port != USHRT_MAX && if (swkey->phy.in_port != DP_MAX_PORTS &&
nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
goto nla_put_failure; goto nla_put_failure;
......
...@@ -43,7 +43,7 @@ struct sw_flow_actions { ...@@ -43,7 +43,7 @@ struct sw_flow_actions {
struct sw_flow_key { struct sw_flow_key {
struct { struct {
u32 priority; /* Packet QoS priority. */ u32 priority; /* Packet QoS priority. */
u16 in_port; /* Input switch port (or USHRT_MAX). */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
} phy; } phy;
struct { struct {
u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 src[ETH_ALEN]; /* Ethernet source address. */
...@@ -161,6 +161,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, ...@@ -161,6 +161,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
const struct nlattr *); const struct nlattr *);
#define MAX_ACTIONS_BUFSIZE (16 * 1024)
#define TBL_MIN_BUCKETS 1024 #define TBL_MIN_BUCKETS 1024
struct flow_table { struct flow_table {
......
...@@ -127,6 +127,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, ...@@ -127,6 +127,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
vport->port_no = parms->port_no; vport->port_no = parms->port_no;
vport->upcall_pid = parms->upcall_pid; vport->upcall_pid = parms->upcall_pid;
vport->ops = ops; vport->ops = ops;
INIT_HLIST_NODE(&vport->dp_hash_node);
vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); vport->percpu_stats = alloc_percpu(struct vport_percpu_stats);
if (!vport->percpu_stats) { if (!vport->percpu_stats) {
......
...@@ -70,10 +70,10 @@ struct vport_err_stats { ...@@ -70,10 +70,10 @@ struct vport_err_stats {
* @rcu: RCU callback head for deferred destruction. * @rcu: RCU callback head for deferred destruction.
* @port_no: Index into @dp's @ports array. * @port_no: Index into @dp's @ports array.
* @dp: Datapath to which this port belongs. * @dp: Datapath to which this port belongs.
* @node: Element in @dp's @port_list.
* @upcall_pid: The Netlink port to use for packets received on this port that * @upcall_pid: The Netlink port to use for packets received on this port that
* miss the flow table. * miss the flow table.
* @hash_node: Element in @dev_table hash table in vport.c. * @hash_node: Element in @dev_table hash table in vport.c.
* @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
* @ops: Class structure. * @ops: Class structure.
* @percpu_stats: Points to per-CPU statistics used and maintained by vport * @percpu_stats: Points to per-CPU statistics used and maintained by vport
* @stats_lock: Protects @err_stats; * @stats_lock: Protects @err_stats;
...@@ -83,10 +83,10 @@ struct vport { ...@@ -83,10 +83,10 @@ struct vport {
struct rcu_head rcu; struct rcu_head rcu;
u16 port_no; u16 port_no;
struct datapath *dp; struct datapath *dp;
struct list_head node;
u32 upcall_pid; u32 upcall_pid;
struct hlist_node hash_node; struct hlist_node hash_node;
struct hlist_node dp_hash_node;
const struct vport_ops *ops; const struct vport_ops *ops;
struct vport_percpu_stats __percpu *percpu_stats; struct vport_percpu_stats __percpu *percpu_stats;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment