Commit 19fd08b8 authored by Linus Torvalds

Merge tag 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "Doug and I are at a conference next week so if another PR is sent I
  expect it to only be bug fixes. Parav noted yesterday that there are
  some fringe case behavior changes in his work that he would like to
  fix, and I see that Intel has a number of rc looking patches for HFI1
  they posted yesterday.

  Parav is again the biggest contributor by patch count with his ongoing
  work to enable container support in the RDMA stack, followed by Leon
  doing syzkaller inspired cleanups, though most of the actual fixing
  went to RC.

  There is one uncomfortable series here fixing the user ABI to actually
  work as intended in 32 bit mode. There are lots of notes in the commit
  messages, but the basic summary is we don't think there is an actual
  32 bit kernel user of drivers/infiniband for several good reasons.

  However we are seeing people want to use a 32 bit user space wi...
parents 28da7be5 efc365e7
......@@ -102,6 +102,8 @@ Koushik <raghavendra.koushik@neterion.com>
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Leon Romanovsky <leon@kernel.org> <leon@leon.nu>
Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com>
Leonid I Ananiev <leonid.i.ananiev@intel.com>
Linas Vepstas <linas@austin.ibm.com>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
......
......@@ -7214,6 +7214,7 @@ M: Shiraz Saleem <shiraz.saleem@intel.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/i40iw/
F: include/uapi/rdma/i40iw-abi.h
INTEL SHA MULTIBUFFER DRIVER
M: Megha Dey <megha.dey@linux.intel.com>
......
......@@ -35,14 +35,13 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from
rdma-core <https://github.com/linux-rdma/rdma-core>.
config INFINIBAND_EXP_USER_ACCESS
bool "Enable the full uverbs ioctl interface (EXPERIMENTAL)"
config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI
bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)"
depends on INFINIBAND_USER_ACCESS
---help---
IOCTL based ABI support for Infiniband. This allows userspace
to invoke the experimental IOCTL based ABI.
These commands are parsed via per-device parsing tree and
enables per-device features.
IOCTL based uAPI support for Infiniband is enabled by default for
new verbs only. This allows userspace to invoke the IOCTL based uAPI
for current legacy verbs too.
config INFINIBAND_USER_MEM
bool
......
......@@ -34,4 +34,6 @@ ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
uverbs_ioctl_merge.o
uverbs_ioctl_merge.o uverbs_std_types_cq.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o
......@@ -329,7 +329,8 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
static int ib_nl_fetch_ha(const struct dst_entry *dst,
struct rdma_dev_addr *dev_addr,
const void *daddr, u32 seq, u16 family)
{
if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
......@@ -340,7 +341,8 @@ static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
}
static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
static int dst_fetch_ha(const struct dst_entry *dst,
struct rdma_dev_addr *dev_addr,
const void *daddr)
{
struct neighbour *n;
......@@ -364,7 +366,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
return ret;
}
static bool has_gateway(struct dst_entry *dst, sa_family_t family)
static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
{
struct rtable *rt;
struct rt6_info *rt6;
......@@ -378,7 +380,7 @@ static bool has_gateway(struct dst_entry *dst, sa_family_t family)
return rt6->rt6i_flags & RTF_GATEWAY;
}
static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
const struct sockaddr *dst_in, u32 seq)
{
const struct sockaddr_in *dst_in4 =
......@@ -482,7 +484,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
}
#endif
static int addr_resolve_neigh(struct dst_entry *dst,
static int addr_resolve_neigh(const struct dst_entry *dst,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
u32 seq)
......@@ -736,7 +738,6 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
return addr_resolve(src_in, dst_addr, addr, false, 0);
}
EXPORT_SYMBOL(rdma_resolve_ip_route);
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
......
This diff is collapsed.
......@@ -462,13 +462,31 @@ static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
struct cm_id_private *cm_id_priv)
static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
struct cm_av *av,
struct cm_port *port)
{
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&cm.lock, flags);
if (&cm_id_priv->av == av)
list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
else if (&cm_id_priv->alt_av == av)
list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
else
ret = -EINVAL;
spin_unlock_irqrestore(&cm.lock, flags);
return ret;
}
static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
{
struct cm_device *cm_dev;
struct cm_port *port = NULL;
unsigned long flags;
int ret;
u8 p;
struct net_device *ndev = ib_get_ndev_from_path(path);
......@@ -477,7 +495,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
sa_conv_pathrec_to_gid_type(path),
ndev, &p, NULL)) {
port = cm_dev->port[p-1];
port = cm_dev->port[p - 1];
break;
}
}
......@@ -485,9 +503,20 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
if (ndev)
dev_put(ndev);
return port;
}
static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
struct cm_id_private *cm_id_priv)
{
struct cm_device *cm_dev;
struct cm_port *port;
int ret;
port = get_cm_port_from_path(path);
if (!port)
return -EINVAL;
cm_dev = port->cm_dev;
ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
be16_to_cpu(path->pkey), &av->pkey_index);
......@@ -502,16 +531,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
av->timeout = path->packet_life_time + 1;
spin_lock_irqsave(&cm.lock, flags);
if (&cm_id_priv->av == av)
list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
else if (&cm_id_priv->alt_av == av)
list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
else
ret = -EINVAL;
spin_unlock_irqrestore(&cm.lock, flags);
ret = add_cm_id_to_port_list(cm_id_priv, av, port);
return ret;
}
......@@ -1523,6 +1543,8 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
cm_req_get_primary_local_ack_timeout(req_msg);
primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
primary_path->service_id = req_msg->service_id;
if (sa_path_is_roce(primary_path))
primary_path->roce.route_resolved = false;
if (cm_req_has_alt_path(req_msg)) {
alt_path->dgid = req_msg->alt_local_gid;
......@@ -1542,6 +1564,9 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
cm_req_get_alt_local_ack_timeout(req_msg);
alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
alt_path->service_id = req_msg->service_id;
if (sa_path_is_roce(alt_path))
alt_path->roce.route_resolved = false;
}
cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
}
......@@ -3150,6 +3175,13 @@ static int cm_lap_handler(struct cm_work *work)
struct ib_mad_send_buf *msg = NULL;
int ret;
/* Currently Alternate path messages are not supported for
* RoCE link layer.
*/
if (rdma_protocol_roce(work->port->cm_dev->ib_device,
work->port->port_num))
return -EINVAL;
/* todo: verify LAP request and send reject APR if invalid. */
lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
......@@ -3299,6 +3331,13 @@ static int cm_apr_handler(struct cm_work *work)
struct cm_apr_msg *apr_msg;
int ret;
/* Currently Alternate path messages are not supported for
* RoCE link layer.
*/
if (rdma_protocol_roce(work->port->cm_dev->ib_device,
work->port->port_num))
return -EINVAL;
apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
apr_msg->local_comm_id);
......
......@@ -62,6 +62,7 @@
#include <rdma/iw_cm.h>
#include "core_priv.h"
#include "cma_priv.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
......@@ -174,7 +175,7 @@ static struct cma_pernet *cma_pernet(struct net *net)
return net_generic(net, cma_pernet_id);
}
static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps)
{
struct cma_pernet *pernet = cma_pernet(net);
......@@ -203,7 +204,7 @@ struct cma_device {
};
struct rdma_bind_list {
enum rdma_port_space ps;
enum rdma_ucm_port_space ps;
struct hlist_head owners;
unsigned short port;
};
......@@ -216,7 +217,7 @@ struct class_port_info_context {
u8 port_num;
};
static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
......@@ -225,14 +226,15 @@ static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
}
static struct rdma_bind_list *cma_ps_find(struct net *net,
enum rdma_port_space ps, int snum)
enum rdma_ucm_port_space ps, int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
return idr_find(idr, snum);
}
static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
......@@ -327,46 +329,6 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
* We do this by disabling removal notification while a callback is in process,
* and reporting it after the callback completes.
*/
struct rdma_id_private {
struct rdma_cm_id id;
struct rdma_bind_list *bind_list;
struct hlist_node node;
struct list_head list; /* listen_any_list or cma_device.list */
struct list_head listen_list; /* per device listens */
struct cma_device *cma_dev;
struct list_head mc_list;
int internal_id;
enum rdma_cm_state state;
spinlock_t lock;
struct mutex qp_mutex;
struct completion comp;
atomic_t refcount;
struct mutex handler_mutex;
int backlog;
int timeout_ms;
struct ib_sa_query *query;
int query_id;
union {
struct ib_cm_id *ib;
struct iw_cm_id *iw;
} cm_id;
u32 seq_num;
u32 qkey;
u32 qp_num;
pid_t owner;
u32 options;
u8 srq;
u8 tos;
bool tos_set;
u8 reuseaddr;
u8 afonly;
enum ib_gid_type gid_type;
};
struct cma_multicast {
struct rdma_id_private *id_priv;
......@@ -505,6 +467,8 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
id_priv->res.type = RDMA_RESTRACK_CM_ID;
rdma_restrack_add(&id_priv->res);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
......@@ -777,10 +741,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
complete(&id_priv->comp);
}
struct rdma_cm_id *rdma_create_id(struct net *net,
rdma_cm_event_handler event_handler,
void *context, enum rdma_port_space ps,
enum ib_qp_type qp_type)
struct rdma_cm_id *__rdma_create_id(struct net *net,
rdma_cm_event_handler event_handler,
void *context, enum rdma_ucm_port_space ps,
enum ib_qp_type qp_type, const char *caller)
{
struct rdma_id_private *id_priv;
......@@ -788,7 +752,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
if (!id_priv)
return ERR_PTR(-ENOMEM);
id_priv->owner = task_pid_nr(current);
if (caller)
id_priv->res.kern_name = caller;
else
rdma_restrack_set_task(&id_priv->res, current);
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
......@@ -808,7 +775,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
return &id_priv->id;
}
EXPORT_SYMBOL(rdma_create_id);
EXPORT_SYMBOL(__rdma_create_id);
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
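The renamed __rdma_create_id() (and __rdma_accept() further down in this diff) take an extra caller string so restrack can attribute kernel-owned CM IDs to the module that created them. The rdma_cm.h side of the change is not shown in this excerpt; presumably the old names survive as wrapper macros that pass KBUILD_MODNAME, roughly:

#define rdma_create_id(net, event_handler, context, ps, qp_type) \
	__rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
			 KBUILD_MODNAME)

#define rdma_accept(id, conn_param) \
	__rdma_accept((id), (conn_param), KBUILD_MODNAME)

With that, existing in-kernel call sites compile unchanged and res.kern_name ends up holding the calling module's name, while user-space-created IDs record the owning task via rdma_restrack_set_task() instead.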
......@@ -1400,7 +1367,7 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
return net_dev;
}
static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
static enum rdma_ucm_port_space rdma_ps_from_service_id(__be64 service_id)
{
return (be64_to_cpu(service_id) >> 16) & 0xffff;
}
......@@ -1441,21 +1408,12 @@ static bool cma_match_private_data(struct rdma_id_private *id_priv,
return true;
}
static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num)
{
enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num);
enum rdma_transport_type transport =
rdma_node_get_transport(device->node_type);
return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB;
}
static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
struct ib_device *device = id->device;
const int port_num = id->port_num ?: rdma_start_port(device);
return cma_protocol_roce_dev_port(device, port_num);
return rdma_protocol_roce(device, port_num);
}
static bool cma_match_net_dev(const struct rdma_cm_id *id,
......@@ -1468,7 +1426,7 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id,
/* This request is an AF_IB request or a RoCE request */
return (!id->port_num || id->port_num == port_num) &&
(addr->src_addr.ss_family == AF_IB ||
cma_protocol_roce_dev_port(id->device, port_num));
rdma_protocol_roce(id->device, port_num));
return !addr->dev_addr.bound_dev_if ||
(net_eq(dev_net(net_dev), addr->dev_addr.net) &&
......@@ -1523,7 +1481,7 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
/* Assuming the protocol is AF_IB */
*net_dev = NULL;
} else if (cma_protocol_roce_dev_port(req.device, req.port)) {
} else if (rdma_protocol_roce(req.device, req.port)) {
/* TODO find the net dev matching the request parameters
* through the RoCE GID table */
*net_dev = NULL;
......@@ -1668,6 +1626,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
mutex_unlock(&id_priv->handler_mutex);
if (id_priv->cma_dev) {
rdma_restrack_del(&id_priv->res);
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
ib_destroy_cm_id(id_priv->cm_id.ib);
......@@ -1817,6 +1776,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event,
struct net_device *net_dev)
{
struct rdma_id_private *listen_id_priv;
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
struct rdma_route *rt;
......@@ -1826,9 +1786,11 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
ib_event->param.req_rcvd.primary_path->service_id;
int ret;
id = rdma_create_id(listen_id->route.addr.dev_addr.net,
listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
listen_id->event_handler, listen_id->context,
listen_id->ps, ib_event->param.req_rcvd.qp_type);
listen_id->ps, ib_event->param.req_rcvd.qp_type,
listen_id_priv->res.kern_name);
if (IS_ERR(id))
return NULL;
......@@ -1877,14 +1839,17 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event,
struct net_device *net_dev)
{
struct rdma_id_private *listen_id_priv;
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
struct net *net = listen_id->route.addr.dev_addr.net;
int ret;
id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
listen_id->ps, IB_QPT_UD);
listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
listen_id->ps, IB_QPT_UD,
listen_id_priv->res.kern_name);
if (IS_ERR(id))
return NULL;
......@@ -2150,10 +2115,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
goto out;
/* Create a new RDMA id for the new IW CM ID */
new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
listen_id->id.event_handler,
listen_id->id.context,
RDMA_PS_TCP, IB_QPT_RC);
new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
listen_id->id.event_handler,
listen_id->id.context,
RDMA_PS_TCP, IB_QPT_RC,
listen_id->res.kern_name);
if (IS_ERR(new_cm_id)) {
ret = -ENOMEM;
goto out;
......@@ -2278,8 +2244,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
return;
id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
id_priv->id.qp_type);
id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
id_priv->id.qp_type, id_priv->res.kern_name);
if (IS_ERR(id))
return;
......@@ -2541,6 +2507,7 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
gid_type = ib_network_to_gid_type(addr->dev_addr.network);
route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
route->path_rec->roce.route_resolved = true;
sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
sa_path_set_ifindex(route->path_rec, ndev->ifindex);
sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
......@@ -3028,7 +2995,7 @@ static void cma_bind_port(struct rdma_bind_list *bind_list,
hlist_add_head(&id_priv->node, &bind_list->owners);
}
static int cma_alloc_port(enum rdma_port_space ps,
static int cma_alloc_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv, unsigned short snum)
{
struct rdma_bind_list *bind_list;
......@@ -3091,7 +3058,7 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
return 0;
}
static int cma_alloc_any_port(enum rdma_port_space ps,
static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv)
{
static unsigned int last_used_port;
......@@ -3169,7 +3136,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
return 0;
}
static int cma_use_port(enum rdma_port_space ps,
static int cma_use_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv)
{
struct rdma_bind_list *bind_list;
......@@ -3203,8 +3170,8 @@ static int cma_bind_listen(struct rdma_id_private *id_priv)
return ret;
}
static enum rdma_port_space cma_select_inet_ps(
struct rdma_id_private *id_priv)
static enum rdma_ucm_port_space
cma_select_inet_ps(struct rdma_id_private *id_priv)
{
switch (id_priv->id.ps) {
case RDMA_PS_TCP:
......@@ -3218,9 +3185,10 @@ static enum rdma_port_space cma_select_inet_ps(
}
}
static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
static enum rdma_ucm_port_space
cma_select_ib_ps(struct rdma_id_private *id_priv)
{
enum rdma_port_space ps = 0;
enum rdma_ucm_port_space ps = 0;
struct sockaddr_ib *sib;
u64 sid_ps, mask, sid;
......@@ -3251,7 +3219,7 @@ static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
static int cma_get_port(struct rdma_id_private *id_priv)
{
enum rdma_port_space ps;
enum rdma_ucm_port_space ps;
int ret;
if (cma_family(id_priv) != AF_IB)
......@@ -3389,8 +3357,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
if (id_priv->cma_dev)
if (id_priv->cma_dev) {
rdma_restrack_del(&id_priv->res);
cma_release_dev(id_priv);
}
err1:
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
......@@ -3773,14 +3743,18 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
const char *caller)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->owner = task_pid_nr(current);
if (caller)
id_priv->res.kern_name = caller;
else
rdma_restrack_set_task(&id_priv->res, current);
if (!cma_comp(id_priv, RDMA_CM_CONNECT))
return -EINVAL;
......@@ -3820,7 +3794,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
rdma_reject(id, NULL, 0);
return ret;
}
EXPORT_SYMBOL(rdma_accept);
EXPORT_SYMBOL(__rdma_accept);
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
......@@ -3938,10 +3912,14 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
rdma_start_port(id_priv->cma_dev->device)];
event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
ib_init_ah_from_mcmember(id_priv->id.device,
id_priv->id.port_num, &multicast->rec,
ndev, gid_type,
&event.param.ud.ah_attr);
ret = ib_init_ah_from_mcmember(id_priv->id.device,
id_priv->id.port_num,
&multicast->rec,
ndev, gid_type,
&event.param.ud.ah_attr);
if (ret)
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
event.param.ud.qp_num = 0xFFFFFF;
event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
if (ndev)
......@@ -4501,7 +4479,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
goto out;
id_stats->pid = id_priv->owner;
id_stats->pid = task_pid_vnr(id_priv->res.task);
id_stats->port_space = id->ps;
id_stats->cm_state = id_priv->state;
id_stats->qp_num = id_priv->qp_num;
......
/*
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
* Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _CMA_PRIV_H
#define _CMA_PRIV_H
enum rdma_cm_state {
RDMA_CM_IDLE,
RDMA_CM_ADDR_QUERY,
RDMA_CM_ADDR_RESOLVED,
RDMA_CM_ROUTE_QUERY,
RDMA_CM_ROUTE_RESOLVED,
RDMA_CM_CONNECT,
RDMA_CM_DISCONNECT,
RDMA_CM_ADDR_BOUND,
RDMA_CM_LISTEN,
RDMA_CM_DEVICE_REMOVAL,
RDMA_CM_DESTROYING
};
struct rdma_id_private {
struct rdma_cm_id id;
struct rdma_bind_list *bind_list;
struct hlist_node node;
struct list_head list; /* listen_any_list or cma_device.list */
struct list_head listen_list; /* per device listens */
struct cma_device *cma_dev;
struct list_head mc_list;
int internal_id;
enum rdma_cm_state state;
spinlock_t lock;
struct mutex qp_mutex;
struct completion comp;
atomic_t refcount;
struct mutex handler_mutex;
int backlog;
int timeout_ms;
struct ib_sa_query *query;
int query_id;
union {
struct ib_cm_id *ib;
struct iw_cm_id *iw;
} cm_id;
u32 seq_num;
u32 qkey;
u32 qp_num;
u32 options;
u8 srq;
u8 tos;
bool tos_set;
u8 reuseaddr;
u8 afonly;
enum ib_gid_type gid_type;
/*
* Internal to RDMA/core, don't use in the drivers
*/
struct rdma_restrack_entry res;
};
#endif /* _CMA_PRIV_H */
......@@ -333,4 +333,15 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
return qp;
}
struct rdma_dev_addr;
int rdma_resolve_ip_route(struct sockaddr *src_addr,
const struct sockaddr *dst_addr,
struct rdma_dev_addr *addr);
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
u8 *dmac, const struct net_device *ndev,
int *hoplimit);
#endif /* _CORE_PRIV_H */
......@@ -103,7 +103,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
IB_MANDATORY_FUNC(query_device),
IB_MANDATORY_FUNC(query_port),
IB_MANDATORY_FUNC(query_pkey),
IB_MANDATORY_FUNC(query_gid),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
IB_MANDATORY_FUNC(create_ah),
......@@ -853,7 +852,7 @@ int ib_query_port(struct ib_device *device,
if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
return 0;
err = ib_query_gid(device, port_num, 0, &gid, NULL);
err = device->query_gid(device, port_num, 0, &gid);
if (err)
return err;
......@@ -871,19 +870,13 @@ EXPORT_SYMBOL(ib_query_port);
* @attr: Returned GID attributes related to this GID index (only in RoCE).
* NULL means ignore.
*
* ib_query_gid() fetches the specified GID table entry.
* ib_query_gid() fetches the specified GID table entry from the cache.
*/
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid,
struct ib_gid_attr *attr)
{
if (rdma_cap_roce_gid_table(device, port_num))
return ib_get_cached_gid(device, port_num, index, gid, attr);
if (attr)
return -EINVAL;
return device->query_gid(device, port_num, index, gid);
return ib_get_cached_gid(device, port_num, index, gid, attr);
}
EXPORT_SYMBOL(ib_query_gid);
......@@ -1049,19 +1042,18 @@ EXPORT_SYMBOL(ib_modify_port);
* a specified GID value occurs. Its searches only for IB link layer.
* @device: The device to query.
* @gid: The GID value to search for.
* @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
struct net_device *ndev, u8 *port_num, u16 *index)
u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
if (rdma_cap_roce_gid_table(device, port))
if (!rdma_protocol_ib(device, port))
continue;
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
......
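ib_find_gid() loses its ndev argument and now skips any port that is not IB link layer, rather than skipping RoCE GID-table ports. A minimal usage sketch against the new signature, assuming a valid ib_device pointer and a GID filled in by the caller:

static int example_find_gid(struct ib_device *device, union ib_gid *gid)
{
	u8 port;
	u16 index;
	int ret;

	/* ndev parameter is gone; only IB link-layer ports are searched */
	ret = ib_find_gid(device, gid, &port, &index);
	if (!ret)
		pr_debug("GID found at port %u, index %u\n", port, index);
	return ret;
}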
......@@ -439,10 +439,9 @@ struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
struct sk_buff *skb = NULL;
skb = dev_alloc_skb(IWPM_MSG_SIZE);
if (!skb) {
pr_err("%s Unable to allocate skb\n", __func__);
if (!skb)
goto create_nlmsg_exit;
}
if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op,
NLM_F_REQUEST))) {
pr_warn("%s: Unable to put the nlmsg header\n", __func__);
......
......@@ -724,21 +724,19 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
{
int ret;
u16 gid_index;
u8 p;
if (rdma_protocol_roce(device, port_num)) {
ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
gid_type, port_num,
ndev,
&gid_index);
} else if (rdma_protocol_ib(device, port_num)) {
ret = ib_find_cached_gid(device, &rec->port_gid,
IB_GID_TYPE_IB, NULL, &p,
&gid_index);
} else {
ret = -EINVAL;
}
/* GID table is not based on the netdevice for IB link layer,
* so ignore ndev during search.
*/
if (rdma_protocol_ib(device, port_num))
ndev = NULL;
else if (!rdma_protocol_roce(device, port_num))
return -EINVAL;
ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
gid_type, port_num,
ndev,
&gid_index);
if (ret)
return ret;
......
......@@ -34,9 +34,11 @@
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <net/netlink.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"
#include "cma_priv.h"
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
......@@ -71,6 +73,31 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
.len = TASK_COMM_LEN },
[RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
.len = sizeof(struct __kernel_sockaddr_storage) },
[RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
.len = sizeof(struct __kernel_sockaddr_storage) },
[RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
.len = IFNAMSIZ },
};
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
......@@ -99,7 +126,7 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
return -EMSGSIZE;
ib_get_device_fw_str(device, fw);
/* Device without FW has strlen(fw) */
/* Device without FW has strlen(fw) = 0 */
if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
return -EMSGSIZE;
......@@ -115,8 +142,10 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
}
static int fill_port_info(struct sk_buff *msg,
struct ib_device *device, u32 port)
struct ib_device *device, u32 port,
const struct net *net)
{
struct net_device *netdev = NULL;
struct ib_port_attr attr;
int ret;
......@@ -150,7 +179,23 @@ static int fill_port_info(struct sk_buff *msg,
return -EMSGSIZE;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
return -EMSGSIZE;
return 0;
if (device->get_netdev)
netdev = device->get_netdev(device, port);
if (netdev && net_eq(dev_net(netdev), net)) {
ret = nla_put_u32(msg,
RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
if (ret)
goto out;
ret = nla_put_string(msg,
RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
}
out:
if (netdev)
dev_put(netdev);
return ret;
}
static int fill_res_info_entry(struct sk_buff *msg,
......@@ -182,6 +227,8 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
[RDMA_RESTRACK_PD] = "pd",
[RDMA_RESTRACK_CQ] = "cq",
[RDMA_RESTRACK_QP] = "qp",
[RDMA_RESTRACK_CM_ID] = "cm_id",
[RDMA_RESTRACK_MR] = "mr",
};
struct rdma_restrack_root *res = &device->res;
......@@ -212,10 +259,29 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
return ret;
}
static int fill_res_qp_entry(struct sk_buff *msg,
struct ib_qp *qp, uint32_t port)
static int fill_res_name_pid(struct sk_buff *msg,
struct rdma_restrack_entry *res)
{
/*
* For user resources, user space should read /proc/PID/comm to get the
* name of the owning task.
*/
if (rdma_is_kernel_res(res)) {
if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
res->kern_name))
return -EMSGSIZE;
} else {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
task_pid_vnr(res->task)))
return -EMSGSIZE;
}
return 0;
}
static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct rdma_restrack_entry *res = &qp->res;
struct ib_qp *qp = container_of(res, struct ib_qp, res);
struct ib_qp_init_attr qp_init_attr;
struct nlattr *entry_attr;
struct ib_qp_attr qp_attr;
......@@ -262,19 +328,172 @@ static int fill_res_qp_entry(struct sk_buff *msg,
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
goto err;
/*
* Existence of task means that it is user QP and netlink
* user is invited to go and read /proc/PID/comm to get name
* of the task file and res->task_com should be NULL.
*/
if (rdma_is_kernel_res(res)) {
if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name))
if (fill_res_name_pid(msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
err:
nla_nest_cancel(msg, entry_attr);
out:
return -EMSGSIZE;
}
static int fill_res_cm_id_entry(struct sk_buff *msg,
struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct rdma_id_private *id_priv =
container_of(res, struct rdma_id_private, res);
struct rdma_cm_id *cm_id = &id_priv->id;
struct nlattr *entry_attr;
if (port && port != cm_id->port_num)
return 0;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
if (!entry_attr)
goto out;
if (cm_id->port_num &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
goto err;
if (id_priv->qp_num) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
goto err;
} else {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, task_pid_vnr(res->task)))
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
goto err;
}
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
goto err;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
goto err;
if (cm_id->route.addr.src_addr.ss_family &&
nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
sizeof(cm_id->route.addr.src_addr),
&cm_id->route.addr.src_addr))
goto err;
if (cm_id->route.addr.dst_addr.ss_family &&
nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
sizeof(cm_id->route.addr.dst_addr),
&cm_id->route.addr.dst_addr))
goto err;
if (fill_res_name_pid(msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
err:
nla_nest_cancel(msg, entry_attr);
out:
return -EMSGSIZE;
}
static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_cq *cq = container_of(res, struct ib_cq, res);
struct nlattr *entry_attr;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
if (!entry_attr)
goto out;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
goto err;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
atomic_read(&cq->usecnt), 0))
goto err;
/* Poll context is only valid for kernel CQs */
if (rdma_is_kernel_res(res) &&
nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
goto err;
if (fill_res_name_pid(msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
err:
nla_nest_cancel(msg, entry_attr);
out:
return -EMSGSIZE;
}
static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_mr *mr = container_of(res, struct ib_mr, res);
struct nlattr *entry_attr;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
if (!entry_attr)
goto out;
if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
goto err;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
goto err;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
mr->iova, 0))
goto err;
}
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
goto err;
if (fill_res_name_pid(msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
err:
nla_nest_cancel(msg, entry_attr);
out:
return -EMSGSIZE;
}
static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_pd *pd = container_of(res, struct ib_pd, res);
struct nlattr *entry_attr;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
if (!entry_attr)
goto out;
if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
pd->local_dma_lkey))
goto err;
if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
pd->unsafe_global_rkey))
goto err;
}
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
atomic_read(&pd->usecnt), 0))
goto err;
if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
pd->unsafe_global_rkey))
goto err;
if (fill_res_name_pid(msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
......@@ -405,7 +624,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
err = fill_port_info(msg, device, port);
err = fill_port_info(msg, device, port, sock_net(skb->sk));
if (err)
goto err_free;
......@@ -465,7 +684,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
RDMA_NLDEV_CMD_PORT_GET),
0, NLM_F_MULTI);
if (fill_port_info(skb, device, p)) {
if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
nlmsg_cancel(skb, nlh);
goto out;
}
......@@ -558,23 +777,60 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}
static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
struct nldev_fill_res_entry {
int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, u32 port);
enum rdma_nldev_attr nldev_attr;
enum rdma_nldev_command nldev_cmd;
};
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
[RDMA_RESTRACK_QP] = {
.fill_res_func = fill_res_qp_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
},
[RDMA_RESTRACK_CM_ID] = {
.fill_res_func = fill_res_cm_id_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
},
[RDMA_RESTRACK_CQ] = {
.fill_res_func = fill_res_cq_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
},
[RDMA_RESTRACK_MR] = {
.fill_res_func = fill_res_mr_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
},
[RDMA_RESTRACK_PD] = {
.fill_res_func = fill_res_pd_entry,
.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
},
};
static int res_get_common_dumpit(struct sk_buff *skb,
struct netlink_callback *cb,
enum rdma_restrack_type res_type)
{
const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct rdma_restrack_entry *res;
int err, ret = 0, idx = 0;
struct nlattr *table_attr;
struct ib_device *device;
int start = cb->args[0];
struct ib_qp *qp = NULL;
struct nlmsghdr *nlh;
u32 index, port = 0;
bool filled = false;
err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, NULL);
/*
* Right now, we are expecting the device index to get QP information,
* Right now, we are expecting the device index to get res information,
* but it is possible to extend this code to return all devices in
* one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
* if it doesn't exist, we will iterate over all devices.
......@@ -601,7 +857,7 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
}
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
0, NLM_F_MULTI);
if (fill_nldev_handle(skb, device)) {
......@@ -609,24 +865,26 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
goto err;
}
table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP);
table_attr = nla_nest_start(skb, fe->nldev_attr);
if (!table_attr) {
ret = -EMSGSIZE;
goto err;
}
down_read(&device->res.rwsem);
hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) {
hash_for_each_possible(device->res.hash, res, node, res_type) {
if (idx < start)
goto next;
if ((rdma_is_kernel_res(res) &&
task_active_pid_ns(current) != &init_pid_ns) ||
(!rdma_is_kernel_res(res) &&
task_active_pid_ns(current) != task_active_pid_ns(res->task)))
(!rdma_is_kernel_res(res) && task_active_pid_ns(current) !=
task_active_pid_ns(res->task)))
/*
* 1. Kernel QPs should be visible in init namspace only
* 2. Present only QPs visible in the current namespace
* 1. Kern resources should be visible in init
* namespace only
* 2. Present only resources visible in the current
* namespace
*/
goto next;
......@@ -638,10 +896,10 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
*/
goto next;
qp = container_of(res, struct ib_qp, res);
filled = true;
up_read(&device->res.rwsem);
ret = fill_res_qp_entry(skb, qp, port);
ret = fe->fill_res_func(skb, cb, res, port);
down_read(&device->res.rwsem);
/*
* Return resource back, but it won't be released till
......@@ -667,10 +925,10 @@ next: idx++;
cb->args[0] = idx;
/*
* No more QPs to fill, cancel the message and
* No more entries to fill, cancel the message and
* return 0 to mark end of dumpit.
*/
if (!qp)
if (!filled)
goto err;
put_device(&device->dev);
......@@ -688,6 +946,36 @@ next: idx++;
return ret;
}
static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP);
}
static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID);
}
static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ);
}
static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR);
}
static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
}
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
......@@ -714,6 +1002,18 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
* too.
*/
},
[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
.dump = nldev_res_get_cm_id_dumpit,
},
[RDMA_NLDEV_CMD_RES_CQ_GET] = {
.dump = nldev_res_get_cq_dumpit,
},
[RDMA_NLDEV_CMD_RES_MR_GET] = {
.dump = nldev_res_get_mr_dumpit,
},
[RDMA_NLDEV_CMD_RES_PD_GET] = {
.dump = nldev_res_get_pd_dumpit,
},
};
void __init nldev_init(void)
......
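The new dumpit handlers are reached through a NETLINK_RDMA socket with the RDMA_NL_NLDEV client; the iproute2 "rdma" tool is the usual consumer of this interface. A rough user-space sketch of requesting the CM_ID dump added above (error handling and attribute parsing omitted; names follow the uapi rdma_netlink.h header):

#include <sys/socket.h>
#include <linux/netlink.h>
#include <rdma/rdma_netlink.h>	/* RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_CM_ID_GET */
#include <unistd.h>

static int dump_cm_ids(void)
{
	struct sockaddr_nl dst = { .nl_family = AF_NETLINK };
	struct nlmsghdr nlh = {
		.nlmsg_len = NLMSG_LENGTH(0),	/* no attributes: dump all devices */
		.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					       RDMA_NLDEV_CMD_RES_CM_ID_GET),
		.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
	};
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_RDMA);

	if (fd < 0)
		return -1;
	sendto(fd, &nlh, nlh.nlmsg_len, 0, (struct sockaddr *)&dst, sizeof(dst));
	/* recv() the NLM_F_MULTI replies here and walk the nested
	 * RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY attributes. */
	close(fd);
	return 0;
}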
......@@ -350,13 +350,6 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
return type->type_class->alloc_begin(type, ucontext);
}
static void uverbs_uobject_add(struct ib_uobject *uobject)
{
mutex_lock(&uobject->context->uobjects_lock);
list_add(&uobject->list, &uobject->context->uobjects);
mutex_unlock(&uobject->context->uobjects_lock);
}
static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
......@@ -502,7 +495,6 @@ int rdma_explicit_destroy(struct ib_uobject *uobject)
static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
uverbs_uobject_add(uobj);
spin_lock(&uobj->context->ufile->idr_lock);
/*
* We already allocated this IDR with a NULL object, so
......@@ -518,7 +510,6 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
struct ib_uobject_file *uobj_file =
container_of(uobj, struct ib_uobject_file, uobj);
uverbs_uobject_add(&uobj_file->uobj);
fd_install(uobj_file->uobj.id, uobj->object);
/* This shouldn't be used anymore. Use the file object instead */
uobj_file->uobj.id = 0;
......@@ -545,6 +536,10 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
assert_uverbs_usecnt(uobj, true);
atomic_set(&uobj->usecnt, 0);
mutex_lock(&uobj->context->uobjects_lock);
list_add(&uobj->list, &uobj->context->uobjects);
mutex_unlock(&uobj->context->uobjects_lock);
uobj->type->type_class->alloc_commit(uobj);
up_read(&uobj->context->cleanup_rwsem);
......
......@@ -3,20 +3,66 @@
* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
*/
#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/restrack.h>
#include <linux/mutex.h>
#include <linux/sched/task.h>
#include <linux/pid_namespace.h>
#include "cma_priv.h"
void rdma_restrack_init(struct rdma_restrack_root *res)
{
init_rwsem(&res->rwsem);
}
static const char *type2str(enum rdma_restrack_type type)
{
static const char * const names[RDMA_RESTRACK_MAX] = {
[RDMA_RESTRACK_PD] = "PD",
[RDMA_RESTRACK_CQ] = "CQ",
[RDMA_RESTRACK_QP] = "QP",
[RDMA_RESTRACK_CM_ID] = "CM_ID",
[RDMA_RESTRACK_MR] = "MR",
};
return names[type];
};
void rdma_restrack_clean(struct rdma_restrack_root *res)
{
WARN_ON_ONCE(!hash_empty(res->hash));
struct rdma_restrack_entry *e;
char buf[TASK_COMM_LEN];
struct ib_device *dev;
const char *owner;
int bkt;
if (hash_empty(res->hash))
return;
dev = container_of(res, struct ib_device, res);
pr_err("restrack: %s", CUT_HERE);
pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n",
dev->name);
hash_for_each(res->hash, bkt, e, node) {
if (rdma_is_kernel_res(e)) {
owner = e->kern_name;
} else {
/*
* There is no need to call get_task_struct here,
* because we can be here only if there are more
* get_task_struct() calls than put_task_struct() calls.
*/
get_task_comm(buf, e->task);
owner = buf;
}
pr_err("restrack: %s %s object allocated by %s is not freed\n",
rdma_is_kernel_res(e) ? "Kernel" : "User",
type2str(e->type), owner);
}
pr_err("restrack: %s", CUT_HERE);
}
int rdma_restrack_count(struct rdma_restrack_root *res,
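With this change rdma_restrack_clean() no longer just warns on a non-empty hash; it names every leaked object. Using the format strings above, a leak at device unregistration would be reported roughly as follows (the device and owner names here are made up for illustration):

restrack: ------------[ cut here ]------------
restrack: BUG: RESTRACK detected leak of resources on mlx5_0
restrack: User CM_ID object allocated by rping is not freed
restrack: Kernel QP object allocated by ib_srp is not freed
restrack: ------------[ cut here ]------------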
......@@ -40,51 +86,48 @@ EXPORT_SYMBOL(rdma_restrack_count);
static void set_kern_name(struct rdma_restrack_entry *res)
{
enum rdma_restrack_type type = res->type;
struct ib_qp *qp;
if (type != RDMA_RESTRACK_QP)
/* PD and CQ types already have this name embedded in */
return;
struct ib_pd *pd;
qp = container_of(res, struct ib_qp, res);
if (!qp->pd) {
WARN_ONCE(true, "XRC QPs are not supported\n");
/* Survive, despite the programmer's error */
res->kern_name = " ";
return;
switch (res->type) {
case RDMA_RESTRACK_QP:
pd = container_of(res, struct ib_qp, res)->pd;
if (!pd) {
WARN_ONCE(true, "XRC QPs are not supported\n");
/* Survive, despite the programmer's error */
res->kern_name = " ";
}
break;
case RDMA_RESTRACK_MR:
pd = container_of(res, struct ib_mr, res)->pd;
break;
default:
/* Other types set kern_name directly */
pd = NULL;
break;
}
res->kern_name = qp->pd->res.kern_name;
if (pd)
res->kern_name = pd->res.kern_name;
}
static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
{
enum rdma_restrack_type type = res->type;
struct ib_device *dev;
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
switch (type) {
switch (res->type) {
case RDMA_RESTRACK_PD:
pd = container_of(res, struct ib_pd, res);
dev = pd->device;
break;
return container_of(res, struct ib_pd, res)->device;
case RDMA_RESTRACK_CQ:
cq = container_of(res, struct ib_cq, res);
dev = cq->device;
break;
return container_of(res, struct ib_cq, res)->device;
case RDMA_RESTRACK_QP:
qp = container_of(res, struct ib_qp, res);
dev = qp->device;
break;
return container_of(res, struct ib_qp, res)->device;
case RDMA_RESTRACK_CM_ID:
return container_of(res, struct rdma_id_private,
res)->id.device;
case RDMA_RESTRACK_MR:
return container_of(res, struct ib_mr, res)->device;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return NULL;
}
return dev;
}
static bool res_is_user(struct rdma_restrack_entry *res)
......@@ -96,6 +139,10 @@ static bool res_is_user(struct rdma_restrack_entry *res)
return container_of(res, struct ib_cq, res)->uobject;
case RDMA_RESTRACK_QP:
return container_of(res, struct ib_qp, res)->uobject;
case RDMA_RESTRACK_CM_ID:
return !res->kern_name;
case RDMA_RESTRACK_MR:
return container_of(res, struct ib_mr, res)->pd->uobject;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return false;
......@@ -109,13 +156,15 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
if (!dev)
return;
if (res->type != RDMA_RESTRACK_CM_ID || !res_is_user(res))
res->task = NULL;
if (res_is_user(res)) {
get_task_struct(current);
res->task = current;
if (!res->task)
rdma_restrack_set_task(res, current);
res->kern_name = NULL;
} else {
set_kern_name(res);
res->task = NULL;
}
kref_init(&res->kref);
......
......@@ -1227,118 +1227,130 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr)
static int
roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec)
{
struct net_device *resolved_dev;
struct net_device *ndev;
struct net_device *idev;
struct rdma_dev_addr dev_addr = {
.bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
sa_path_get_ifindex(rec) : 0),
.net = sa_path_get_ndev(rec) ?
sa_path_get_ndev(rec) :
&init_net
};
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
int ret;
u16 gid_index;
int use_roce;
struct net_device *ndev = NULL;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->type = rdma_ah_find_type(device, port_num);
if (rec->roce.route_resolved)
return 0;
rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
if (!device->get_netdev)
return -EOPNOTSUPP;
if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
(rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)))
rdma_ah_set_make_grd(ah_attr, true);
rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
rdma_ah_set_sl(ah_attr, rec->sl);
rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) &
get_src_path_mask(device, port_num));
rdma_ah_set_port_num(ah_attr, port_num);
rdma_ah_set_static_rate(ah_attr, rec->rate);
use_roce = rdma_cap_eth_ah(device, port_num);
if (use_roce) {
struct net_device *idev;
struct net_device *resolved_dev;
struct rdma_dev_addr dev_addr = {
.bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
sa_path_get_ifindex(rec) : 0),
.net = sa_path_get_ndev(rec) ?
sa_path_get_ndev(rec) :
&init_net
};
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
if (!device->get_netdev)
return -EOPNOTSUPP;
rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
/* validate the route */
ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
&dgid_addr._sockaddr, &dev_addr);
if (ret)
return ret;
/* validate the route */
ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
&dgid_addr._sockaddr, &dev_addr);
if (ret)
return ret;
if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
dev_addr.network == RDMA_NETWORK_IPV6) &&
rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
return -EINVAL;
if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
dev_addr.network == RDMA_NETWORK_IPV6) &&
rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
return -EINVAL;
idev = device->get_netdev(device, port_num);
if (!idev)
return -ENODEV;
idev = device->get_netdev(device, port_num);
if (!idev)
return -ENODEV;
resolved_dev = dev_get_by_index(dev_addr.net,
dev_addr.bound_dev_if);
if (!resolved_dev) {
dev_put(idev);
return -ENODEV;
}
ndev = ib_get_ndev_from_path(rec);
rcu_read_lock();
if ((ndev && ndev != resolved_dev) ||
(resolved_dev != idev &&
!rdma_is_upper_dev_rcu(idev, resolved_dev)))
ret = -EHOSTUNREACH;
rcu_read_unlock();
dev_put(idev);
dev_put(resolved_dev);
if (ret) {
if (ndev)
dev_put(ndev);
return ret;
}
resolved_dev = dev_get_by_index(dev_addr.net,
dev_addr.bound_dev_if);
if (!resolved_dev) {
ret = -ENODEV;
goto done;
}
ndev = ib_get_ndev_from_path(rec);
rcu_read_lock();
if ((ndev && ndev != resolved_dev) ||
(resolved_dev != idev &&
!rdma_is_upper_dev_rcu(idev, resolved_dev)))
ret = -EHOSTUNREACH;
rcu_read_unlock();
dev_put(resolved_dev);
if (ndev)
dev_put(ndev);
done:
dev_put(idev);
if (!ret)
rec->roce.route_resolved = true;
return ret;
}
if (rec->hop_limit > 0 || use_roce) {
enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr)
{
enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
struct net_device *ndev;
u16 gid_index;
int ret;
ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
port_num, ndev, &gid_index);
if (ret) {
if (ndev)
dev_put(ndev);
return ret;
}
ndev = ib_get_ndev_from_path(rec);
ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
port_num, ndev, &gid_index);
if (ndev)
dev_put(ndev);
if (ret)
return ret;
rdma_ah_set_grh(ah_attr, &rec->dgid,
be32_to_cpu(rec->flow_label),
gid_index, rec->hop_limit,
rec->traffic_class);
if (ndev)
dev_put(ndev);
}
rdma_ah_set_grh(ah_attr, &rec->dgid,
be32_to_cpu(rec->flow_label),
gid_index, rec->hop_limit,
rec->traffic_class);
return 0;
}
if (use_roce) {
u8 *dmac = sa_path_get_dmac(rec);
int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr)
{
int ret = 0;
memset(ah_attr, 0, sizeof(*ah_attr));
ah_attr->type = rdma_ah_find_type(device, port_num);
rdma_ah_set_sl(ah_attr, rec->sl);
rdma_ah_set_port_num(ah_attr, port_num);
rdma_ah_set_static_rate(ah_attr, rec->rate);
if (!dmac)
return -EINVAL;
memcpy(ah_attr->roce.dmac, dmac, ETH_ALEN);
if (sa_path_is_roce(rec)) {
ret = roce_resolve_route_from_path(device, port_num, rec);
if (ret)
return ret;
memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
} else {
rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
if (sa_path_is_opa(rec) &&
rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
rdma_ah_set_make_grd(ah_attr, true);
rdma_ah_set_path_bits(ah_attr,
be32_to_cpu(sa_path_get_slid(rec)) &
get_src_path_mask(device, port_num));
}
return 0;
if (rec->hop_limit > 0 || sa_path_is_roce(rec))
ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr);
return ret;
}
EXPORT_SYMBOL(ib_init_ah_attr_from_path);
......
......@@ -273,6 +273,7 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
speed = " SDR";
rate = 25;
break;
}
......@@ -388,14 +389,26 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
union ib_gid *pgid;
union ib_gid gid;
ssize_t ret;
ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
if (ret)
return ret;
return sprintf(buf, "%pI6\n", gid.raw);
/* If reading a GID fails, it is most likely because the entry at that
* index is empty (invalid) or reserved.
* User space expects to be able to read GID table entries as long as the
* given index is within the GID table size, and administrative/debugging
* tools stop querying the remaining entries once one index returns an
* error.
* To avoid user space treating a failed read as fatal, return the zero
* GID as before. This maintains backward compatibility.
*/
if (ret)
pgid = &zgid;
else
pgid = &gid;
return sprintf(buf, "%pI6\n", pgid->raw);
}
static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
......@@ -810,10 +823,15 @@ static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
dev = port->ibdev;
stats = port->hw_stats;
}
mutex_lock(&stats->lock);
ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index);
if (ret)
return ret;
return print_hw_stat(stats, hsa->index, buf);
goto unlock;
ret = print_hw_stat(stats, hsa->index, buf);
unlock:
mutex_unlock(&stats->lock);
return ret;
}
static ssize_t show_stats_lifespan(struct kobject *kobj,
......@@ -821,17 +839,25 @@ static ssize_t show_stats_lifespan(struct kobject *kobj,
char *buf)
{
struct hw_stats_attribute *hsa;
struct rdma_hw_stats *stats;
int msecs;
hsa = container_of(attr, struct hw_stats_attribute, attr);
if (!hsa->port_num) {
struct ib_device *dev = container_of((struct device *)kobj,
struct ib_device, dev);
msecs = jiffies_to_msecs(dev->hw_stats->lifespan);
stats = dev->hw_stats;
} else {
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
msecs = jiffies_to_msecs(p->hw_stats->lifespan);
stats = p->hw_stats;
}
mutex_lock(&stats->lock);
msecs = jiffies_to_msecs(stats->lifespan);
mutex_unlock(&stats->lock);
return sprintf(buf, "%d\n", msecs);
}
......@@ -840,6 +866,7 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
const char *buf, size_t count)
{
struct hw_stats_attribute *hsa;
struct rdma_hw_stats *stats;
int msecs;
int jiffies;
int ret;
......@@ -854,11 +881,18 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
if (!hsa->port_num) {
struct ib_device *dev = container_of((struct device *)kobj,
struct ib_device, dev);
dev->hw_stats->lifespan = jiffies;
stats = dev->hw_stats;
} else {
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
p->hw_stats->lifespan = jiffies;
stats = p->hw_stats;
}
mutex_lock(&stats->lock);
stats->lifespan = jiffies;
mutex_unlock(&stats->lock);
return count;
}
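The three hunks above put every read and write of the shared rdma_hw_stats counters and lifespan under stats->lock, with error paths funneled through a single unlock label. A rough userspace analogue of that pattern, using plain pthreads and a made-up stats struct:

#include <pthread.h>
#include <stdio.h>

#define NUM_COUNTERS 4

struct stats {
	pthread_mutex_t lock;
	unsigned long value[NUM_COUNTERS];
};

/* Stand-in for update_hw_stats(): refresh one counter, may fail. */
static int update(struct stats *s, int i)
{
	if (i >= NUM_COUNTERS)
		return -1;
	s->value[i]++;
	return 0;
}

static int show_stat(struct stats *s, int i, char *buf, size_t len)
{
	int ret;

	pthread_mutex_lock(&s->lock);
	ret = update(s, i);
	if (ret)
		goto unlock;		/* release the lock on the error path too */
	ret = snprintf(buf, len, "%lu\n", s->value[i]);
unlock:
	pthread_mutex_unlock(&s->lock);
	return ret;
}

int main(void)
{
	struct stats s = { .lock = PTHREAD_MUTEX_INITIALIZER };
	char buf[32];

	if (show_stat(&s, 1, buf, sizeof(buf)) > 0)
		fputs(buf, stdout);
	return 0;
}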
......@@ -951,6 +985,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
sysfs_attr_init(hsag->attrs[i]);
}
mutex_init(&stats->lock);
/* treat an error here as non-fatal */
hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
if (hsag->attrs[i])
......
......@@ -430,7 +430,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
uevent->resp.id = ctx->id;
}
if (copy_to_user((void __user *)(unsigned long)cmd.response,
if (copy_to_user(u64_to_user_ptr(cmd.response),
&uevent->resp, sizeof(uevent->resp))) {
result = -EFAULT;
goto done;
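The (void __user *)(unsigned long) casts above are replaced with u64_to_user_ptr() throughout this file and ucma.c. A userspace sketch of the idea, with a hypothetical u64_to_ptr() helper standing in for the kernel macro: convert the fixed-width u64 ABI field to a pointer through uintptr_t in exactly one place instead of open-coding the cast at every call site.

#include <stdint.h>
#include <stdio.h>

/* Analogous in spirit to the kernel's u64_to_user_ptr(); illustrative only. */
static inline void *u64_to_ptr(uint64_t v)
{
	return (void *)(uintptr_t)v;
}

int main(void)
{
	int x = 42;
	/* a pointer carried in a 64-bit ABI field, as in cmd.response */
	uint64_t handle = (uintptr_t)&x;

	printf("%d\n", *(int *)u64_to_ptr(handle));
	return 0;
}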
......@@ -441,7 +441,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
result = -ENOMEM;
goto done;
}
if (copy_to_user((void __user *)(unsigned long)cmd.data,
if (copy_to_user(u64_to_user_ptr(cmd.data),
uevent->data, uevent->data_len)) {
result = -EFAULT;
goto done;
......@@ -453,7 +453,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
result = -ENOMEM;
goto done;
}
if (copy_to_user((void __user *)(unsigned long)cmd.info,
if (copy_to_user(u64_to_user_ptr(cmd.info),
uevent->info, uevent->info_len)) {
result = -EFAULT;
goto done;
......@@ -502,7 +502,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
}
resp.id = ctx->id;
if (copy_to_user((void __user *)(unsigned long)cmd.response,
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
result = -EFAULT;
goto err2;
......@@ -556,7 +556,7 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
ib_ucm_cleanup_events(ctx);
resp.events_reported = ctx->events_reported;
if (copy_to_user((void __user *)(unsigned long)cmd.response,
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
result = -EFAULT;
......@@ -588,7 +588,7 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
resp.local_id = ctx->cm_id->local_id;
resp.remote_id = ctx->cm_id->remote_id;
if (copy_to_user((void __user *)(unsigned long)cmd.response,
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
result = -EFAULT;
......@@ -625,7 +625,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
if (copy_to_user((void __user *)(unsigned long)cmd.response,
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
result = -EFAULT;
......@@ -699,7 +699,7 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
if (!len)
return 0;
data = memdup_user((void __user *)(unsigned long)src, len);
data = memdup_user(u64_to_user_ptr(src), len);
if (IS_ERR(data))
return PTR_ERR(data);
......@@ -721,7 +721,7 @@ static int ib_ucm_path_get(struct sa_path_rec **path, u64 src)
if (!sa_path)
return -ENOMEM;
if (copy_from_user(&upath, (void __user *)(unsigned long)src,
if (copy_from_user(&upath, u64_to_user_ptr(src),
sizeof(upath))) {
kfree(sa_path);
......
......@@ -382,7 +382,11 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
struct ucma_event *uevent;
int ret = 0;
if (out_len < sizeof uevent->resp)
/*
* Old 32 bit user space does not send the 4 byte padding in the
* reserved field. We don't care; allow it to keep working.
*/
if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
......@@ -416,8 +420,9 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
uevent->resp.id = ctx->id;
}
if (copy_to_user((void __user *)(unsigned long)cmd.response,
&uevent->resp, sizeof uevent->resp)) {
if (copy_to_user(u64_to_user_ptr(cmd.response),
&uevent->resp,
min_t(size_t, out_len, sizeof(uevent->resp)))) {
ret = -EFAULT;
goto done;
}
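The two hunks above let an old 32-bit rdma-cm user keep working even though its response struct is 4 bytes shorter: the length check tolerates the missing reserved field and the copy is clamped to out_len. A small sketch of that idea, with made-up struct and function names:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Current response layout; older user space lacks the trailing padding. */
struct resp {
	uint32_t id;
	uint32_t reserved;
};

static size_t copy_resp(void *dst, size_t out_len, const struct resp *src)
{
	size_t n;

	/* accept buffers that at least cover the non-reserved part */
	if (out_len < sizeof(*src) - sizeof(src->reserved))
		return 0;

	n = out_len < sizeof(*src) ? out_len : sizeof(*src);
	memcpy(dst, src, n);
	return n;
}

int main(void)
{
	struct resp r = { .id = 7 };
	unsigned char old_buf[4], new_buf[8];

	printf("old user: %zu bytes, new user: %zu bytes\n",
	       copy_resp(old_buf, sizeof(old_buf), &r),
	       copy_resp(new_buf, sizeof(new_buf), &r));
	return 0;
}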
......@@ -477,15 +482,15 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
return -ENOMEM;
ctx->uid = cmd.uid;
cm_id = rdma_create_id(current->nsproxy->net_ns,
ucma_event_handler, ctx, cmd.ps, qp_type);
cm_id = __rdma_create_id(current->nsproxy->net_ns,
ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
goto err1;
}
resp.id = ctx->id;
if (copy_to_user((void __user *)(unsigned long)cmd.response,
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
ret = -EFAULT;
goto err2;
......@@ -615,7 +620,7 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
}
resp.events_reported = ucma_free_ctx(ctx);
if (copy_to_user((void __user *)(unsigned long)cmd.response,
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
......@@ -845,7 +850,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
ucma_copy_iw_route(&resp, &ctx->cm_id->route);
out:
if (copy_to_user((void __user *)(unsigned long)cmd.response,
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
......@@ -991,7 +996,7 @@ static ssize_t ucma_query(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
response = (void __user *)(unsigned long) cmd.response;
response = u64_to_user_ptr(cmd.response);
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
......@@ -1094,12 +1099,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
if (cmd.conn_param.valid) {
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
mutex_lock(&file->mut);
ret = rdma_accept(ctx->cm_id, &conn_param);
ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
if (!ret)
ctx->uid = cmd.uid;
mutex_unlock(&file->mut);
} else
ret = rdma_accept(ctx->cm_id, NULL);
ret = __rdma_accept(ctx->cm_id, NULL, NULL);
ucma_put_ctx(ctx);
return ret;
......@@ -1179,7 +1184,7 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
goto out;
ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
if (copy_to_user((void __user *)(unsigned long)cmd.response,
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
......@@ -1241,6 +1246,9 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
if (!optlen)
return -EINVAL;
if (!ctx->cm_id->device)
return -EINVAL;
memset(&sa_path, 0, sizeof(sa_path));
sa_path.rec_type = SA_PATH_REC_TYPE_IB;
......@@ -1315,7 +1323,7 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
return -EINVAL;
optval = memdup_user((void __user *) (unsigned long) cmd.optval,
optval = memdup_user(u64_to_user_ptr(cmd.optval),
cmd.optlen);
if (IS_ERR(optval)) {
ret = PTR_ERR(optval);
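The ucma_set_option code above bounds cmd.optlen before handing it to memdup_user(), so a hostile length cannot drive an oversized kernel allocation. A userspace sketch of the same guard, with a made-up limit standing in for KMALLOC_MAX_SIZE:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_OPTLEN (128 * 1024)	/* stands in for KMALLOC_MAX_SIZE */

/* Reject a user-supplied length before allocating from it. */
static void *dup_option(const void *src, size_t optlen, int *err)
{
	void *p;

	if (optlen > MAX_OPTLEN) {
		*err = -EINVAL;
		return NULL;
	}
	p = malloc(optlen);
	if (!p) {
		*err = -ENOMEM;
		return NULL;
	}
	memcpy(p, src, optlen);
	*err = 0;
	return p;
}

int main(void)
{
	char opt[] = "tos=16";
	int err;
	void *copy = dup_option(opt, sizeof(opt), &err);

	printf("err=%d\n", err);
	free(copy);
	return 0;
}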
......@@ -1395,7 +1403,7 @@ static ssize_t ucma_process_join(struct ucma_file *file,
goto err2;
resp.id = mc->id;
if (copy_to_user((void __user *)(unsigned long) cmd->response,
if (copy_to_user(u64_to_user_ptr(cmd->response),
&resp, sizeof(resp))) {
ret = -EFAULT;
goto err3;
......@@ -1500,7 +1508,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
resp.events_reported = mc->events_reported;
kfree(mc);
if (copy_to_user((void __user *)(unsigned long)cmd.response,
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
out:
......@@ -1587,7 +1595,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
ucma_unlock_files(cur_file, new_file);
response:
if (copy_to_user((void __user *)(unsigned long)cmd.response,
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
......