Commit 4c84a39c authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (46 commits)
  IB/uverbs: Don't serialize with ib_uverbs_idr_mutex
  IB/mthca: Make all device methods truly reentrant
  IB/mthca: Fix memory leak on modify_qp error paths
  IB/uverbs: Factor out common idr code
  IB/uverbs: Don't decrement usecnt on error paths
  IB/uverbs: Release lock on error path
  IB/cm: Use address handle helpers
  IB/sa: Add ib_init_ah_from_path()
  IB: Add ib_init_ah_from_wc()
  IB/ucm: Get rid of duplicate P_Key parameter
  IB/srp: Factor out common request reset code
  IB/srp: Support SRP rev. 10 targets
  [SCSI] srp.h: Add I/O Class values
  IB/fmr: Use device's max_map_map_per_fmr attribute in FMR pool.
  IB/mthca: Fill in max_map_per_fmr device attribute
  IB/ipath: Add client reregister event generation
  IB/mthca: Add client reregister event generation
  IB: Move struct port_info from ipath to <rdma/ib_smi.h>
  IPoIB: Handle client reregister events
  IB: Add client reregister event type
  ...
parents d0b952a9 9ead190b
IP OVER INFINIBAND
The ib_ipoib driver is an implementation of the IP over InfiniBand
protocol as specified by the latest Internet-Drafts issued by the
IETF ipoib working group. It is a "native" implementation in the
sense of setting the interface type to ARPHRD_INFINIBAND and the
hardware address length to 20 (earlier proprietary implementations
protocol as specified by RFC 4391 and 4392, issued by the IETF ipoib
working group. It is a "native" implementation in the sense of
setting the interface type to ARPHRD_INFINIBAND and the hardware
address length to 20 (earlier proprietary implementations
masqueraded to the kernel as ethernet interfaces).
Partitions and P_Keys
......@@ -53,3 +53,7 @@ References
IETF IP over InfiniBand (ipoib) Working Group
http://ietf.org/html.charters/ipoib-charter.html
Transmission of IP over InfiniBand (IPoIB) (RFC 4391)
http://ietf.org/rfc/rfc4391.txt
IP over InfiniBand (IPoIB) Architecture (RFC 4392)
http://ietf.org/rfc/rfc4392.txt
......@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from
<http://www.openib.org>.
config INFINIBAND_ADDR_TRANS
bool
depends on INFINIBAND && INET
default y
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ipath/Kconfig"
......
infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
ib_cm.o
ib_cm.o $(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o
......@@ -12,8 +14,13 @@ ib_sa-y := sa_query.o
ib_cm-y := cm.o
rdma_cm-y := cma.o
ib_addr-y := addr.o
ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \
uverbs_marshall.o
/*
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
* Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
*
* This Software is licensed under one of the following licenses:
*
* 1) under the terms of the "Common Public License 1.0" a copy of which is
* available from the Open Source Initiative, see
* http://www.opensource.org/licenses/cpl.php.
*
* 2) under the terms of the "The BSD License" a copy of which is
* available from the Open Source Initiative, see
* http://www.opensource.org/licenses/bsd-license.php.
*
* 3) under the terms of the "GNU General Public License (GPL) Version 2" a
* copy of which is available from the Open Source Initiative, see
* http://www.opensource.org/licenses/gpl-license.php.
*
* Licensee has the right to choose one of the above licenses.
*
* Redistributions of source code must retain the above copyright
* notice and one of the license notices.
*
* Redistributions in binary form must reproduce both the above copyright
* notice, one of the license notices in the documentation
* and/or other materials provided with the distribution.
*/
#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/if_arp.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
#include <rdma/ib_addr.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
MODULE_LICENSE("Dual BSD/GPL");
struct addr_req {
struct list_head list;
struct sockaddr src_addr;
struct sockaddr dst_addr;
struct rdma_dev_addr *addr;
void *context;
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context);
unsigned long timeout;
int status;
};
static void process_req(void *data);
static DEFINE_MUTEX(lock);
static LIST_HEAD(req_list);
static DECLARE_WORK(work, process_req, NULL);
static struct workqueue_struct *addr_wq;
static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
unsigned char *dst_dev_addr)
{
switch (dev->type) {
case ARPHRD_INFINIBAND:
dev_addr->dev_type = IB_NODE_CA;
break;
default:
return -EADDRNOTAVAIL;
}
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
return 0;
}
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
struct net_device *dev;
u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
int ret;
dev = ip_dev_find(ip);
if (!dev)
return -EADDRNOTAVAIL;
ret = copy_addr(dev_addr, dev, NULL);
dev_put(dev);
return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);
static void set_timeout(unsigned long time)
{
unsigned long delay;
cancel_delayed_work(&work);
delay = time - jiffies;
if ((long)delay <= 0)
delay = 1;
queue_delayed_work(addr_wq, &work, delay);
}
static void queue_req(struct addr_req *req)
{
struct addr_req *temp_req;
mutex_lock(&lock);
list_for_each_entry_reverse(temp_req, &req_list, list) {
if (time_after(req->timeout, temp_req->timeout))
break;
}
list_add(&req->list, &temp_req->list);
if (req_list.next == &req->list)
set_timeout(req->timeout);
mutex_unlock(&lock);
}
static void addr_send_arp(struct sockaddr_in *dst_in)
{
struct rtable *rt;
struct flowi fl;
u32 dst_ip = dst_in->sin_addr.s_addr;
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
if (ip_route_output_key(&rt, &fl))
return;
arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev,
rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL);
ip_rt_put(rt);
}
static int addr_resolve_remote(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
{
u32 src_ip = src_in->sin_addr.s_addr;
u32 dst_ip = dst_in->sin_addr.s_addr;
struct flowi fl;
struct rtable *rt;
struct neighbour *neigh;
int ret;
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
fl.nl_u.ip4_u.saddr = src_ip;
ret = ip_route_output_key(&rt, &fl);
if (ret)
goto out;
/* If the device does ARP internally, return 'done' */
if (rt->idev->dev->flags & IFF_NOARP) {
copy_addr(addr, rt->idev->dev, NULL);
goto put;
}
neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
if (!neigh) {
ret = -ENODATA;
goto put;
}
if (!(neigh->nud_state & NUD_VALID)) {
ret = -ENODATA;
goto release;
}
if (!src_ip) {
src_in->sin_family = dst_in->sin_family;
src_in->sin_addr.s_addr = rt->rt_src;
}
ret = copy_addr(addr, neigh->dev, neigh->ha);
release:
neigh_release(neigh);
put:
ip_rt_put(rt);
out:
return ret;
}
static void process_req(void *data)
{
struct addr_req *req, *temp_req;
struct sockaddr_in *src_in, *dst_in;
struct list_head done_list;
INIT_LIST_HEAD(&done_list);
mutex_lock(&lock);
list_for_each_entry_safe(req, temp_req, &req_list, list) {
if (req->status) {
src_in = (struct sockaddr_in *) &req->src_addr;
dst_in = (struct sockaddr_in *) &req->dst_addr;
req->status = addr_resolve_remote(src_in, dst_in,
req->addr);
}
if (req->status && time_after(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
continue;
list_del(&req->list);
list_add_tail(&req->list, &done_list);
}
if (!list_empty(&req_list)) {
req = list_entry(req_list.next, struct addr_req, list);
set_timeout(req->timeout);
}
mutex_unlock(&lock);
list_for_each_entry_safe(req, temp_req, &done_list, list) {
list_del(&req->list);
req->callback(req->status, &req->src_addr, req->addr,
req->context);
kfree(req);
}
}
static int addr_resolve_local(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
{
struct net_device *dev;
u32 src_ip = src_in->sin_addr.s_addr;
u32 dst_ip = dst_in->sin_addr.s_addr;
int ret;
dev = ip_dev_find(dst_ip);
if (!dev)
return -EADDRNOTAVAIL;
if (ZERONET(src_ip)) {
src_in->sin_family = dst_in->sin_family;
src_in->sin_addr.s_addr = dst_ip;
ret = copy_addr(addr, dev, dev->dev_addr);
} else if (LOOPBACK(src_ip)) {
ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
} else {
ret = rdma_translate_ip((struct sockaddr *)src_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
}
dev_put(dev);
return ret;
}
int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context),
void *context)
{
struct sockaddr_in *src_in, *dst_in;
struct addr_req *req;
int ret = 0;
req = kmalloc(sizeof *req, GFP_KERNEL);
if (!req)
return -ENOMEM;
memset(req, 0, sizeof *req);
if (src_addr)
memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
req->addr = addr;
req->callback = callback;
req->context = context;
src_in = (struct sockaddr_in *) &req->src_addr;
dst_in = (struct sockaddr_in *) &req->dst_addr;
req->status = addr_resolve_local(src_in, dst_in, addr);
if (req->status == -EADDRNOTAVAIL)
req->status = addr_resolve_remote(src_in, dst_in, addr);
switch (req->status) {
case 0:
req->timeout = jiffies;
queue_req(req);
break;
case -ENODATA:
req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
queue_req(req);
addr_send_arp(dst_in);
break;
default:
ret = req->status;
kfree(req);
break;
}
return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
struct addr_req *req, *temp_req;
mutex_lock(&lock);
list_for_each_entry_safe(req, temp_req, &req_list, list) {
if (req->addr == addr) {
req->status = -ECANCELED;
req->timeout = jiffies;
list_del(&req->list);
list_add(&req->list, &req_list);
set_timeout(req->timeout);
break;
}
}
mutex_unlock(&lock);
}
EXPORT_SYMBOL(rdma_addr_cancel);
static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pkt, struct net_device *orig_dev)
{
struct arphdr *arp_hdr;
arp_hdr = (struct arphdr *) skb->nh.raw;
if (arp_hdr->ar_op == htons(ARPOP_REQUEST) ||
arp_hdr->ar_op == htons(ARPOP_REPLY))
set_timeout(jiffies);
kfree_skb(skb);
return 0;
}
static struct packet_type addr_arp = {
.type = __constant_htons(ETH_P_ARP),
.func = addr_arp_recv,
.af_packet_priv = (void*) 1,
};
static int addr_init(void)
{
addr_wq = create_singlethread_workqueue("ib_addr_wq");
if (!addr_wq)
return -ENOMEM;
dev_add_pack(&addr_arp);
return 0;
}
static void addr_cleanup(void)
{
dev_remove_pack(&addr_arp);
destroy_workqueue(addr_wq);
}
module_init(addr_init);
module_exit(addr_cleanup);
......@@ -191,6 +191,24 @@ int ib_find_cached_pkey(struct ib_device *device,
}
EXPORT_SYMBOL(ib_find_cached_pkey);
int ib_get_cached_lmc(struct ib_device *device,
u8 port_num,
u8 *lmc)
{
unsigned long flags;
int ret = 0;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
*lmc = device->cache.lmc_cache[port_num - start_port(device)];
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);
static void ib_cache_update(struct ib_device *device,
u8 port)
{
......@@ -251,6 +269,8 @@ static void ib_cache_update(struct ib_device *device,
device->cache.pkey_cache[port - start_port(device)] = pkey_cache;
device->cache.gid_cache [port - start_port(device)] = gid_cache;
device->cache.lmc_cache[port - start_port(device)] = tprops->lmc;
write_unlock_irq(&device->cache.lock);
kfree(old_pkey_cache);
......@@ -305,7 +325,13 @@ static void ib_cache_setup_one(struct ib_device *device)
kmalloc(sizeof *device->cache.gid_cache *
(end_port(device) - start_port(device) + 1), GFP_KERNEL);
if (!device->cache.pkey_cache || !device->cache.gid_cache) {
device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
(end_port(device) -
start_port(device) + 1),
GFP_KERNEL);
if (!device->cache.pkey_cache || !device->cache.gid_cache ||
!device->cache.lmc_cache) {
printk(KERN_WARNING "Couldn't allocate cache "
"for %s\n", device->name);
goto err;
......@@ -333,6 +359,7 @@ static void ib_cache_setup_one(struct ib_device *device)
err:
kfree(device->cache.pkey_cache);
kfree(device->cache.gid_cache);
kfree(device->cache.lmc_cache);
}
static void ib_cache_cleanup_one(struct ib_device *device)
......@@ -349,6 +376,7 @@ static void ib_cache_cleanup_one(struct ib_device *device)
kfree(device->cache.pkey_cache);
kfree(device->cache.gid_cache);
kfree(device->cache.lmc_cache);
}
static struct ib_client cache_client = {
......
......@@ -32,7 +32,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* $Id: cm.c 2821 2005-07-08 17:07:28Z sean.hefty $
* $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $
*/
#include <linux/completion.h>
......@@ -132,6 +132,7 @@ struct cm_id_private {
/* todo: use alternate port on send failure */
struct cm_av av;
struct cm_av alt_av;
struct ib_cm_compare_data *compare_data;
void *private_data;
__be64 tid;
......@@ -253,23 +254,13 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
cm_id_priv->private_data_len = private_data_len;
}
static void cm_set_ah_attr(struct ib_ah_attr *ah_attr, u8 port_num,
u16 dlid, u8 sl, u16 src_path_bits)
{
memset(ah_attr, 0, sizeof ah_attr);
ah_attr->dlid = dlid;
ah_attr->sl = sl;
ah_attr->src_path_bits = src_path_bits;
ah_attr->port_num = port_num;
}
static void cm_init_av_for_response(struct cm_port *port,
struct ib_wc *wc, struct cm_av *av)
static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
struct ib_grh *grh, struct cm_av *av)
{
av->port = port;
av->pkey_index = wc->pkey_index;
cm_set_ah_attr(&av->ah_attr, port->port_num, wc->slid,
wc->sl, wc->dlid_path_bits);
ib_init_ah_from_wc(port->cm_dev->device, port->port_num, wc,
grh, &av->ah_attr);
}
static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
......@@ -299,9 +290,8 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
return ret;
av->port = port;
cm_set_ah_attr(&av->ah_attr, av->port->port_num,
be16_to_cpu(path->dlid), path->sl,
be16_to_cpu(path->slid) & 0x7F);
ib_init_ah_from_path(cm_dev->device, port->port_num, path,
&av->ah_attr);
av->packet_life_time = path->packet_life_time;
return 0;
}
......@@ -357,6 +347,41 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
return cm_id_priv;
}
static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
{
int i;
for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++)
((unsigned long *) dst)[i] = ((unsigned long *) src)[i] &
((unsigned long *) mask)[i];
}
static int cm_compare_data(struct ib_cm_compare_data *src_data,
struct ib_cm_compare_data *dst_data)
{
u8 src[IB_CM_COMPARE_SIZE];
u8 dst[IB_CM_COMPARE_SIZE];
if (!src_data || !dst_data)
return 0;
cm_mask_copy(src, src_data->data, dst_data->mask);
cm_mask_copy(dst, dst_data->data, src_data->mask);
return memcmp(src, dst, IB_CM_COMPARE_SIZE);
}
static int cm_compare_private_data(u8 *private_data,
struct ib_cm_compare_data *dst_data)
{
u8 src[IB_CM_COMPARE_SIZE];
if (!dst_data)
return 0;
cm_mask_copy(src, private_data, dst_data->mask);
return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
}
static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
{
struct rb_node **link = &cm.listen_service_table.rb_node;
......@@ -364,14 +389,18 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
struct cm_id_private *cur_cm_id_priv;
__be64 service_id = cm_id_priv->id.service_id;
__be64 service_mask = cm_id_priv->id.service_mask;
int data_cmp;
while (*link) {
parent = *link;
cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
service_node);
data_cmp = cm_compare_data(cm_id_priv->compare_data,
cur_cm_id_priv->compare_data);
if ((cur_cm_id_priv->id.service_mask & service_id) ==
(service_mask & cur_cm_id_priv->id.service_id) &&
(cm_id_priv->id.device == cur_cm_id_priv->id.device))
(cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
!data_cmp)
return cur_cm_id_priv;
if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
......@@ -380,6 +409,10 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
link = &(*link)->rb_right;
else if (service_id < cur_cm_id_priv->id.service_id)
link = &(*link)->rb_left;
else if (service_id > cur_cm_id_priv->id.service_id)
link = &(*link)->rb_right;
else if (data_cmp < 0)
link = &(*link)->rb_left;
else
link = &(*link)->rb_right;
}
......@@ -389,16 +422,20 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
}
static struct cm_id_private * cm_find_listen(struct ib_device *device,
__be64 service_id)
__be64 service_id,
u8 *private_data)
{
struct rb_node *node = cm.listen_service_table.rb_node;
struct cm_id_private *cm_id_priv;
int data_cmp;
while (node) {
cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
data_cmp = cm_compare_private_data(private_data,
cm_id_priv->compare_data);
if ((cm_id_priv->id.service_mask & service_id) ==
cm_id_priv->id.service_id &&
(cm_id_priv->id.device == device))
(cm_id_priv->id.device == device) && !data_cmp)
return cm_id_priv;
if (device < cm_id_priv->id.device)
......@@ -407,6 +444,10 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device,
node = node->rb_right;
else if (service_id < cm_id_priv->id.service_id)
node = node->rb_left;
else if (service_id > cm_id_priv->id.service_id)
node = node->rb_right;
else if (data_cmp < 0)
node = node->rb_left;
else
node = node->rb_right;
}
......@@ -730,15 +771,14 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id)
wait_for_completion(&cm_id_priv->comp);
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
cm_free_work(work);
if (cm_id_priv->private_data && cm_id_priv->private_data_len)
kfree(cm_id_priv->compare_data);
kfree(cm_id_priv->private_data);
kfree(cm_id_priv);
}
EXPORT_SYMBOL(ib_destroy_cm_id);
int ib_cm_listen(struct ib_cm_id *cm_id,
__be64 service_id,
__be64 service_mask)
int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
struct ib_cm_compare_data *compare_data)
{
struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
unsigned long flags;
......@@ -752,7 +792,19 @@ int ib_cm_listen(struct ib_cm_id *cm_id,
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
BUG_ON(cm_id->state != IB_CM_IDLE);
if (cm_id->state != IB_CM_IDLE)
return -EINVAL;
if (compare_data) {
cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
GFP_KERNEL);
if (!cm_id_priv->compare_data)
return -ENOMEM;
cm_mask_copy(cm_id_priv->compare_data->data,
compare_data->data, compare_data->mask);
memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
IB_CM_COMPARE_SIZE);
}
cm_id->state = IB_CM_LISTEN;
......@@ -769,6 +821,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id,
if (cur_cm_id_priv) {
cm_id->state = IB_CM_IDLE;
kfree(cm_id_priv->compare_data);
cm_id_priv->compare_data = NULL;
ret = -EBUSY;
}
return ret;
......@@ -1241,7 +1295,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
/* Find matching listen request. */
listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
req_msg->service_id);
req_msg->service_id,
req_msg->private_data);
if (!listen_cm_id_priv) {
spin_unlock_irqrestore(&cm.lock, flags);
cm_issue_rej(work->port, work->mad_recv_wc,
......@@ -1276,6 +1331,7 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
cm_id_priv->id.remote_id = req_msg->local_comm_id;
cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
id.local_id);
......@@ -2549,7 +2605,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
sidr_req_msg->request_id = cm_id_priv->id.local_id;
sidr_req_msg->pkey = cpu_to_be16(param->pkey);
sidr_req_msg->pkey = cpu_to_be16(param->path->pkey);
sidr_req_msg->service_id = param->service_id;
if (param->private_data && param->private_data_len)
......@@ -2641,6 +2697,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
cm_id_priv->av.dgid.global.interface_id = 0;
cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
cm_id_priv->id.remote_id = sidr_req_msg->request_id;
cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
......@@ -2654,7 +2711,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
goto out; /* Duplicate message. */
}
cur_cm_id_priv = cm_find_listen(cm_id->device,
sidr_req_msg->service_id);
sidr_req_msg->service_id,
sidr_req_msg->private_data);
if (!cur_cm_id_priv) {
rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
spin_unlock_irqrestore(&cm.lock, flags);
......@@ -3291,7 +3349,6 @@ static int __init ib_cm_init(void)
static void __exit ib_cm_cleanup(void)
{
flush_workqueue(cm.wq);
destroy_workqueue(cm.wq);
ib_unregister_client(&cm_client);
idr_destroy(&cm.local_id_table);
......
This diff is collapsed.
......@@ -54,7 +54,7 @@ enum {
/*
* If an FMR is not in use, then the list member will point to either
* its pool's free_list (if the FMR can be mapped again; that is,
* remap_count < IB_FMR_MAX_REMAPS) or its pool's dirty_list (if the
* remap_count < pool->max_remaps) or its pool's dirty_list (if the
* FMR needs to be unmapped before being remapped). In either of
* these cases it is a bug if the ref_count is not 0. In other words,
* if ref_count is > 0, then the list member must not be linked into
......@@ -84,6 +84,7 @@ struct ib_fmr_pool {
int pool_size;
int max_pages;
int max_remaps;
int dirty_watermark;
int dirty_len;
struct list_head free_list;
......@@ -214,8 +215,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
{
struct ib_device *device;
struct ib_fmr_pool *pool;
struct ib_device_attr *attr;
int i;
int ret;
int max_remaps;
if (!params)
return ERR_PTR(-EINVAL);
......@@ -228,6 +231,26 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
return ERR_PTR(-ENOSYS);
}
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
printk(KERN_WARNING "couldn't allocate device attr struct");
return ERR_PTR(-ENOMEM);
}
ret = ib_query_device(device, attr);
if (ret) {
printk(KERN_WARNING "couldn't query device");
kfree(attr);
return ERR_PTR(ret);
}
if (!attr->max_map_per_fmr)
max_remaps = IB_FMR_MAX_REMAPS;
else
max_remaps = attr->max_map_per_fmr;
kfree(attr);
pool = kmalloc(sizeof *pool, GFP_KERNEL);
if (!pool) {
printk(KERN_WARNING "couldn't allocate pool struct");
......@@ -258,6 +281,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
pool->pool_size = 0;
pool->max_pages = params->max_pages_per_fmr;
pool->max_remaps = max_remaps;
pool->dirty_watermark = params->dirty_watermark;
pool->dirty_len = 0;
spin_lock_init(&pool->pool_lock);
......@@ -279,7 +303,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
struct ib_pool_fmr *fmr;
struct ib_fmr_attr attr = {
.max_pages = params->max_pages_per_fmr,
.max_maps = IB_FMR_MAX_REMAPS,
.max_maps = pool->max_remaps,
.page_shift = params->page_shift
};
......@@ -489,7 +513,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
--fmr->ref_count;
if (!fmr->ref_count) {
if (fmr->remap_count < IB_FMR_MAX_REMAPS) {
if (fmr->remap_count < pool->max_remaps) {
list_add_tail(&fmr->list, &pool->free_list);
} else {
list_add_tail(&fmr->list, &pool->dirty_list);
......
......@@ -34,6 +34,7 @@
* $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
*/
#include <linux/dma-mapping.h>
#include <rdma/ib_cache.h>
#include "mad_priv.h"
#include "mad_rmpp.h"
......@@ -45,8 +46,7 @@ MODULE_DESCRIPTION("kernel IB MAD API");
MODULE_AUTHOR("Hal Rosenstock");
MODULE_AUTHOR("Sean Hefty");
kmem_cache_t *ib_mad_cache;
static kmem_cache_t *ib_mad_cache;
static struct list_head ib_mad_port_list;
static u32 ib_mad_client_id = 0;
......@@ -1673,20 +1673,21 @@ static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr,
rwc->recv_buf.mad->mad_hdr.mgmt_class;
}
static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr,
static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_send_wr_private *wr,
struct ib_mad_recv_wc *rwc )
{
struct ib_ah_attr attr;
u8 send_resp, rcv_resp;
union ib_gid sgid;
struct ib_device *device = mad_agent_priv->agent.device;
u8 port_num = mad_agent_priv->agent.port_num;
u8 lmc;
send_resp = ((struct ib_mad *)(wr->send_buf.mad))->
mad_hdr.method & IB_MGMT_METHOD_RESP;
rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP;
if (!send_resp && rcv_resp)
/* is request/response. GID/LIDs are both local (same). */
return 1;
if (send_resp == rcv_resp)
/* both requests, or both responses. GIDs different */
return 0;
......@@ -1695,48 +1696,78 @@ static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr,
/* Assume not equal, to avoid false positives. */
return 0;
if (!(attr.ah_flags & IB_AH_GRH) && !(rwc->wc->wc_flags & IB_WC_GRH))
return attr.dlid == rwc->wc->slid;
else if ((attr.ah_flags & IB_AH_GRH) &&
(rwc->wc->wc_flags & IB_WC_GRH))
return memcmp(attr.grh.dgid.raw,
rwc->recv_buf.grh->sgid.raw, 16) == 0;
else
if (!!(attr.ah_flags & IB_AH_GRH) !=
!!(rwc->wc->wc_flags & IB_WC_GRH))
/* one has GID, other does not. Assume different */
return 0;
if (!send_resp && rcv_resp) {
/* is request/response. */
if (!(attr.ah_flags & IB_AH_GRH)) {
if (ib_get_cached_lmc(device, port_num, &lmc))
return 0;
return (!lmc || !((attr.src_path_bits ^
rwc->wc->dlid_path_bits) &
((1 << lmc) - 1)));
} else {
if (ib_get_cached_gid(device, port_num,
attr.grh.sgid_index, &sgid))
return 0;
return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
16);
}
}
if (!(attr.ah_flags & IB_AH_GRH))
return attr.dlid == rwc->wc->slid;
else
return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
16);
}
static inline int is_direct(u8 class)
{
return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
}
struct ib_mad_send_wr_private*
ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_recv_wc *mad_recv_wc)
struct ib_mad_recv_wc *wc)
{
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_mad_send_wr_private *wr;
struct ib_mad *mad;
mad = (struct ib_mad *)mad_recv_wc->recv_buf.mad;
mad = (struct ib_mad *)wc->recv_buf.mad;
list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
agent_list) {
if ((mad_send_wr->tid == mad->mad_hdr.tid) &&
rcv_has_same_class(mad_send_wr, mad_recv_wc) &&
rcv_has_same_gid(mad_send_wr, mad_recv_wc))
return mad_send_wr;
list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
if ((wr->tid == mad->mad_hdr.tid) &&
rcv_has_same_class(wr, wc) &&
/*
* Don't check GID for direct routed MADs.
* These might have permissive LIDs.
*/
(is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
rcv_has_same_gid(mad_agent_priv, wr, wc)))
return wr;
}
/*
* It's possible to receive the response before we've
* been notified that the send has completed
*/
list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
agent_list) {
if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
mad_send_wr->tid == mad->mad_hdr.tid &&
mad_send_wr->timeout &&
rcv_has_same_class(mad_send_wr, mad_recv_wc) &&
rcv_has_same_gid(mad_send_wr, mad_recv_wc)) {
list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
if (is_data_mad(mad_agent_priv, wr->send_buf.mad) &&
wr->tid == mad->mad_hdr.tid &&
wr->timeout &&
rcv_has_same_class(wr, wc) &&
/*
* Don't check GID for direct routed MADs.
* These might have permissive LIDs.
*/
(is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
rcv_has_same_gid(mad_agent_priv, wr, wc)))
/* Verify request has not been canceled */
return (mad_send_wr->status == IB_WC_SUCCESS) ?
mad_send_wr : NULL;
}
return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
}
return NULL;
}
......
......@@ -212,8 +212,6 @@ struct ib_mad_port_private {
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
};
extern kmem_cache_t *ib_mad_cache;
int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);
struct ib_mad_send_wr_private *
......
......@@ -47,6 +47,7 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>
#include <rdma/ib_cache.h>
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
......@@ -441,6 +442,36 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
}
EXPORT_SYMBOL(ib_sa_cancel_query);
int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = be16_to_cpu(rec->dlid);
ah_attr->sl = rec->sl;
ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
ah_attr->port_num = port_num;
if (rec->hop_limit > 1) {
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = rec->dgid;
ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
&gid_index);
if (ret)
return ret;
ah_attr->grh.sgid_index = gid_index;
ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
ah_attr->grh.hop_limit = rec->hop_limit;
ah_attr->grh.traffic_class = rec->traffic_class;
}
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
{
unsigned long flags;
......
This diff is collapsed.
......@@ -132,7 +132,7 @@ struct ib_ucq_object {
u32 async_events_reported;
};
extern struct mutex ib_uverbs_idr_mutex;
extern spinlock_t ib_uverbs_idr_lock;
extern struct idr ib_uverbs_pd_idr;
extern struct idr ib_uverbs_mr_idr;
extern struct idr ib_uverbs_mw_idr;
......@@ -141,6 +141,8 @@ extern struct idr ib_uverbs_cq_idr;
extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
int is_async, int *fd);
void ib_uverbs_release_event_file(struct kref *ref);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -125,35 +125,47 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
}
EXPORT_SYMBOL(ib_create_ah);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
struct ib_grh *grh, u8 port_num)
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
struct ib_grh *grh, struct ib_ah_attr *ah_attr)
{
struct ib_ah_attr ah_attr;
u32 flow_class;
u16 gid_index;
int ret;
memset(&ah_attr, 0, sizeof ah_attr);
ah_attr.dlid = wc->slid;
ah_attr.sl = wc->sl;
ah_attr.src_path_bits = wc->dlid_path_bits;
ah_attr.port_num = port_num;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = wc->slid;
ah_attr->sl = wc->sl;
ah_attr->src_path_bits = wc->dlid_path_bits;
ah_attr->port_num = port_num;
if (wc->wc_flags & IB_WC_GRH) {
ah_attr.ah_flags = IB_AH_GRH;
ah_attr.grh.dgid = grh->sgid;
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = grh->sgid;
ret = ib_find_cached_gid(pd->device, &grh->dgid, &port_num,
ret = ib_find_cached_gid(device, &grh->dgid, &port_num,
&gid_index);
if (ret)
return ERR_PTR(ret);
return ret;
ah_attr.grh.sgid_index = (u8) gid_index;
ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
ah_attr.grh.flow_label = flow_class & 0xFFFFF;
ah_attr.grh.traffic_class = (flow_class >> 20) & 0xFF;
ah_attr.grh.hop_limit = grh->hop_limit;
ah_attr->grh.flow_label = flow_class & 0xFFFFF;
ah_attr->grh.hop_limit = grh->hop_limit;
ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
}
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_wc);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
struct ib_grh *grh, u8 port_num)
{
struct ib_ah_attr ah_attr;
int ret;
ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
return ib_create_ah(pd, &ah_attr);
}
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment