Commit 37aa5c36 authored by Guy Levi, committed by Doug Ledford

IB/mlx5: Add UARs write-combining and non-cached mapping

With this patch, the user-space library can improve performance by
choosing the appropriate doorbell-ringing method according to the
memory type it asked for.

Currently only one mapping command is allowed for UARs:
MLX5_IB_MMAP_REGULAR_PAGE. Using this mapping, the kernel maps the
UARs as write-combining (WC) if the system supports it.
If the system does not support WC, the UARs are mapped as
non-cached (NC). In this case the user-space library cannot tell
which mapping was applied.
This patch adds two new mapping commands: MLX5_IB_MMAP_WC_PAGE and
MLX5_IB_MMAP_NC_PAGE. For these commands the kernel maps exactly as
requested and fails if it cannot.

Since there is no generic way to check whether the requested memory
region can be mapped as WC, the driver enables conclusive WC mapping
only on x86, PowerPC and ARM, which support WC for the device's
memory region.
Signed-off-by: Guy Levi <guyle@mellanox.com>
Signed-off-by: Moshe Lazer <moshel@mellanox.com>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 6cbac1e4
@@ -38,6 +38,9 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/io-mapping.h>
+#if defined(CONFIG_X86)
+#include <asm/pat.h>
+#endif
 #include <linux/sched.h>
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_addr.h>
@@ -1068,18 +1071,49 @@ static int get_index(unsigned long offset)
 	return get_arg(offset);
 }
 
-static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
+static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
 {
-	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
-	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
-	struct mlx5_uuar_info *uuari = &context->uuari;
-	unsigned long command;
-	unsigned long idx;
-	phys_addr_t pfn;
+	switch (cmd) {
+	case MLX5_IB_MMAP_WC_PAGE:
+		return "WC";
+	case MLX5_IB_MMAP_REGULAR_PAGE:
+		return "best effort WC";
+	case MLX5_IB_MMAP_NC_PAGE:
+		return "NC";
+	default:
+		return NULL;
+	}
+}
 
-	command = get_command(vma->vm_pgoff);
-	switch (command) {
+static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
+		    struct vm_area_struct *vma, struct mlx5_uuar_info *uuari)
+{
+	int err;
+	unsigned long idx;
+	phys_addr_t pfn, pa;
+	pgprot_t prot;
+
+	switch (cmd) {
+	case MLX5_IB_MMAP_WC_PAGE:
+		/* Some architectures don't support WC memory */
+#if defined(CONFIG_X86)
+		if (!pat_enabled())
+			return -EPERM;
+#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
+		return -EPERM;
+#endif
+		/* fall through */
 	case MLX5_IB_MMAP_REGULAR_PAGE:
+		/* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */
+		prot = pgprot_writecombine(vma->vm_page_prot);
+		break;
+	case MLX5_IB_MMAP_NC_PAGE:
+		prot = pgprot_noncached(vma->vm_page_prot);
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 		return -EINVAL;
 
@@ -1088,18 +1122,38 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
 		return -EINVAL;
 
 	pfn = uar_index2pfn(dev, uuari->uars[idx].index);
-	mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
-		    (unsigned long long)pfn);
+	mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
 
-	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-	if (io_remap_pfn_range(vma, vma->vm_start, pfn,
-			       PAGE_SIZE, vma->vm_page_prot))
+	vma->vm_page_prot = prot;
+	err = io_remap_pfn_range(vma, vma->vm_start, pfn,
+				 PAGE_SIZE, vma->vm_page_prot);
+	if (err) {
+		mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
+			    err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
 		return -EAGAIN;
+	}
 
-	mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
-		    vma->vm_start,
-		    (unsigned long long)pfn << PAGE_SHIFT);
-	break;
+	pa = pfn << PAGE_SHIFT;
+	mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
+		    vma->vm_start, &pa);
+
+	return 0;
+}
+
+static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
+{
+	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
+	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
+	struct mlx5_uuar_info *uuari = &context->uuari;
+	unsigned long command;
+	phys_addr_t pfn;
+
+	command = get_command(vma->vm_pgoff);
+	switch (command) {
+	case MLX5_IB_MMAP_WC_PAGE:
+	case MLX5_IB_MMAP_NC_PAGE:
+	case MLX5_IB_MMAP_REGULAR_PAGE:
+		return uar_mmap(dev, command, vma, uuari);
 
 	case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
 		return -ENOSYS;
...
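For context, the get_command()/get_index() helpers used above fall outside the hunks shown: mlx5 packs a command into the high bits of the mmap page offset and an argument (here, the UAR index) into the low bits. A minimal sketch of that decoding, assuming the MLX5_IB_MMAP_CMD_SHIFT (8) and MLX5_IB_MMAP_CMD_MASK (0xff) definitions from mlx5_ib.h:

/* Illustrative only; the real helpers live in main.c and mlx5_ib.h. */
#define MLX5_IB_MMAP_CMD_SHIFT	8
#define MLX5_IB_MMAP_CMD_MASK	0xff

static unsigned long get_command(unsigned long offset)
{
	/* the command lives in the bits above the shift */
	return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
}

static unsigned long get_index(unsigned long offset)
{
	/* the UAR index occupies the low MLX5_IB_MMAP_CMD_SHIFT bits */
	return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
}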
@@ -70,6 +70,8 @@ enum {
 enum mlx5_ib_mmap_cmd {
 	MLX5_IB_MMAP_REGULAR_PAGE		= 0,
 	MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES	= 1,
+	MLX5_IB_MMAP_WC_PAGE			= 2,
+	MLX5_IB_MMAP_NC_PAGE			= 3,
 	/* 5 is chosen in order to be compatible with old versions of libmlx5 */
 	MLX5_IB_MMAP_CORE_CLOCK			= 5,
 };
...
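Usage from user space: with the new commands a library can request an explicit WC mapping first and, if the kernel refuses (e.g. -EPERM where WC is unavailable), retry with NC, then ring doorbells with the method matching the mapping it actually got. A hedged sketch, not part of this patch; map_uar() and uar_mmap_offset() are hypothetical helper names, and the offset encoding assumes the 8-bit command shift noted above:

#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

#define MLX5_IB_MMAP_CMD_SHIFT	8
#define MLX5_IB_MMAP_WC_PAGE	2
#define MLX5_IB_MMAP_NC_PAGE	3

/* Encode command + UAR index into an mmap offset (hypothetical helper). */
static off_t uar_mmap_offset(int cmd, int index, long page_size)
{
	return (((off_t)cmd << MLX5_IB_MMAP_CMD_SHIFT) | index) * page_size;
}

/* Try an explicit WC mapping; fall back to NC if the kernel refuses. */
static void *map_uar(int cmd_fd, int index)
{
	long page_size = sysconf(_SC_PAGESIZE);
	void *uar;

	uar = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, cmd_fd,
		   uar_mmap_offset(MLX5_IB_MMAP_WC_PAGE, index, page_size));
	if (uar != MAP_FAILED)
		return uar;	/* WC granted: doorbell writes may be combined */

	return mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, cmd_fd,
		    uar_mmap_offset(MLX5_IB_MMAP_NC_PAGE, index, page_size));
}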