Commit 3341713c authored by Linus Torvalds

Merge tags 'for-linus' and 'for-next' of...

Merge tags 'for-linus' and 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull more rdma updates from Doug Ledford:
 "As mentioned in my first pull request, this is the subsequent pull
  request I had. This is all I have, and in fact this cleans out the
  RDMA subsystem's entire patchworks queue of kernel changes that are
  ready to go (well, it did for the weekend anyway, a few new patches
  are in, but they'll be coming during the -rc cycle).

  The first tag contains a single patch that would have conflicted if
  taken from my tree or DaveM's tree as it needed our trees merged to
  come cleanly.

  The second tag contains the patch series from Intel plus three other
  stragglers that came in late last week. I took them because it allowed
  me to legitimately claim that the RDMA patchworks queue was, for a
  short time, 100% cleared of all waiting kernel patches, woohoo! :-).

  I have it under my for-next tag, so it did get 0day and linux-next
  over the end of last week, and linux-next did show one minor conflict.

  Summary:

  'for-linus' tag:
   - mlx5/IPoIB fixup patch

  'for-next' tag:
   - the hfi1 15 patch set that landed late
   - IPoIB get_link_ksettings which landed late because I asked for a
     respin
   - one late rxe change
   - one -rc worthy fix that's in early"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  IB/mlx5: Enable IPoIB acceleration

* tag 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  rxe: expose num_possible_cpus() cnum_comp_vectors
  IB/rxe: Update caller's CRC for RXE_MEM_TYPE_DMA memory type
  IB/hfi1: Clean up on context initialization failure
  IB/hfi1: Fix an assign/ordering issue with shared context IDs
  IB/hfi1: Clean up context initialization
  IB/hfi1: Correctly clear the pkey
  IB/hfi1: Search shared contexts on the opened device, not all devices
  IB/hfi1: Remove atomic operations for SDMA_REQ_HAVE_AHG bit
  IB/hfi1: Use filedata rather than filepointer
  IB/hfi1: Name function prototype parameters
  IB/hfi1: Fix a subcontext memory leak
  IB/hfi1: Return an error on memory allocation failure
  IB/hfi1: Adjust default eager_buffer_size to 8MB
  IB/hfi1: Get rid of divide when setting the tx request header
  IB/hfi1: Fix yield logic in send engine
  IB/hfi1, IB/rdmavt: Move r_adefered to r_lock cache line
  IB/hfi1: Fix checks for Offline transient state
  IB/ipoib: add get_link_ksettings in ethtool
......@@ -1055,7 +1055,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void set_partition_keys(struct hfi1_pportdata *);
static void set_partition_keys(struct hfi1_pportdata *ppd);
static const char *link_state_name(u32 state);
static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
u32 state);
......@@ -1068,9 +1068,9 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *);
static void dc_shutdown(struct hfi1_devdata *);
static void dc_start(struct hfi1_devdata *);
static void handle_temp_err(struct hfi1_devdata *dd);
static void dc_shutdown(struct hfi1_devdata *dd);
static void dc_start(struct hfi1_devdata *dd);
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np);
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
......@@ -10233,7 +10233,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
if (pstate == PLS_OFFLINE) {
do_transition = 0; /* in right state */
do_wait = 0; /* ...no need to wait */
} else if ((pstate & 0xff) == PLS_OFFLINE) {
} else if ((pstate & 0xf0) == PLS_OFFLINE) {
do_transition = 0; /* in an offline transient state */
do_wait = 1; /* ...wait for it to settle */
} else {
......@@ -12662,7 +12662,7 @@ u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
#define SET_STATIC_RATE_CONTROL_SMASK(r) \
(r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
int hfi1_init_ctxt(struct send_context *sc)
void hfi1_init_ctxt(struct send_context *sc)
{
if (sc) {
struct hfi1_devdata *dd = sc->dd;
......@@ -12679,7 +12679,6 @@ int hfi1_init_ctxt(struct send_context *sc)
write_kctxt_csr(dd, sc->hw_context,
SEND_CTXT_CHECK_ENABLE, reg);
}
return 0;
}
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
......@@ -14528,30 +14527,24 @@ int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
return ret;
}
int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt)
{
struct hfi1_ctxtdata *rcd;
unsigned sctxt;
int ret = 0;
u8 hw_ctxt;
u64 reg;
if (ctxt < dd->num_rcv_contexts) {
rcd = dd->rcd[ctxt];
} else {
ret = -EINVAL;
goto done;
}
if (!rcd || !rcd->sc) {
ret = -EINVAL;
goto done;
}
sctxt = rcd->sc->hw_context;
reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
if (!ctxt || !ctxt->sc)
return -EINVAL;
if (ctxt->ctxt >= dd->num_rcv_contexts)
return -EINVAL;
hw_ctxt = ctxt->sc->hw_context;
reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
done:
return ret;
write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
return 0;
}
/*
......
......@@ -636,7 +636,8 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt,
write_csr(dd, offset0 + (0x1000 * ctxt), value);
}
u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32);
u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
u32 dw_len);
/* firmware.c */
#define SBUS_MASTER_BROADCAST 0xfd
......@@ -728,7 +729,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd);
void set_intr_state(struct hfi1_devdata *dd, u32 enable);
void apply_link_downgrade_policy(struct hfi1_pportdata *ppd,
int refresh_widths);
void update_usrhead(struct hfi1_ctxtdata *, u32, u32, u32, u32, u32);
void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
u32 intr_adjust, u32 npkts);
int stop_drain_data_vls(struct hfi1_devdata *dd);
int open_fill_data_vls(struct hfi1_devdata *dd);
u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns);
......@@ -1347,7 +1349,7 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd);
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
struct ib_header *hfi1_get_msgheader(
struct hfi1_devdata *dd, __le32 *rhf_addr);
int hfi1_init_ctxt(struct send_context *sc);
void hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order);
void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
......@@ -1360,7 +1362,7 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);
int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey);
int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt);
int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey);
int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt);
int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt);
void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
......
......@@ -85,8 +85,8 @@ module_param_named(cu, hfi1_cu, uint, S_IRUGO);
MODULE_PARM_DESC(cu, "Credit return units");
unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT;
static int hfi1_caps_set(const char *, const struct kernel_param *);
static int hfi1_caps_get(char *, const struct kernel_param *);
static int hfi1_caps_set(const char *val, const struct kernel_param *kp);
static int hfi1_caps_get(char *buffer, const struct kernel_param *kp);
static const struct kernel_param_ops cap_ops = {
.set = hfi1_caps_set,
.get = hfi1_caps_get
......@@ -210,42 +210,6 @@ int hfi1_count_active_units(void)
return nunits_active;
}
/*
 * Walk the global device list and return the total number of units on it.
 * When npresentp is non-NULL it receives the number of units that have
 * HFI1_PRESENT set and a non-zero kregbase; when nupp is non-NULL it
 * receives the number of ports that have both a non-zero lid and linkup
 * set. The walk is done with hfi1_devs_lock held.
 */
int hfi1_count_units(int *npresentp, int *nupp)
{
	struct hfi1_pportdata *port;
	struct hfi1_devdata *dev;
	unsigned long irq_flags;
	int total = 0, present = 0, ports_up = 0;
	int idx;

	spin_lock_irqsave(&hfi1_devs_lock, irq_flags);
	list_for_each_entry(dev, &hfi1_dev_list, list) {
		total++;
		if ((dev->flags & HFI1_PRESENT) && dev->kregbase)
			present++;
		for (idx = 0; idx < dev->num_pports; ++idx) {
			port = &dev->pport[idx];
			if (port->lid && port->linkup)
				ports_up++;
		}
	}
	spin_unlock_irqrestore(&hfi1_devs_lock, irq_flags);

	/* Both outputs are optional; keep the npresentp-then-nupp store order */
	if (npresentp)
		*npresentp = present;
	if (nupp)
		*nupp = ports_up;

	return total;
}
/*
 * Get address of eager buffer from its index (allocated in chunks, not
* contiguous).
......@@ -1325,7 +1289,7 @@ int hfi1_reset_device(int unit)
if (dd->rcd)
for (i = dd->first_dyn_alloc_ctxt;
i < dd->num_rcv_contexts; i++) {
if (!dd->rcd[i] || !dd->rcd[i]->cnt)
if (!dd->rcd[i])
continue;
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
ret = -EBUSY;
......
......@@ -49,6 +49,7 @@
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <linux/sched/mm.h>
#include <linux/bitmap.h>
#include <rdma/ib.h>
......@@ -70,30 +71,37 @@
/*
* File operation functions
*/
static int hfi1_file_open(struct inode *, struct file *);
static int hfi1_file_close(struct inode *, struct file *);
static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *);
static unsigned int hfi1_poll(struct file *, struct poll_table_struct *);
static int hfi1_file_mmap(struct file *, struct vm_area_struct *);
static u64 kvirt_to_phys(void *);
static int assign_ctxt(struct file *, struct hfi1_user_info *);
static int init_subctxts(struct hfi1_ctxtdata *, const struct hfi1_user_info *);
static int user_init(struct file *);
static int get_ctxt_info(struct file *, void __user *, __u32);
static int get_base_info(struct file *, void __user *, __u32);
static int setup_ctxt(struct file *);
static int setup_subctxt(struct hfi1_ctxtdata *);
static int get_user_context(struct file *, struct hfi1_user_info *, int);
static int find_shared_ctxt(struct file *, const struct hfi1_user_info *);
static int allocate_ctxt(struct file *, struct hfi1_devdata *,
struct hfi1_user_info *);
static unsigned int poll_urgent(struct file *, struct poll_table_struct *);
static unsigned int poll_next(struct file *, struct poll_table_struct *);
static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
static int vma_fault(struct vm_fault *);
static int hfi1_file_open(struct inode *inode, struct file *fp);
static int hfi1_file_close(struct inode *inode, struct file *fp);
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt);
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);
static u64 kvirt_to_phys(void *addr);
static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo);
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
const struct hfi1_user_info *uinfo);
static int init_user_ctxt(struct hfi1_filedata *fd);
static void user_init(struct hfi1_ctxtdata *uctxt);
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
__u32 len);
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
__u32 len);
static int setup_base_ctxt(struct hfi1_filedata *fd);
static int setup_subctxt(struct hfi1_ctxtdata *uctxt);
static int find_sub_ctxt(struct hfi1_filedata *fd,
const struct hfi1_user_info *uinfo);
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
struct hfi1_user_info *uinfo);
static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt);
static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
unsigned long events);
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey);
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
int start_stop);
static int vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
unsigned long arg);
......@@ -173,6 +181,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
struct hfi1_devdata,
user_cdev);
if (!((dd->flags & HFI1_PRESENT) && dd->kregbase))
return -EINVAL;
if (!atomic_inc_not_zero(&dd->user_refcount))
return -ENXIO;
......@@ -187,6 +198,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
fd->rec_cpu_num = -1; /* no cpu affinity by default */
fd->mm = current->mm;
mmgrab(fd->mm);
fd->dd = dd;
fp->private_data = fd;
} else {
fp->private_data = NULL;
......@@ -229,20 +241,14 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(uinfo)))
return -EFAULT;
ret = assign_ctxt(fp, &uinfo);
if (ret < 0)
return ret;
ret = setup_ctxt(fp);
if (ret)
return ret;
ret = user_init(fp);
ret = assign_ctxt(fd, &uinfo);
break;
case HFI1_IOCTL_CTXT_INFO:
ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg,
ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg,
sizeof(struct hfi1_ctxt_info));
break;
case HFI1_IOCTL_USER_INFO:
ret = get_base_info(fp, (void __user *)(unsigned long)arg,
ret = get_base_info(fd, (void __user *)(unsigned long)arg,
sizeof(struct hfi1_base_info));
break;
case HFI1_IOCTL_CREDIT_UPD:
......@@ -256,7 +262,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(tinfo)))
return -EFAULT;
ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
if (!ret) {
/*
* Copy the number of tidlist entries we used
......@@ -278,7 +284,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(tinfo)))
return -EFAULT;
ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
if (ret)
break;
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
......@@ -293,7 +299,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sizeof(tinfo)))
return -EFAULT;
ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
if (ret)
break;
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
......@@ -430,7 +436,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
unsigned long count = 0;
ret = hfi1_user_sdma_process_request(
kiocb->ki_filp, (struct iovec *)(from->iov + done),
fd, (struct iovec *)(from->iov + done),
dim, &count);
if (ret) {
reqs = ret;
......@@ -756,6 +762,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
/* release the cpu */
hfi1_put_proc_affinity(fdata->rec_cpu_num);
/* clean up rcv side */
hfi1_user_exp_rcv_free(fdata);
/*
* Clear any left over, unhandled events so the next process that
* gets this context doesn't get confused.
......@@ -764,8 +773,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
*ev = 0;
if (--uctxt->cnt) {
uctxt->active_slaves &= ~(1 << fdata->subctxt);
__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
mutex_unlock(&hfi1_mutex);
goto done;
}
......@@ -795,8 +804,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
dd->rcd[uctxt->ctxt] = NULL;
hfi1_user_exp_rcv_free(fdata);
hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
hfi1_user_exp_rcv_grp_free(uctxt);
hfi1_clear_ctxt_pkey(dd, uctxt);
uctxt->rcvwait_to = 0;
uctxt->piowait_to = 0;
......@@ -836,127 +845,135 @@ static u64 kvirt_to_phys(void *addr)
return paddr;
}
static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
{
int i_minor, ret = 0;
int ret;
unsigned int swmajor, swminor;
swmajor = uinfo->userversion >> 16;
if (swmajor != HFI1_USER_SWMAJOR) {
ret = -ENODEV;
goto done;
}
if (swmajor != HFI1_USER_SWMAJOR)
return -ENODEV;
swminor = uinfo->userversion & 0xffff;
mutex_lock(&hfi1_mutex);
/* First, lets check if we need to setup a shared context? */
/*
* Get a sub context if necessary.
* ret < 0 error, 0 no context, 1 sub-context found
*/
ret = 0;
if (uinfo->subctxt_cnt) {
struct hfi1_filedata *fd = fp->private_data;
ret = find_shared_ctxt(fp, uinfo);
if (ret < 0)
goto done_unlock;
if (ret) {
ret = find_sub_ctxt(fd, uinfo);
if (ret > 0)
fd->rec_cpu_num =
hfi1_get_proc_affinity(fd->uctxt->numa_id);
}
}
/*
* We execute the following block if we couldn't find a
* shared context or if context sharing is not required.
* Allocate a base context if context sharing is not required or we
* couldn't find a sub context.
*/
if (!ret) {
i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
ret = get_user_context(fp, uinfo, i_minor);
}
done_unlock:
if (!ret)
ret = allocate_ctxt(fd, fd->dd, uinfo);
mutex_unlock(&hfi1_mutex);
done:
/* Depending on the context type, do the appropriate init */
if (ret > 0) {
/*
* sub-context info can only be set up after the base
* context has been completed.
*/
ret = wait_event_interruptible(fd->uctxt->wait, !test_bit(
HFI1_CTXT_BASE_UNINIT,
&fd->uctxt->event_flags));
if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) {
clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
return -ENOMEM;
}
/* The only thing a sub context needs is the user_xxx stuff */
if (!ret)
ret = init_user_ctxt(fd);
if (ret)
clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
} else if (!ret) {
ret = setup_base_ctxt(fd);
if (fd->uctxt->subctxt_cnt) {
/* If there is an error, set the failed bit. */
if (ret)
set_bit(HFI1_CTXT_BASE_FAILED,
&fd->uctxt->event_flags);
/*
* Base context is done, notify anybody using a
* sub-context that is waiting for this completion
*/
clear_bit(HFI1_CTXT_BASE_UNINIT,
&fd->uctxt->event_flags);
wake_up(&fd->uctxt->wait);
}
}
return ret;
}
static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo,
int devno)
/*
* The hfi1_mutex must be held when this function is called. It is
* necessary to ensure serialized access to the bitmask in_use_ctxts.
*/
static int find_sub_ctxt(struct hfi1_filedata *fd,
const struct hfi1_user_info *uinfo)
{
struct hfi1_devdata *dd = NULL;
int devmax, npresent, nup;
int i;
struct hfi1_devdata *dd = fd->dd;
u16 subctxt;
devmax = hfi1_count_units(&npresent, &nup);
if (!npresent)
return -ENXIO;
for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
struct hfi1_ctxtdata *uctxt = dd->rcd[i];
if (!nup)
return -ENETDOWN;
/* Skip ctxts which are not yet open */
if (!uctxt ||
bitmap_empty(uctxt->in_use_ctxts,
HFI1_MAX_SHARED_CTXTS))
continue;
dd = hfi1_lookup(devno);
if (!dd)
return -ENODEV;
else if (!dd->freectxts)
return -EBUSY;
/* Skip dynamically allocated kernel contexts */
if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
continue;
return allocate_ctxt(fp, dd, uinfo);
}
/* Skip ctxt if it doesn't match the requested one */
if (memcmp(uctxt->uuid, uinfo->uuid,
sizeof(uctxt->uuid)) ||
uctxt->jkey != generate_jkey(current_uid()) ||
uctxt->subctxt_id != uinfo->subctxt_id ||
uctxt->subctxt_cnt != uinfo->subctxt_cnt)
continue;
static int find_shared_ctxt(struct file *fp,
const struct hfi1_user_info *uinfo)
{
int devmax, ndev, i;
int ret = 0;
struct hfi1_filedata *fd = fp->private_data;
/* Verify the sharing process matches the master */
if (uctxt->userversion != uinfo->userversion)
return -EINVAL;
devmax = hfi1_count_units(NULL, NULL);
/* Find an unused context */
subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
HFI1_MAX_SHARED_CTXTS);
if (subctxt >= uctxt->subctxt_cnt)
return -EBUSY;
for (ndev = 0; ndev < devmax; ndev++) {
struct hfi1_devdata *dd = hfi1_lookup(ndev);
fd->uctxt = uctxt;
fd->subctxt = subctxt;
__set_bit(fd->subctxt, uctxt->in_use_ctxts);
if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase))
continue;
for (i = dd->first_dyn_alloc_ctxt;
i < dd->num_rcv_contexts; i++) {
struct hfi1_ctxtdata *uctxt = dd->rcd[i];
/* Skip ctxts which are not yet open */
if (!uctxt || !uctxt->cnt)
continue;
/* Skip dynamically allocated kernel contexts */
if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
continue;
/* Skip ctxt if it doesn't match the requested one */
if (memcmp(uctxt->uuid, uinfo->uuid,
sizeof(uctxt->uuid)) ||
uctxt->jkey != generate_jkey(current_uid()) ||
uctxt->subctxt_id != uinfo->subctxt_id ||
uctxt->subctxt_cnt != uinfo->subctxt_cnt)
continue;
/* Verify the sharing process matches the master */
if (uctxt->userversion != uinfo->userversion ||
uctxt->cnt >= uctxt->subctxt_cnt) {
ret = -EINVAL;
goto done;
}
fd->uctxt = uctxt;
fd->subctxt = uctxt->cnt++;
uctxt->active_slaves |= 1 << fd->subctxt;
ret = 1;
goto done;
}
return 1;
}
done:
return ret;
return 0;
}
static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
struct hfi1_user_info *uinfo)
{
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt;
unsigned ctxt;
unsigned int ctxt;
int ret, numa;
if (dd->flags & HFI1_FROZEN) {
......@@ -970,6 +987,14 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
return -EIO;
}
/*
* This check is sort of redundant to the next EBUSY error. It would
 * also indicate an inconsistency in the driver if this value was
* zero, but there were still contexts available.
*/
if (!dd->freectxts)
return -EBUSY;
for (ctxt = dd->first_dyn_alloc_ctxt;
ctxt < dd->num_rcv_contexts; ctxt++)
if (!dd->rcd[ctxt])
......@@ -1013,12 +1038,12 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
goto ctxdata_free;
/*
* Setup shared context resources if the user-level has requested
* shared contexts and this is the 'master' process.
* Setup sub context resources if the user-level has requested
* sub contexts.
* This has to be done here so the rest of the sub-contexts find the
* proper master.
*/
if (uinfo->subctxt_cnt && !fd->subctxt) {
if (uinfo->subctxt_cnt) {
ret = init_subctxts(uctxt, uinfo);
/*
* On error, we don't need to disable and de-allocate the
......@@ -1055,7 +1080,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
const struct hfi1_user_info *uinfo)
{
unsigned num_subctxts;
u16 num_subctxts;
num_subctxts = uinfo->subctxt_cnt;
if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
......@@ -1063,9 +1088,8 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt,
uctxt->subctxt_cnt = uinfo->subctxt_cnt;
uctxt->subctxt_id = uinfo->subctxt_id;
uctxt->active_slaves = 1;
uctxt->redirect_seq_cnt = 1;
set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
return 0;
}
......@@ -1073,13 +1097,12 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt,
static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
int ret = 0;
unsigned num_subctxts = uctxt->subctxt_cnt;
u16 num_subctxts = uctxt->subctxt_cnt;
uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
if (!uctxt->subctxt_uregbase) {
ret = -ENOMEM;
goto bail;
}
if (!uctxt->subctxt_uregbase)
return -ENOMEM;
/* We can take the size of the RcvHdr Queue from the master */
uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
num_subctxts);
......@@ -1094,25 +1117,22 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
ret = -ENOMEM;
goto bail_rhdr;
}
goto bail;
return 0;
bail_rhdr:
vfree(uctxt->subctxt_rcvhdr_base);
uctxt->subctxt_rcvhdr_base = NULL;
bail_ureg:
vfree(uctxt->subctxt_uregbase);
uctxt->subctxt_uregbase = NULL;
bail:
return ret;
}
static int user_init(struct file *fp)
static void user_init(struct hfi1_ctxtdata *uctxt)
{
unsigned int rcvctrl_ops = 0;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
/* make sure that the context has already been setup */
if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
return -EFAULT;
/* initialize poll variables... */
uctxt->urgent = 0;
......@@ -1160,20 +1180,12 @@ static int user_init(struct file *fp)
else
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
/* Notify any waiting slaves */
if (uctxt->subctxt_cnt) {
clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
wake_up(&uctxt->wait);
}
return 0;
}
static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
__u32 len)
{
struct hfi1_ctxt_info cinfo;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
int ret = 0;
......@@ -1211,75 +1223,71 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
return ret;
}
static int setup_ctxt(struct file *fp)
/*
 * Set up the per-file user resources for fd: the SDMA queues first, then
 * expected receive. A non-zero result from the SDMA queue allocation is
 * returned immediately; otherwise the result of hfi1_user_exp_rcv_init()
 * is returned.
 */
static int init_user_ctxt(struct hfi1_filedata *fd)
{
	int rc = hfi1_user_sdma_alloc_queues(fd->uctxt, fd);

	if (rc)
		return rc;

	return hfi1_user_exp_rcv_init(fd);
}
static int setup_base_ctxt(struct hfi1_filedata *fd)
{
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
/*
* Context should be set up only once, including allocation and
* programming of eager buffers. This is done if context sharing
* is not requested or by the master process.
*/
if (!uctxt->subctxt_cnt || !fd->subctxt) {
ret = hfi1_init_ctxt(uctxt->sc);
if (ret)
goto done;
hfi1_init_ctxt(uctxt->sc);
/* Now allocate the RcvHdr queue and eager buffers. */
ret = hfi1_create_rcvhdrq(dd, uctxt);
if (ret)
goto done;
ret = hfi1_setup_eagerbufs(uctxt);
if (ret)
goto done;
if (uctxt->subctxt_cnt && !fd->subctxt) {
ret = setup_subctxt(uctxt);
if (ret)
goto done;
}
} else {
ret = wait_event_interruptible(uctxt->wait, !test_bit(
HFI1_CTXT_MASTER_UNINIT,
&uctxt->event_flags));
if (ret)
goto done;
}
/* Now allocate the RcvHdr queue and eager buffers. */
ret = hfi1_create_rcvhdrq(dd, uctxt);
if (ret)
return ret;
ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
ret = hfi1_setup_eagerbufs(uctxt);
if (ret)
goto done;
/*
* Expected receive has to be setup for all processes (including
* shared contexts). However, it has to be done after the master
* context has been fully configured as it depends on the
* eager/expected split of the RcvArray entries.
* Setting it up here ensures that the subcontexts will be waiting
* (due to the above wait_event_interruptible() until the master
* is setup.
*/
ret = hfi1_user_exp_rcv_init(fp);
goto setup_failed;
/* If sub-contexts are enabled, do the appropriate setup */
if (uctxt->subctxt_cnt)
ret = setup_subctxt(uctxt);
if (ret)
goto done;
goto setup_failed;
set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
done:
ret = hfi1_user_exp_rcv_grp_init(fd);
if (ret)
goto setup_failed;
ret = init_user_ctxt(fd);
if (ret)
goto setup_failed;
user_init(uctxt);
return 0;
setup_failed:
hfi1_free_ctxtdata(dd, uctxt);
return ret;
}
static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
__u32 len)
{
struct hfi1_base_info binfo;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
ssize_t sz;
unsigned offset;
int ret = 0;
trace_hfi1_uctxtdata(uctxt->dd, uctxt);
trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);
memset(&binfo, 0, sizeof(binfo));
binfo.hw_version = dd->revision;
......@@ -1443,7 +1451,7 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
* overflow conditions. start_stop==1 re-enables, to be used to
* re-init the software copy of the head register
*/
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
int start_stop)
{
struct hfi1_devdata *dd = uctxt->dd;
......@@ -1478,7 +1486,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
* User process then performs actions appropriate to bit having been
* set, if desired, and checks again in future.
*/
static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
unsigned long events)
{
int i;
......@@ -1499,8 +1507,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
return 0;
}
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
u16 pkey)
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey)
{
int ret = -ENOENT, i, intable = 0;
struct hfi1_pportdata *ppd = uctxt->ppd;
......
......@@ -196,12 +196,6 @@ struct hfi1_ctxtdata {
void *rcvhdrq;
/* kernel virtual address where hdrqtail is updated */
volatile __le64 *rcvhdrtail_kvaddr;
/*
* Shared page for kernel to signal user processes that send buffers
* need disarming. The process should call HFI1_CMD_DISARM_BUFS
* or HFI1_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set.
*/
unsigned long *user_event_mask;
/* when waiting for rcv or pioavail */
wait_queue_head_t wait;
/* rcvhdrq size (for freeing) */
......@@ -224,13 +218,12 @@ struct hfi1_ctxtdata {
* (ignoring forks, dup, etc. for now)
*/
int cnt;
/* Device context index */
unsigned ctxt;
/*
* how much space to leave at start of eager TID entries for
* protocol use, on each TID
* non-zero if ctxt can be shared, and defines the maximum number of
* sub-contexts for this device context.
*/
/* instead of calculating it */
unsigned ctxt;
/* non-zero if ctxt is being shared. */
u16 subctxt_cnt;
/* non-zero if ctxt is being shared. */
u16 subctxt_id;
......@@ -288,10 +281,10 @@ struct hfi1_ctxtdata {
void *subctxt_rcvegrbuf;
/* An array of pages for the eager header queue entries * N */
void *subctxt_rcvhdr_base;
/* Bitmask of in use context(s) */
DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
/* The version of the library which opened this ctxt */
u32 userversion;
/* Bitmask of active slaves */
u32 active_slaves;
/* Type of packets or conditions we want to poll for */
u16 poll_type;
/* receive packet sequence counter */
......@@ -1238,10 +1231,11 @@ struct mmu_rb_handler;
/* Private data for file operations */
struct hfi1_filedata {
struct hfi1_devdata *dd;
struct hfi1_ctxtdata *uctxt;
unsigned subctxt;
struct hfi1_user_sdma_comp_q *cq;
struct hfi1_user_sdma_pkt_q *pq;
u16 subctxt;
/* for cpu affinity; -1 if none */
int rec_cpu_num;
u32 tid_n_pinned;
......@@ -1263,27 +1257,27 @@ struct hfi1_devdata *hfi1_lookup(int unit);
extern u32 hfi1_cpulist_count;
extern unsigned long *hfi1_cpulist;
int hfi1_init(struct hfi1_devdata *, int);
int hfi1_count_units(int *npresentp, int *nupp);
int hfi1_init(struct hfi1_devdata *dd, int reinit);
int hfi1_count_active_units(void);
int hfi1_diag_add(struct hfi1_devdata *);
void hfi1_diag_remove(struct hfi1_devdata *);
int hfi1_diag_add(struct hfi1_devdata *dd);
void hfi1_diag_remove(struct hfi1_devdata *dd);
void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup);
void handle_user_interrupt(struct hfi1_ctxtdata *rcd);
int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *);
int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *);
int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd);
int hfi1_create_ctxts(struct hfi1_devdata *dd);
struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int);
void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *,
struct hfi1_devdata *, u8, u8);
void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
int numa);
void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
void set_all_slowpath(struct hfi1_devdata *dd);
void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
......@@ -1580,7 +1574,7 @@ static inline int rcv_pkey_check(struct hfi1_pportdata *ppd, u16 pkey,
u32 lrh_max_header_bytes(struct hfi1_devdata *dd);
int mtu_to_enum(u32 mtu, int default_if_bad);
u16 enum_to_mtu(int);
u16 enum_to_mtu(int mtu);
static inline int valid_ib_mtu(unsigned int mtu)
{
return mtu == 256 || mtu == 512 ||
......@@ -1594,15 +1588,15 @@ static inline int valid_opa_max_mtu(unsigned int mtu)
(valid_ib_mtu(mtu) || mtu == 8192 || mtu == 10240);
}
int set_mtu(struct hfi1_pportdata *);
int set_mtu(struct hfi1_pportdata *ppd);
int hfi1_set_lid(struct hfi1_pportdata *, u32, u8);
void hfi1_disable_after_error(struct hfi1_devdata *);
int hfi1_set_uevent_bits(struct hfi1_pportdata *, const int);
int hfi1_rcvbuf_validate(u32, u8, u16 *);
int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc);
void hfi1_disable_after_error(struct hfi1_devdata *dd);
int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit);
int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode);
int fm_get_table(struct hfi1_pportdata *, int, void *);
int fm_set_table(struct hfi1_pportdata *, int, void *);
int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t);
int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t);
void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf);
void reset_link_credits(struct hfi1_devdata *dd);
......@@ -1724,19 +1718,19 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
#define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1)
/* ctxt_flag bit offsets */
/* context has been setup */
#define HFI1_CTXT_SETUP_DONE 1
/* base context has not finished initializing */
#define HFI1_CTXT_BASE_UNINIT 1
/* base context initialization failed */
#define HFI1_CTXT_BASE_FAILED 2
/* waiting for a packet to arrive */
#define HFI1_CTXT_WAITING_RCV 2
/* master has not finished initializing */
#define HFI1_CTXT_MASTER_UNINIT 4
#define HFI1_CTXT_WAITING_RCV 3
/* waiting for an urgent packet to arrive */
#define HFI1_CTXT_WAITING_URG 5
#define HFI1_CTXT_WAITING_URG 4
/* free up any allocated data at closes */
struct hfi1_devdata *hfi1_init_dd(struct pci_dev *,
const struct pci_device_id *);
void hfi1_free_devdata(struct hfi1_devdata *);
struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
const struct pci_device_id *ent);
void hfi1_free_devdata(struct hfi1_devdata *dd);
struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
/* LED beaconing functions */
......@@ -1811,23 +1805,24 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
extern const char ib_hfi1_version[];
int hfi1_device_create(struct hfi1_devdata *);
void hfi1_device_remove(struct hfi1_devdata *);
int hfi1_device_create(struct hfi1_devdata *dd);
void hfi1_device_remove(struct hfi1_devdata *dd);
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
struct kobject *kobj);
int hfi1_verbs_register_sysfs(struct hfi1_devdata *);
void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *);
int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd);
void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
/* Hook for sysfs read of QSFP */
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *);
void hfi1_pcie_cleanup(struct pci_dev *);
int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *);
int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent);
void hfi1_pcie_cleanup(struct pci_dev *pdev);
int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
int pcie_speeds(struct hfi1_devdata *);
void request_msix(struct hfi1_devdata *, u32 *, struct hfi1_msix_entry *);
void hfi1_enable_intx(struct pci_dev *);
int pcie_speeds(struct hfi1_devdata *dd);
void request_msix(struct hfi1_devdata *dd, u32 *nent,
struct hfi1_msix_entry *entry);
void hfi1_enable_intx(struct pci_dev *pdev);
void restore_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
......
......@@ -53,6 +53,7 @@
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/hrtimer.h>
#include <linux/bitmap.h>
#include <rdma/rdma_vt.h>
#include "hfi.h"
......@@ -70,6 +71,7 @@
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
/*
* min buffers we want to have per context, after driver
*/
......@@ -101,9 +103,9 @@ static unsigned hfi1_rcvarr_split = 25;
module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
MODULE_PARM_DESC(rcvarr_split, "Percent of context's RcvArray entries used for Eager buffers");
static uint eager_buffer_size = (2 << 20); /* 2MB */
static uint eager_buffer_size = (8 << 20); /* 8MB */
module_param(eager_buffer_size, uint, S_IRUGO);
MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 2MB");
MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 8MB");
static uint rcvhdrcnt = 2048; /* 2x the max eager buffer count */
module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO);
......@@ -117,7 +119,7 @@ unsigned int user_credit_return_threshold = 33; /* default is 33% */
module_param(user_credit_return_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
static inline u64 encode_rcv_header_entry_size(u16);
static inline u64 encode_rcv_header_entry_size(u16 size);
static struct idr hfi1_unit_table;
u32 hfi1_cpulist_count;
......@@ -175,13 +177,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
goto nomem;
}
ret = hfi1_init_ctxt(rcd->sc);
if (ret < 0) {
dd_dev_err(dd,
"Failed to setup kernel receive context, failing\n");
ret = -EFAULT;
goto bail;
}
hfi1_init_ctxt(rcd->sc);
}
/*
......@@ -193,7 +189,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
return 0;
nomem:
ret = -ENOMEM;
bail:
if (dd->rcd) {
for (i = 0; i < dd->num_rcv_contexts; ++i)
hfi1_free_ctxtdata(dd, dd->rcd[i]);
......@@ -227,7 +223,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
INIT_LIST_HEAD(&rcd->qp_wait_list);
rcd->ppd = ppd;
rcd->dd = dd;
rcd->cnt = 1;
__set_bit(0, rcd->in_use_ctxts);
rcd->ctxt = ctxt;
dd->rcd[ctxt] = rcd;
rcd->numa_id = numa;
......@@ -623,7 +619,7 @@ static int create_workqueues(struct hfi1_devdata *dd)
alloc_workqueue(
"hfi%d_%d",
WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
dd->num_sdma,
HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES,
dd->unit, pidx);
if (!ppd->hfi1_wq)
goto wq_error;
......@@ -968,7 +964,6 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
kfree(rcd->egrbufs.buffers);
sc_free(rcd->sc);
vfree(rcd->user_event_mask);
vfree(rcd->subctxt_uregbase);
vfree(rcd->subctxt_rcvegrbuf);
vfree(rcd->subctxt_rcvhdr_base);
......@@ -1687,8 +1682,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
dd_dev_err(dd,
"attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
rcd->ctxt);
vfree(rcd->user_event_mask);
rcd->user_event_mask = NULL;
dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
......@@ -1777,6 +1770,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) {
dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n",
rcd->ctxt);
ret = -ENOMEM;
goto bail_rcvegrbuf_phys;
}
......@@ -1854,7 +1848,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
"ctxt%u: current Eager buffer size is invalid %u\n",
rcd->ctxt, rcd->egrbufs.rcvtid_size);
ret = -EINVAL;
goto bail;
goto bail_rcvegrbuf_phys;
}
for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
......@@ -1862,7 +1856,8 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.rcvtids[idx].dma, order);
cond_resched();
}
goto bail;
return 0;
bail_rcvegrbuf_phys:
for (idx = 0; idx < rcd->egrbufs.alloced &&
......@@ -1876,6 +1871,6 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.buffers[idx].dma = 0;
rcd->egrbufs.buffers[idx].len = 0;
}
bail:
return ret;
}
......@@ -47,6 +47,7 @@
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/bitmap.h>
#include "hfi.h"
#include "common.h"
......@@ -189,7 +190,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd)
unsigned long flags;
spin_lock_irqsave(&dd->uctxt_lock, flags);
if (!rcd->cnt)
if (bitmap_empty(rcd->in_use_ctxts, HFI1_MAX_SHARED_CTXTS))
goto done;
if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) {
......
......@@ -731,9 +731,7 @@ void quiesce_qp(struct rvt_qp *qp)
void notify_qp_reset(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
priv->r_adefered = 0;
qp->r_adefered = 0;
clear_ahg(qp);
}
......
......@@ -727,10 +727,9 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
struct ib_header hdr;
struct ib_other_headers *ohdr;
unsigned long flags;
struct hfi1_qp_priv *priv = qp->priv;
/* clear the defer count */
priv->r_adefered = 0;
qp->r_adefered = 0;
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
if (qp->s_flags & RVT_S_RESP_PENDING)
......@@ -1604,9 +1603,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
static inline void rc_cancel_ack(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
priv->r_adefered = 0;
qp->r_adefered = 0;
if (list_empty(&qp->rspwait))
return;
list_del_init(&qp->rspwait);
......@@ -2314,13 +2311,11 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
if (psn & IB_BTH_REQ_ACK) {
struct hfi1_qp_priv *priv = qp->priv;
if (packet->numpkt == 0) {
rc_cancel_ack(qp);
goto send_ack;
}
if (priv->r_adefered >= HFI1_PSN_CREDIT) {
if (qp->r_adefered >= HFI1_PSN_CREDIT) {
rc_cancel_ack(qp);
goto send_ack;
}
......@@ -2328,7 +2323,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
rc_cancel_ack(qp);
goto send_ack;
}
priv->r_adefered++;
qp->r_adefered++;
rc_defered_ack(rcd, qp);
}
return;
......
......@@ -800,6 +800,43 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
/**
 * schedule_send_yield - decide whether the QP send engine must yield
 * @qp: a pointer to QP
 * @ps: a pointer to the packet state holding the send engine's
 *      time-slice bookkeeping (timeout, timeout_int, cpu, in_thread, ppd)
 *
 * Nothing happens until jiffies has moved past @ps->timeout.  Once the
 * slice has expired there are two outcomes:
 *
 *  - The caller is not running in a thread, or the port workqueue is
 *    congested on @ps->cpu: RVT_S_BUSY is cleared under the QP's s_lock,
 *    the send is rescheduled through hfi1_schedule_send() and the caller
 *    has to stop sending.
 *  - Otherwise the CPU is offered to other tasks with cond_resched() and
 *    a new slice of @ps->timeout_int jiffies is started.
 *
 * The per-cpu send_schedule counter is bumped in both expired cases.
 *
 * Return: true if the send was requeued and the caller must return,
 * false if the caller may keep sending.
 */
static bool schedule_send_yield(struct rvt_qp *qp,
				struct hfi1_pkt_state *ps)
{
	if (unlikely(time_after(jiffies, ps->timeout))) {
		/*
		 * Staying in this loop is only acceptable from thread
		 * context and while the workqueue is not backed up.
		 */
		bool may_continue = ps->in_thread &&
			!workqueue_congested(ps->cpu, ps->ppd->hfi1_wq);

		if (!may_continue) {
			/* Hand the QP back to the workqueue and bail out. */
			spin_lock_irqsave(&qp->s_lock, ps->flags);
			qp->s_flags &= ~RVT_S_BUSY;
			hfi1_schedule_send(qp);
			spin_unlock_irqrestore(&qp->s_lock, ps->flags);
			this_cpu_inc(*ps->ppd->dd->send_schedule);
			trace_hfi1_rc_expired_time_slice(qp, true);
			return true;
		}

		/* Let other tasks run, then start a fresh time slice. */
		cond_resched();
		this_cpu_inc(*ps->ppd->dd->send_schedule);
		ps->timeout = jiffies + ps->timeout_int;
	}

	trace_hfi1_rc_expired_time_slice(qp, false);
	return false;
}
void hfi1_do_send_from_rvt(struct rvt_qp *qp)
{
hfi1_do_send(qp, false);
......@@ -827,13 +864,13 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
struct hfi1_pkt_state ps;
struct hfi1_qp_priv *priv = qp->priv;
int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
unsigned long timeout;
unsigned long timeout_int;
int cpu;
ps.dev = to_idev(qp->ibqp.device);
ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
ps.ppd = ppd_from_ibp(ps.ibp);
ps.in_thread = in_thread;
trace_hfi1_rc_do_send(qp, in_thread);
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
......@@ -844,7 +881,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
return;
}
make_req = hfi1_make_rc_req;
timeout_int = (qp->timeout_jiffies);
ps.timeout_int = qp->timeout_jiffies;
break;
case IB_QPT_UC:
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
......@@ -854,11 +891,11 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
return;
}
make_req = hfi1_make_uc_req;
timeout_int = SEND_RESCHED_TIMEOUT;
ps.timeout_int = SEND_RESCHED_TIMEOUT;
break;
default:
make_req = hfi1_make_ud_req;
timeout_int = SEND_RESCHED_TIMEOUT;
ps.timeout_int = SEND_RESCHED_TIMEOUT;
}
spin_lock_irqsave(&qp->s_lock, ps.flags);
......@@ -871,9 +908,11 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
qp->s_flags |= RVT_S_BUSY;
timeout = jiffies + (timeout_int) / 8;
cpu = priv->s_sde ? priv->s_sde->cpu :
ps.timeout_int = ps.timeout_int / 8;
ps.timeout = jiffies + ps.timeout_int;
ps.cpu = priv->s_sde ? priv->s_sde->cpu :
cpumask_first(cpumask_of_node(ps.ppd->dd->node));
/* ensure a pre-built packet is handled */
ps.s_txreq = get_waiting_verbs_txreq(qp);
do {
......@@ -889,28 +928,9 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
/* Record that s_ahg is empty. */
qp->s_hdrwords = 0;
/* allow other tasks to run */
if (unlikely(time_after(jiffies, timeout))) {
if (!in_thread ||
workqueue_congested(
cpu,
ps.ppd->hfi1_wq)) {
spin_lock_irqsave(
&qp->s_lock,
ps.flags);
qp->s_flags &= ~RVT_S_BUSY;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(
&qp->s_lock,
ps.flags);
this_cpu_inc(
*ps.ppd->dd->send_schedule);
return;
}
cond_resched();
this_cpu_inc(
*ps.ppd->dd->send_schedule);
timeout = jiffies + (timeout_int) / 8;
}
if (schedule_send_yield(qp, &ps))
return;
spin_lock_irqsave(&qp->s_lock, ps.flags);
}
} while (make_req(qp, &ps));
......
......@@ -57,12 +57,14 @@
#define UCTXT_FMT \
"cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, " \
"rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx"
"rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx, subctxt_cnt:%u"
TRACE_EVENT(hfi1_uctxtdata,
TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
TP_ARGS(dd, uctxt),
TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt,
unsigned int subctxt),
TP_ARGS(dd, uctxt, subctxt),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(unsigned int, ctxt)
__field(unsigned int, subctxt)
__field(u32, credits)
__field(u64, hw_free)
__field(void __iomem *, piobase)
......@@ -70,9 +72,11 @@ TRACE_EVENT(hfi1_uctxtdata,
__field(u64, rcvhdrq_dma)
__field(u32, eager_cnt)
__field(u64, rcvegr_dma)
__field(unsigned int, subctxt_cnt)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = uctxt->ctxt;
__entry->subctxt = subctxt;
__entry->credits = uctxt->sc->credits;
__entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
__entry->piobase = uctxt->sc->base_addr;
......@@ -80,17 +84,20 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
__entry->eager_cnt = uctxt->egrbufs.alloced;
__entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
__entry->subctxt_cnt = uctxt->subctxt_cnt;
),
TP_printk("[%s] ctxt %u " UCTXT_FMT,
TP_printk("[%s] ctxt %u:%u " UCTXT_FMT,
__get_str(dev),
__entry->ctxt,
__entry->subctxt,
__entry->credits,
__entry->hw_free,
__entry->piobase,
__entry->rcvhdrq_cnt,
__entry->rcvhdrq_dma,
__entry->eager_cnt,
__entry->rcvegr_dma
__entry->rcvegr_dma,
__entry->subctxt_cnt
)
);
......
......@@ -676,6 +676,40 @@ TRACE_EVENT(
)
);
/*
 * Event class shared by the send-engine tracepoints below.  Each event
 * records the device name (via DD_DEV_ENTRY/DD_DEV_ASSIGN), the QP number
 * taken from qp->ibqp.qp_num and a caller-supplied boolean flag, and
 * prints them as "[dev] qpn <hex> flag <0|1>".
 */
DECLARE_EVENT_CLASS(
hfi1_do_send_template,
TP_PROTO(struct rvt_qp *qp, bool flag),
TP_ARGS(qp, flag),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(bool, flag)
),
TP_fast_assign(
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->qpn = qp->ibqp.qp_num;
__entry->flag = flag;
),
TP_printk(
"[%s] qpn %x flag %d",
__get_str(dev),
__entry->qpn,
__entry->flag
)
);
/*
 * Emitted on entry to hfi1_do_send(); the flag carries the caller's
 * in_thread argument.
 */
DEFINE_EVENT(
hfi1_do_send_template, hfi1_rc_do_send,
TP_PROTO(struct rvt_qp *qp, bool flag),
TP_ARGS(qp, flag)
);
/*
 * Emitted by schedule_send_yield(); the flag is true when the send was
 * requeued because the time slice expired, false otherwise.
 */
DEFINE_EVENT(
hfi1_do_send_template, hfi1_rc_expired_time_slice,
TP_PROTO(struct rvt_qp *qp, bool flag),
TP_ARGS(qp, flag)
);
#endif /* __HFI1_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH
......
......@@ -53,7 +53,7 @@
struct tid_group {
struct list_head list;
unsigned base;
u32 base;
u8 size;
u8 used;
u8 map;
......@@ -82,20 +82,25 @@ struct tid_pageset {
(unsigned long)(len) - 1) & PAGE_MASK) - \
((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
struct hfi1_filedata *);
static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
static int set_rcvarray_entry(struct file *, unsigned long, u32,
struct tid_group *, struct page **, unsigned);
static int tid_rb_insert(void *, struct mmu_rb_node *);
static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
struct exp_tid_set *set,
struct hfi1_filedata *fd);
static u32 find_phys_blocks(struct page **pages, unsigned npages,
struct tid_pageset *list);
static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr,
u32 rcventry, struct tid_group *grp,
struct page **pages, unsigned npages);
static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
struct tid_rb_node *tnode);
static void tid_rb_remove(void *, struct mmu_rb_node *);
static int tid_rb_invalidate(void *, struct mmu_rb_node *);
static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
struct tid_pageset *, unsigned, u16, struct page **,
u32 *, unsigned *, unsigned *);
static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
struct tid_group *grp, struct tid_pageset *sets,
unsigned start, u16 count, struct page **pages,
u32 *tidlist, unsigned *tididx, unsigned *pmapped);
static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
struct tid_group **grp);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
static struct mmu_rb_ops tid_rb_ops = {
......@@ -149,52 +154,60 @@ static inline void tid_group_move(struct tid_group *group,
tid_group_add_tail(group, s2);
}
/*
 * Set up the TID group bookkeeping of the context behind @fd.
 *
 * The context's tid_group_list, tid_used_list and tid_full_list are
 * initialized, then one struct tid_group is allocated for every
 * dd->rcv_entries.group_size entries of the context's expected_count.
 * Group bases start at uctxt->expected_base and advance by group_size;
 * every group is appended to tid_group_list.
 *
 * Returns 0 on success.  If an allocation fails, all groups already on
 * tid_group_list are unlinked and freed and -ENOMEM is returned.
 */
int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd)
{
	struct hfi1_devdata *dd = fd->dd;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct tid_group *group, *next;
	u32 next_base;
	u32 idx = 0;

	exp_tid_group_init(&uctxt->tid_group_list);
	exp_tid_group_init(&uctxt->tid_used_list);
	exp_tid_group_init(&uctxt->tid_full_list);

	next_base = uctxt->expected_base;
	while (idx < uctxt->expected_count / dd->rcv_entries.group_size) {
		group = kzalloc(sizeof(*group), GFP_KERNEL);
		if (!group) {
			/* Undo the partial setup before reporting failure. */
			list_for_each_entry_safe(group, next,
						 &uctxt->tid_group_list.list,
						 list) {
				list_del_init(&group->list);
				kfree(group);
			}
			return -ENOMEM;
		}

		group->size = dd->rcv_entries.group_size;
		group->base = next_base;
		tid_group_add_tail(group, &uctxt->tid_group_list);

		next_base += dd->rcv_entries.group_size;
		idx++;
	}

	return 0;
}
/*
* Initialize context and file private data needed for Expected
* receive caching. This needs to be done after the context has
* been configured with the eager/expected RcvEntry counts.
*/
int hfi1_user_exp_rcv_init(struct file *fp)
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd)
{
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
unsigned tidbase;
int i, ret = 0;
int ret = 0;
spin_lock_init(&fd->tid_lock);
spin_lock_init(&fd->invalid_lock);
if (!uctxt->subctxt_cnt || !fd->subctxt) {
exp_tid_group_init(&uctxt->tid_group_list);
exp_tid_group_init(&uctxt->tid_used_list);
exp_tid_group_init(&uctxt->tid_full_list);
tidbase = uctxt->expected_base;
for (i = 0; i < uctxt->expected_count /
dd->rcv_entries.group_size; i++) {
struct tid_group *grp;
grp = kzalloc(sizeof(*grp), GFP_KERNEL);
if (!grp) {
/*
* If we fail here, the groups already
* allocated will be freed by the close
* call.
*/
ret = -ENOMEM;
goto done;
}
grp->size = dd->rcv_entries.group_size;
grp->base = tidbase;
tid_group_add_tail(grp, &uctxt->tid_group_list);
tidbase += dd->rcv_entries.group_size;
}
}
fd->entry_to_rb = kcalloc(uctxt->expected_count,
sizeof(struct rb_node *),
GFP_KERNEL);
sizeof(struct rb_node *),
GFP_KERNEL);
if (!fd->entry_to_rb)
return -ENOMEM;
......@@ -204,8 +217,9 @@ int hfi1_user_exp_rcv_init(struct file *fp)
sizeof(*fd->invalid_tids),
GFP_KERNEL);
if (!fd->invalid_tids) {
ret = -ENOMEM;
goto done;
kfree(fd->entry_to_rb);
fd->entry_to_rb = NULL;
return -ENOMEM;
}
/*
......@@ -248,41 +262,44 @@ int hfi1_user_exp_rcv_init(struct file *fp)
fd->tid_limit = uctxt->expected_count;
}
spin_unlock(&fd->tid_lock);
done:
return ret;
}
int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_group *grp, *gptr;
if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
return 0;
list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
list) {
list_del_init(&grp->list);
kfree(grp);
}
hfi1_clear_tids(uctxt);
}
void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
/*
* The notifier would have been removed when the process's mm
* was freed.
*/
if (fd->handler)
if (fd->handler) {
hfi1_mmu_rb_unregister(fd->handler);
kfree(fd->invalid_tids);
if (!uctxt->cnt) {
} else {
if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
list) {
list_del_init(&grp->list);
kfree(grp);
}
hfi1_clear_tids(uctxt);
}
kfree(fd->invalid_tids);
fd->invalid_tids = NULL;
kfree(fd->entry_to_rb);
return 0;
fd->entry_to_rb = NULL;
}
/*
......@@ -351,10 +368,10 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
* can fit into the group. If the group becomes fully
* used, move it to tid_full_list.
*/
int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
struct hfi1_tid_info *tinfo)
{
int ret = 0, need_group = 0, pinned;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets,
......@@ -451,7 +468,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
struct tid_group *grp =
tid_group_pop(&uctxt->tid_group_list);
ret = program_rcvarray(fp, vaddr, grp, pagesets,
ret = program_rcvarray(fd, vaddr, grp, pagesets,
pageidx, dd->rcv_entries.group_size,
pages, tidlist, &tididx, &mapped);
/*
......@@ -497,7 +514,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
unsigned use = min_t(unsigned, pageset_count - pageidx,
grp->size - grp->used);
ret = program_rcvarray(fp, vaddr, grp, pagesets,
ret = program_rcvarray(fd, vaddr, grp, pagesets,
pageidx, use, pages, tidlist,
&tididx, &mapped);
if (ret < 0) {
......@@ -547,7 +564,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
* everything done so far so we don't leak resources.
*/
tinfo->tidlist = (unsigned long)&tidlist;
hfi1_user_exp_rcv_clear(fp, tinfo);
hfi1_user_exp_rcv_clear(fd, tinfo);
tinfo->tidlist = 0;
ret = -EFAULT;
goto bail;
......@@ -571,10 +588,10 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
return ret > 0 ? 0 : ret;
}
int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
struct hfi1_tid_info *tinfo)
{
int ret = 0;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
u32 *tidinfo;
unsigned tididx;
......@@ -589,7 +606,7 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
mutex_lock(&uctxt->exp_lock);
for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL);
ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
if (ret) {
hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
ret);
......@@ -606,9 +623,9 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
return ret;
}
int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
struct hfi1_tid_info *tinfo)
{
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
unsigned long *ev = uctxt->dd->events +
(((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
......@@ -723,7 +740,7 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
/**
* program_rcvarray() - program an RcvArray group with receive buffers
* @fp: file pointer
* @fd: filedata pointer
* @vaddr: starting user virtual address
* @grp: RcvArray group
* @sets: array of struct tid_pageset holding information on physically
......@@ -748,13 +765,12 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
* -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
* number of RcvArray entries programmed.
*/
static int program_rcvarray(struct file *fp, unsigned long vaddr,
static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr,
struct tid_group *grp,
struct tid_pageset *sets,
unsigned start, u16 count, struct page **pages,
u32 *tidlist, unsigned *tididx, unsigned *pmapped)
{
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
u16 idx;
......@@ -795,7 +811,7 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr,
npages = sets[setidx].count;
pageidx = sets[setidx].idx;
ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE),
ret = set_rcvarray_entry(fd, vaddr + (pageidx * PAGE_SIZE),
rcventry, grp, pages + pageidx,
npages);
if (ret)
......@@ -817,12 +833,11 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr,
return idx;
}
static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr,
u32 rcventry, struct tid_group *grp,
struct page **pages, unsigned npages)
{
int ret;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_rb_node *node;
struct hfi1_devdata *dd = uctxt->dd;
......@@ -876,10 +891,9 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
return 0;
}
static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
struct tid_group **grp)
{
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
struct tid_rb_node *node;
......
#ifndef _HFI1_USER_EXP_RCV_H
#define _HFI1_USER_EXP_RCV_H
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
......@@ -70,10 +70,15 @@
(tid) |= EXP_TID_SET(field, (value)); \
} while (0)
int hfi1_user_exp_rcv_init(struct file *);
int hfi1_user_exp_rcv_free(struct hfi1_filedata *);
int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *);
int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *);
int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *);
void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt);
int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd);
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd);
void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd);
int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
struct hfi1_tid_info *tinfo);
int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
struct hfi1_tid_info *tinfo);
int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
struct hfi1_tid_info *tinfo);
#endif /* _HFI1_USER_EXP_RCV_H */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
......@@ -143,7 +143,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
/* KDETH OM multipliers and switch over point */
#define KDETH_OM_SMALL 4
#define KDETH_OM_SMALL_SHIFT 2
#define KDETH_OM_LARGE 64
#define KDETH_OM_LARGE_SHIFT 6
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
/* Tx request flag bits */
......@@ -153,9 +155,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
/* SDMA request flag bits */
#define SDMA_REQ_FOR_THREAD 1
#define SDMA_REQ_SEND_DONE 2
#define SDMA_REQ_HAVE_AHG 3
#define SDMA_REQ_HAS_ERROR 4
#define SDMA_REQ_DONE_ERROR 5
#define SDMA_REQ_HAS_ERROR 3
#define SDMA_REQ_DONE_ERROR 4
#define SDMA_PKT_Q_INACTIVE BIT(0)
#define SDMA_PKT_Q_ACTIVE BIT(1)
......@@ -214,7 +215,7 @@ struct user_sdma_request {
* each request will need its own engine pointer.
*/
struct sdma_engine *sde;
u8 ahg_idx;
s8 ahg_idx;
u32 ahg[9];
/*
* KDETH.Offset (Eager) field
......@@ -228,12 +229,6 @@ struct user_sdma_request {
* size of the TID entry.
*/
u32 tidoffset;
/*
* KDETH.OM
* Remember this because the header template always sets it
* to 0.
*/
u8 omfactor;
/*
* We copy the iovs for this request (based on
* info.iovcnt). These are only the data vectors
......@@ -284,39 +279,43 @@ struct user_sdma_txreq {
hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \
(pq)->subctxt, ##__VA_ARGS__)
static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
static int num_user_pages(const struct iovec *);
static void user_sdma_txreq_cb(struct sdma_txreq *, int);
static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
static void user_sdma_free_request(struct user_sdma_request *, bool);
static int pin_vector_pages(struct user_sdma_request *,
struct user_sdma_iovec *);
static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned,
unsigned);
static int check_header_template(struct user_sdma_request *,
struct hfi1_pkt_header *, u32, u32);
static int set_txreq_header(struct user_sdma_request *,
struct user_sdma_txreq *, u32);
static int set_txreq_header_ahg(struct user_sdma_request *,
struct user_sdma_txreq *, u32);
static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
struct hfi1_user_sdma_comp_q *,
u16, enum hfi1_sdma_comp_state, int);
static inline u32 set_pkt_bth_psn(__be32, u8, u32);
static int user_sdma_send_pkts(struct user_sdma_request *req,
unsigned maxpkts);
static int num_user_pages(const struct iovec *iov);
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
static int pin_vector_pages(struct user_sdma_request *req,
struct user_sdma_iovec *iovec);
static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
unsigned start, unsigned npages);
static int check_header_template(struct user_sdma_request *req,
struct hfi1_pkt_header *hdr, u32 lrhlen,
u32 datalen);
static int set_txreq_header(struct user_sdma_request *req,
struct user_sdma_txreq *tx, u32 datalen);
static int set_txreq_header_ahg(struct user_sdma_request *req,
struct user_sdma_txreq *tx, u32 len);
static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
struct hfi1_user_sdma_comp_q *cq,
u16 idx, enum hfi1_sdma_comp_state state,
int ret);
static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags);
static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
static int defer_packet_queue(
struct sdma_engine *,
struct iowait *,
struct sdma_txreq *,
unsigned seq);
static void activate_packet_queue(struct iowait *, int);
static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
static int sdma_rb_insert(void *, struct mmu_rb_node *);
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *txreq,
unsigned int seq);
static void activate_packet_queue(struct iowait *wait, int reason);
static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
unsigned long len);
static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
void *arg2, bool *stop);
static void sdma_rb_remove(void *, struct mmu_rb_node *);
static int sdma_rb_invalidate(void *, struct mmu_rb_node *);
static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
static struct mmu_rb_ops sdma_rb_ops = {
.filter = sdma_rb_filter,
......@@ -372,45 +371,27 @@ static void sdma_kmem_cache_ctor(void *obj)
memset(tx, 0, sizeof(*tx));
}
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
struct hfi1_filedata *fd)
{
struct hfi1_filedata *fd;
int ret = 0;
int ret = -ENOMEM;
char buf[64];
struct hfi1_devdata *dd;
struct hfi1_user_sdma_comp_q *cq;
struct hfi1_user_sdma_pkt_q *pq;
unsigned long flags;
if (!uctxt || !fp) {
ret = -EBADF;
goto done;
}
fd = fp->private_data;
if (!uctxt || !fd)
return -EBADF;
if (!hfi1_sdma_comp_ring_size) {
ret = -EINVAL;
goto done;
}
if (!hfi1_sdma_comp_ring_size)
return -EINVAL;
dd = uctxt->dd;
pq = kzalloc(sizeof(*pq), GFP_KERNEL);
if (!pq)
goto pq_nomem;
pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
sizeof(*pq->reqs),
GFP_KERNEL);
if (!pq->reqs)
goto pq_reqs_nomem;
pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
sizeof(*pq->req_in_use),
GFP_KERNEL);
if (!pq->req_in_use)
goto pq_reqs_no_in_use;
return -ENOMEM;
INIT_LIST_HEAD(&pq->list);
pq->dd = dd;
......@@ -426,10 +407,23 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
activate_packet_queue, NULL);
pq->reqidx = 0;
pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
sizeof(*pq->reqs),
GFP_KERNEL);
if (!pq->reqs)
goto pq_reqs_nomem;
pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
sizeof(*pq->req_in_use),
GFP_KERNEL);
if (!pq->req_in_use)
goto pq_reqs_no_in_use;
snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
fd->subctxt);
pq->txreq_cache = kmem_cache_create(buf,
sizeof(struct user_sdma_txreq),
sizeof(struct user_sdma_txreq),
L1_CACHE_BYTES,
SLAB_HWCACHE_ALIGN,
sdma_kmem_cache_ctor);
......@@ -438,7 +432,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
uctxt->ctxt);
goto pq_txreq_nomem;
}
fd->pq = pq;
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
if (!cq)
goto cq_nomem;
......@@ -449,20 +443,25 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
goto cq_comps_nomem;
cq->nentries = hfi1_sdma_comp_ring_size;
fd->cq = cq;
ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
&pq->handler);
if (ret) {
dd_dev_err(dd, "Failed to register with MMU %d", ret);
goto done;
goto pq_mmu_fail;
}
fd->pq = pq;
fd->cq = cq;
spin_lock_irqsave(&uctxt->sdma_qlock, flags);
list_add(&pq->list, &uctxt->sdma_queues);
spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
goto done;
return 0;
pq_mmu_fail:
vfree(cq->comps);
cq_comps_nomem:
kfree(cq);
cq_nomem:
......@@ -473,10 +472,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
kfree(pq->reqs);
pq_reqs_nomem:
kfree(pq);
fd->pq = NULL;
pq_nomem:
ret = -ENOMEM;
done:
return ret;
}
......@@ -536,11 +532,11 @@ static u8 dlid_to_selector(u16 dlid)
return mapping[hash];
}
int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
unsigned long dim, unsigned long *count)
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
struct iovec *iovec, unsigned long dim,
unsigned long *count)
{
int ret = 0, i;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_user_sdma_pkt_q *pq = fd->pq;
struct hfi1_user_sdma_comp_q *cq = fd->cq;
......@@ -616,6 +612,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
req->pq = pq;
req->cq = cq;
req->status = -1;
req->ahg_idx = -1;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
......@@ -766,14 +763,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
}
/* We don't need an AHG entry if the request contains only one packet */
if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG)) {
int ahg = sdma_ahg_alloc(req->sde);
if (likely(ahg >= 0)) {
req->ahg_idx = (u8)ahg;
set_bit(SDMA_REQ_HAVE_AHG, &req->flags);
}
}
if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
req->ahg_idx = sdma_ahg_alloc(req->sde);
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
atomic_inc(&pq->n_reqs);
......@@ -991,7 +982,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
}
}
if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) {
if (req->ahg_idx >= 0) {
if (!req->seqnum) {
u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
u32 lrhlen = get_lrh_len(req->hdr,
......@@ -1121,7 +1112,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
* happen due to the sequential manner in which
* descriptors are processed.
*/
if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
if (req->ahg_idx >= 0)
sdma_ahg_free(req->sde, req->ahg_idx);
}
return ret;
......@@ -1323,6 +1314,7 @@ static int set_txreq_header(struct user_sdma_request *req,
{
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &tx->hdr;
u8 omfactor; /* KDETH.OM */
u16 pbclen;
int ret;
u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
......@@ -1400,8 +1392,9 @@ static int set_txreq_header(struct user_sdma_request *req,
}
tidval = req->tids[req->tididx];
}
req->omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE : KDETH_OM_SMALL;
omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
KDETH_OM_SMALL_SHIFT;
/* Set KDETH.TIDCtrl based on value for this TID. */
KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
EXP_TID_GET(tidval, CTRL));
......@@ -1416,12 +1409,12 @@ static int set_txreq_header(struct user_sdma_request *req,
* transfer.
*/
SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
req->tidoffset, req->tidoffset / req->omfactor,
req->omfactor != KDETH_OM_SMALL);
req->tidoffset, req->tidoffset >> omfactor,
omfactor != KDETH_OM_SMALL_SHIFT);
KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
req->tidoffset / req->omfactor);
req->tidoffset >> omfactor);
KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
req->omfactor != KDETH_OM_SMALL);
omfactor != KDETH_OM_SMALL_SHIFT);
}
done:
trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
......@@ -1433,6 +1426,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
struct user_sdma_txreq *tx, u32 len)
{
int diff = 0;
u8 omfactor; /* KDETH.OM */
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &req->hdr;
u16 pbclen = le16_to_cpu(hdr->pbc[0]);
......@@ -1484,14 +1478,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
}
tidval = req->tids[req->tididx];
}
req->omfactor = ((EXP_TID_GET(tidval, LEN) *
omfactor = ((EXP_TID_GET(tidval, LEN) *
PAGE_SIZE) >=
KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE :
KDETH_OM_SMALL;
KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
KDETH_OM_SMALL_SHIFT;
/* KDETH.OM and KDETH.OFFSET (TID) */
AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
((req->tidoffset / req->omfactor) & 0x7fff)));
((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
((req->tidoffset >> omfactor)
& 0x7fff)));
/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
(EXP_TID_GET(tidval, IDX) & 0x3ff));
......
#ifndef _HFI1_USER_SDMA_H
#define _HFI1_USER_SDMA_H
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
......@@ -56,7 +58,7 @@ extern uint extended_psn;
struct hfi1_user_sdma_pkt_q {
struct list_head list;
unsigned ctxt;
unsigned subctxt;
u16 subctxt;
u16 n_max_reqs;
atomic_t n_reqs;
u16 reqidx;
......@@ -78,7 +80,11 @@ struct hfi1_user_sdma_comp_q {
struct hfi1_sdma_comp_entry *comps;
};
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *, struct file *);
int hfi1_user_sdma_free_queues(struct hfi1_filedata *);
int hfi1_user_sdma_process_request(struct file *, struct iovec *, unsigned long,
unsigned long *);
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
struct hfi1_filedata *fd);
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd);
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
struct iovec *iovec, unsigned long dim,
unsigned long *count);
#endif /* _HFI1_USER_SDMA_H */
......@@ -125,7 +125,6 @@ struct hfi1_qp_priv {
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
u8 s_sc; /* SC[0..4] for next packet */
u8 r_adefered; /* number of acks defered */
struct iowait s_iowait;
struct rvt_qp *owner;
};
......@@ -140,6 +139,10 @@ struct hfi1_pkt_state {
struct hfi1_pportdata *ppd;
struct verbs_txreq *s_txreq;
unsigned long flags;
unsigned long timeout;
unsigned long timeout_int;
int cpu;
bool in_thread;
};
#define HFI1_PSN_CREDIT 16
......
......@@ -67,9 +67,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
unsigned int rcvctrl_ops = 0;
int ret;
ret = hfi1_init_ctxt(uctxt->sc);
if (ret)
goto done;
hfi1_init_ctxt(uctxt->sc);
uctxt->do_interrupt = &handle_receive_interrupt;
......@@ -82,8 +80,6 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
if (ret)
goto done;
set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
if (uctxt->rcvhdrtail_kvaddr)
clear_rcvhdrtail(uctxt);
......@@ -209,7 +205,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
uctxt->event_flags = 0;
hfi1_clear_tids(uctxt);
hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
hfi1_clear_ctxt_pkey(dd, uctxt);
hfi1_stats.sps_ctxts--;
hfi1_free_ctxtdata(dd, uctxt);
......
......@@ -3530,6 +3530,26 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
return num_counters;
}
/*
 * alloc_rdma_netdev hook installed on the mlx5 ib_device.
 *
 * Only RDMA_NETDEV_IPOIB requests are served: they are forwarded to
 * mlx5_rdma_netdev_alloc() together with the core device behind @hca,
 * and that function's result is returned unchanged.  Every other @type
 * is rejected with ERR_PTR(-EOPNOTSUPP).
 *
 * @port_num and @name_assign_type are accepted but not consulted here.
 */
static struct net_device*
mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
			  u8 port_num,
			  enum rdma_netdev_t type,
			  const char *name,
			  unsigned char name_assign_type,
			  void (*setup)(struct net_device *))
{
	if (type == RDMA_NETDEV_IPOIB)
		return mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
					      name, setup);

	/* No other rdma netdev flavour is handled by this driver hook. */
	return ERR_PTR(-EOPNOTSUPP);
}
/*
 * free_rdma_netdev hook installed on the mlx5 ib_device: hands @netdev
 * to mlx5_rdma_netdev_free().
 */
static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
{
	/*
	 * Plain call, no "return": ISO C does not allow a return statement
	 * with an expression in a function whose return type is void.
	 */
	mlx5_rdma_netdev_free(netdev);
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
......@@ -3660,6 +3680,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;
dev->ib_dev.free_rdma_netdev = mlx5_ib_free_rdma_netdev;
if (mlx5_core_is_pf(mdev)) {
dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
......
......@@ -368,7 +368,7 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
((void *)(uintptr_t)iova) : addr;
if (crcp)
crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
*crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
*crcp, src, length);
memcpy(dest, src, length);
......
......@@ -114,7 +114,6 @@ enum rxe_device_param {
RXE_MAX_UCONTEXT = 512,
RXE_NUM_PORT = 1,
RXE_NUM_COMP_VECTORS = 1,
RXE_MIN_QP_INDEX = 16,
RXE_MAX_QP_INDEX = 0x00020000,
......
......@@ -1239,7 +1239,7 @@ int rxe_register_device(struct rxe_dev *rxe)
dev->owner = THIS_MODULE;
dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
dev->num_comp_vectors = num_possible_cpus();
dev->dev.parent = rxe_dma_device(rxe);
dev->local_dma_lkey = 0;
addrconf_addr_eui48((unsigned char *)&dev->node_guid,
......
......@@ -155,7 +155,66 @@ static int ipoib_get_sset_count(struct net_device __always_unused *dev,
return -EOPNOTSUPP;
}
/*
 * Translate an IB_SPEED_* value into a per-lane speed, expressed in
 * units of 1e6 bit/sec using the ethtool SPEED_* constants.
 *
 * Values not listed below map to SPEED_UNKNOWN.
 */
static inline int ib_speed_enum_to_int(int speed)
{
	int lane_speed;

	switch (speed) {
	case IB_SPEED_SDR:
		lane_speed = SPEED_2500;
		break;
	case IB_SPEED_DDR:
		lane_speed = SPEED_5000;
		break;
	case IB_SPEED_QDR:
	case IB_SPEED_FDR10:
		/* QDR and FDR10 are both reported as SPEED_10000. */
		lane_speed = SPEED_10000;
		break;
	case IB_SPEED_FDR:
		lane_speed = SPEED_14000;
		break;
	case IB_SPEED_EDR:
		lane_speed = SPEED_25000;
		break;
	default:
		lane_speed = SPEED_UNKNOWN;
		break;
	}

	return lane_speed;
}
static int ipoib_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *cmd)
{
struct ipoib_dev_priv *priv = netdev_priv(netdev);
struct ib_port_attr attr;
int ret, speed, width;
if (!netif_carrier_ok(netdev)) {
cmd->base.speed = SPEED_UNKNOWN;
cmd->base.duplex = DUPLEX_UNKNOWN;
return 0;
}
ret = ib_query_port(priv->ca, priv->port, &attr);
if (ret < 0)
return -EINVAL;
speed = ib_speed_enum_to_int(attr.active_speed);
width = ib_width_enum_to_int(attr.active_width);
if (speed < 0 || width < 0)
return -EINVAL;
/* Except the following are set, the other members of
* the struct ethtool_link_settings are initialized to
* zero in the function __ethtool_get_link_ksettings.
*/
cmd->base.speed = speed * width;
cmd->base.duplex = DUPLEX_FULL;
cmd->base.phy_address = 0xFF;
cmd->base.autoneg = AUTONEG_ENABLE;
cmd->base.port = PORT_OTHER;
return 0;
}
static const struct ethtool_ops ipoib_ethtool_ops = {
.get_link_ksettings = ipoib_get_link_ksettings,
.get_drvinfo = ipoib_get_drvinfo,
.get_coalesce = ipoib_get_coalesce,
.set_coalesce = ipoib_set_coalesce,
......
......@@ -35,6 +35,6 @@ config MLX5_CORE_EN_DCB
config MLX5_CORE_IPOIB
bool "Mellanox Technologies ConnectX-4 IPoIB offloads support"
depends on MLX5_CORE_EN
default y
default n
---help---
MLX5 IPoIB offloads & acceleration support.
......@@ -30,6 +30,7 @@
* SOFTWARE.
*/
#include <rdma/ib_verbs.h>
#include <linux/mlx5/fs.h>
#include "en.h"
#include "ipoib.h"
......@@ -359,10 +360,10 @@ static int mlx5i_close(struct net_device *netdev)
return 0;
}
#ifdef notusedyet
/* IPoIB RDMA netdev callbacks */
static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
union ib_gid *gid, u16 lid, int set_qkey)
union ib_gid *gid, u16 lid, int set_qkey,
u32 qkey)
{
struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
struct mlx5_core_dev *mdev = epriv->mdev;
......@@ -375,6 +376,12 @@ static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n",
ipriv->qp.qpn, gid->raw);
if (set_qkey) {
mlx5_core_dbg(mdev, "%s setting qkey 0x%x\n",
netdev->name, qkey);
ipriv->qkey = qkey;
}
return err;
}
......@@ -397,15 +404,15 @@ static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
}
static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
struct ib_ah *address, u32 dqpn, u32 dqkey)
struct ib_ah *address, u32 dqpn)
{
struct mlx5e_priv *epriv = mlx5i_epriv(dev);
struct mlx5e_txqsq *sq = epriv->txq2sq[skb_get_queue_mapping(skb)];
struct mlx5_ib_ah *mah = to_mah(address);
struct mlx5i_priv *ipriv = epriv->ppriv;
return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, dqkey);
return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey);
}
#endif
static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
{
......@@ -414,22 +421,23 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n");
return -ENOTSUPP;
return -EOPNOTSUPP;
}
return 0;
}
static struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
struct ib_device *ibdev,
const char *name,
void (*setup)(struct net_device *))
struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
struct ib_device *ibdev,
const char *name,
void (*setup)(struct net_device *))
{
const struct mlx5e_profile *profile = &mlx5i_nic_profile;
int nch = profile->max_nch(mdev);
struct net_device *netdev;
struct mlx5i_priv *ipriv;
struct mlx5e_priv *epriv;
struct rdma_netdev *rn;
int err;
if (mlx5i_check_required_hca_cap(mdev)) {
......@@ -464,13 +472,13 @@ static struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
mlx5e_attach_netdev(epriv);
netif_carrier_off(netdev);
/* TODO: set rdma_netdev func pointers
* rn = &ipriv->rn;
* rn->hca = ibdev;
* rn->send = mlx5i_xmit;
* rn->attach_mcast = mlx5i_attach_mcast;
* rn->detach_mcast = mlx5i_detach_mcast;
*/
/* set rdma_netdev func pointers */
rn = &ipriv->rn;
rn->hca = ibdev;
rn->send = mlx5i_xmit;
rn->attach_mcast = mlx5i_attach_mcast;
rn->detach_mcast = mlx5i_detach_mcast;
return netdev;
err_free_netdev:
......@@ -482,7 +490,7 @@ static struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
}
EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
static void mlx5_rdma_netdev_free(struct net_device *netdev)
void mlx5_rdma_netdev_free(struct net_device *netdev)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
const struct mlx5e_profile *profile = priv->profile;
......
......@@ -40,7 +40,9 @@
/* ipoib rdma netdev's private data structure */
struct mlx5i_priv {
struct rdma_netdev rn; /* keep this first */
/* QP whose qpn is printed by mlx5i_attach_mcast() when attaching fails */
struct mlx5_core_qp qp;
/*
 * Q_Key stored by mlx5i_attach_mcast() when its set_qkey argument is
 * non-zero; mlx5i_xmit() passes it to mlx5i_sq_xmit() for every send.
 */
u32 qkey;
/*
 * Zero-length trailing array.
 * NOTE(review): presumably marks where the mlx5e private area starts
 * right after this struct -- confirm against the netdev allocation.
 */
char *mlx5e_priv[0];
};
......
......@@ -1097,6 +1097,25 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
/*
 * IPoIB rdma netdev entry points.
 *
 * With CONFIG_MLX5_CORE_IPOIB defined, only the prototypes are given
 * here.  Without it, inline stubs stand in: allocation returns
 * ERR_PTR(-EOPNOTSUPP) and free does nothing.
 */
#ifdef CONFIG_MLX5_CORE_IPOIB
struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
					  struct ib_device *ibdev,
					  const char *name,
					  void (*setup)(struct net_device *));
void mlx5_rdma_netdev_free(struct net_device *netdev);
#else
static inline
struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
					  struct ib_device *ibdev,
					  const char *name,
					  void (*setup)(struct net_device *))
{
	return ERR_PTR(-EOPNOTSUPP);
}

static inline void mlx5_rdma_netdev_free(struct net_device *netdev) {}
#endif /* CONFIG_MLX5_CORE_IPOIB */
struct mlx5_profile {
u64 mask;
u8 log_max_qp;
......
......@@ -324,6 +324,7 @@ struct rvt_qp {
u8 r_state; /* opcode of last packet received */
u8 r_flags;
u8 r_head_ack_queue; /* index into s_ack_queue[] */
u8 r_adefered; /* defered ack count */
struct list_head rspwait; /* link for waiting to respond */
......
......@@ -1494,6 +1494,7 @@ enum ethtool_link_mode_bit_indices {
#define SPEED_2500 2500
#define SPEED_5000 5000
#define SPEED_10000 10000
#define SPEED_14000 14000
#define SPEED_20000 20000
#define SPEED_25000 25000
#define SPEED_40000 40000
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment