Commit 4730f4a6 authored by Grzegorz Andrejczuk, committed by Jason Gunthorpe

IB/hfi1: Activate the dummy netdev

As described in earlier patches, the IPoIB netdev will share receive
contexts with the existing VNIC netdev through a dummy netdev. The
following changes are made to achieve that:
- Set up netdev receive contexts after user contexts. A function is
  added to count the available netdev receive contexts.
- Add functions to set/get receive map table free index.
- Rename NUM_VNIC_MAP_ENTRIES to NUM_NETDEV_MAP_ENTRIES.
- Let the dummy netdev own the receive contexts instead of VNIC.
- Allocate the dummy netdev when the hfi1 device is added and free it
  when the device is removed (see the sketch after this list).
- Initialize AIP RSM rules when the IPoIB rxq is initialized and
  remove the rules when it is de-initialized.
- Convert VNIC to use the dummy netdev.
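
The last two items carry the structural change: all netdev receive state
hangs off a private struct embedded in a dummy net_device. A minimal
sketch of that allocate/free pair follows. The function names match the
calls visible in the diff (hfi1_netdev_alloc/hfi1_netdev_free), but the
bodies are illustrative assumptions rather than the exact upstream
implementation; they assume the priv area sits directly behind the
net_device (reachable via hfi1_netdev_priv()) and that dd->node holds
the device's NUMA node.

	/* Illustrative sketch only; not the exact upstream code. */
	int hfi1_netdev_alloc(struct hfi1_devdata *dd)
	{
		struct hfi1_netdev_priv *priv;
		const int size = sizeof(*dd->dummy_netdev) +
				 sizeof(struct hfi1_netdev_priv);

		/* One allocation holds the dummy net_device plus its priv area. */
		dd->dummy_netdev = kcalloc_node(1, size, GFP_KERNEL, dd->node);
		if (!dd->dummy_netdev)
			return -ENOMEM;

		priv = hfi1_netdev_priv(dd->dummy_netdev);
		priv->dd = dd;
		/* Initialize as a dummy device: usable for NAPI, never registered. */
		init_dummy_netdev(dd->dummy_netdev);
		return 0;
	}

	/* Illustrative sketch only. */
	void hfi1_netdev_free(struct hfi1_devdata *dd)
	{
		kfree(dd->dummy_netdev);
		dd->dummy_netdev = NULL;
	}

As the hunks below show, hfi1_init_dd() allocates the dummy netdev just
before set_up_context_variables() and frees it on the error path, and
remove_one() frees it in place of the old hfi1_vnic_cleanup() call.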

Link: https://lore.kernel.org/r/20200511160649.173205.4626.stgit@awfm-01.aw.intel.com
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Sadanand Warrier <sadanand.warrier@intel.com>
Signed-off-by: Grzegorz Andrejczuk <grzegorz.andrejczuk@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 370caa5b
@@ -13396,8 +13396,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
static int set_up_context_variables(struct hfi1_devdata *dd)
{
unsigned long num_kernel_contexts;
u16 num_netdev_contexts = HFI1_NUM_VNIC_CTXT;
int total_contexts;
u16 num_netdev_contexts;
int ret;
unsigned ngroups;
int rmt_count;
@@ -13434,13 +13433,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
num_kernel_contexts = send_contexts - num_vls - 1;
}
/* Accommodate VNIC contexts if possible */
if ((num_kernel_contexts + num_netdev_contexts) > rcv_contexts) {
dd_dev_err(dd, "No receive contexts available for VNIC\n");
num_netdev_contexts = 0;
}
total_contexts = num_kernel_contexts + num_netdev_contexts;
/*
* User contexts:
* - default to 1 user context per real (non-HT) CPU core if
@@ -13453,15 +13445,19 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
/*
* Adjust the counts given a global max.
*/
if (total_contexts + n_usr_ctxts > rcv_contexts) {
if (num_kernel_contexts + n_usr_ctxts > rcv_contexts) {
dd_dev_err(dd,
"Reducing # user receive contexts to: %d, from %u\n",
rcv_contexts - total_contexts,
"Reducing # user receive contexts to: %u, from %u\n",
(u32)(rcv_contexts - num_kernel_contexts),
n_usr_ctxts);
/* recalculate */
n_usr_ctxts = rcv_contexts - total_contexts;
n_usr_ctxts = rcv_contexts - num_kernel_contexts;
}
num_netdev_contexts =
hfi1_num_netdev_contexts(dd, rcv_contexts -
(num_kernel_contexts + n_usr_ctxts),
&node_affinity.real_cpu_mask);
/*
* The RMT entries are currently allocated as shown below:
* 1. QOS (0 to 128 entries);
@@ -13487,17 +13483,16 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
n_usr_ctxts = user_rmt_reduced;
}
total_contexts += n_usr_ctxts;
/* the first N are kernel contexts, the rest are user/vnic contexts */
dd->num_rcv_contexts = total_contexts;
/* the first N are kernel contexts, the rest are user/netdev contexts */
dd->num_rcv_contexts =
num_kernel_contexts + n_usr_ctxts + num_netdev_contexts;
dd->n_krcv_queues = num_kernel_contexts;
dd->first_dyn_alloc_ctxt = num_kernel_contexts;
dd->num_netdev_contexts = num_netdev_contexts;
dd->num_user_contexts = n_usr_ctxts;
dd->freectxts = n_usr_ctxts;
dd_dev_info(dd,
"rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n",
"rcv contexts: chip %d, used %d (kernel %d, netdev %u, user %u)\n",
rcv_contexts,
(int)dd->num_rcv_contexts,
(int)dd->n_krcv_queues,
@@ -14554,7 +14549,8 @@ static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd)
u8 ctx_id = 0;
u64 reg;
u32 regoff;
int rmt_start = dd->vnic.rmt_start;
int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
int ctxt_count = hfi1_netdev_ctxt_count(dd);
/* We already have contexts mapped in RMT */
if (has_rsm_rule(dd, RSM_INS_VNIC) || has_rsm_rule(dd, RSM_INS_AIP)) {
@@ -14562,7 +14558,7 @@ static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd)
return true;
}
if (hfi1_is_rmt_full(rmt_start, NUM_VNIC_MAP_ENTRIES)) {
if (hfi1_is_rmt_full(rmt_start, NUM_NETDEV_MAP_ENTRIES)) {
dd_dev_err(dd, "Not enought RMT entries used = %d\n",
rmt_start);
return false;
@@ -14570,27 +14566,27 @@ static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd)
dev_dbg(&(dd)->pcidev->dev, "RMT start = %d, end %d\n",
rmt_start,
rmt_start + NUM_VNIC_MAP_ENTRIES);
rmt_start + NUM_NETDEV_MAP_ENTRIES);
/* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */
regoff = RCV_RSM_MAP_TABLE + (rmt_start / 8) * 8;
reg = read_csr(dd, regoff);
for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) {
for (i = 0; i < NUM_NETDEV_MAP_ENTRIES; i++) {
/* Update map register with netdev context */
j = (rmt_start + i) % 8;
reg &= ~(0xffllu << (j * 8));
reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8);
reg |= (u64)hfi1_netdev_get_ctxt(dd, ctx_id++)->ctxt << (j * 8);
/* Wrap up netdev ctx index */
ctx_id %= dd->vnic.num_ctxt;
ctx_id %= ctxt_count;
/* Write back map register */
if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) {
if (j == 7 || ((i + 1) == NUM_NETDEV_MAP_ENTRIES)) {
dev_dbg(&(dd)->pcidev->dev,
"RMT[%d] =0x%llx\n",
regoff - RCV_RSM_MAP_TABLE, reg);
write_csr(dd, regoff, reg);
regoff += 8;
if (i < (NUM_VNIC_MAP_ENTRIES - 1))
if (i < (NUM_NETDEV_MAP_ENTRIES - 1))
reg = read_csr(dd, regoff);
}
}
@@ -14617,8 +14613,9 @@ void hfi1_init_aip_rsm(struct hfi1_devdata *dd)
* exist yet
*/
if (atomic_fetch_inc(&dd->ipoib_rsm_usr_num) == 0) {
int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
struct rsm_rule_data rrd = {
.offset = dd->vnic.rmt_start,
.offset = rmt_start,
.pkt_type = IB_PACKET_TYPE,
.field1_off = LRH_BTH_MATCH_OFFSET,
.mask1 = LRH_BTH_MASK,
@@ -14627,10 +14624,10 @@ void hfi1_init_aip_rsm(struct hfi1_devdata *dd)
.mask2 = BTH_DESTQP_MASK,
.value2 = BTH_DESTQP_VALUE,
.index1_off = DETH_AIP_SQPN_SELECT_OFFSET +
ilog2(NUM_VNIC_MAP_ENTRIES),
.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES),
ilog2(NUM_NETDEV_MAP_ENTRIES),
.index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
.index2_off = DETH_AIP_SQPN_SELECT_OFFSET,
.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES)
.index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
};
hfi1_enable_rsm_rule(dd, RSM_INS_AIP, &rrd);
@@ -14640,9 +14637,10 @@ void hfi1_init_aip_rsm(struct hfi1_devdata *dd)
/* Initialize RSM for VNIC */
void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
{
int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
struct rsm_rule_data rrd = {
/* Add rule for vnic */
.offset = dd->vnic.rmt_start,
.offset = rmt_start,
.pkt_type = 4,
/* Match 16B packets */
.field1_off = L2_TYPE_MATCH_OFFSET,
@@ -14654,9 +14652,9 @@ void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
.value2 = L4_16B_ETH_VALUE,
/* Calc context from veswid and entropy */
.index1_off = L4_16B_HDR_VESWID_OFFSET,
.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES),
.index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
.index2_off = L2_16B_ENTROPY_OFFSET,
.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES)
.index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
};
hfi1_enable_rsm_rule(dd, RSM_INS_VNIC, &rrd);
@@ -14690,8 +14688,8 @@ static int init_rxe(struct hfi1_devdata *dd)
init_qos(dd, rmt);
init_fecn_handling(dd, rmt);
complete_rsm_map_table(dd, rmt);
/* record number of used rsm map entries for vnic */
dd->vnic.rmt_start = rmt->used;
/* record number of used rsm map entries for netdev */
hfi1_netdev_set_free_rmt_idx(dd, rmt->used);
kfree(rmt);
/*
@@ -15245,6 +15243,10 @@ int hfi1_init_dd(struct hfi1_devdata *dd)
(dd->revision >> CCE_REVISION_SW_SHIFT)
& CCE_REVISION_SW_MASK);
/* alloc netdev data */
if (hfi1_netdev_alloc(dd))
goto bail_cleanup;
ret = set_up_context_variables(dd);
if (ret)
goto bail_cleanup;
@@ -15345,6 +15347,7 @@ int hfi1_init_dd(struct hfi1_devdata *dd)
hfi1_comp_vectors_clean_up(dd);
msix_clean_up_interrupts(dd);
bail_cleanup:
hfi1_netdev_free(dd);
hfi1_pcie_ddcleanup(dd);
bail_free:
hfi1_free_devdata(dd);
......
@@ -1771,28 +1771,10 @@ static void process_receive_ib(struct hfi1_packet *packet)
hfi1_ib_rcv(packet);
}
static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
{
/* Packet received in VNIC context via RSM */
if (packet->rcd->is_vnic)
return true;
if ((hfi1_16B_get_l2(packet->ebuf) == OPA_16B_L2_TYPE) &&
(hfi1_16B_get_l4(packet->ebuf) == OPA_16B_L4_ETHR))
return true;
return false;
}
static void process_receive_bypass(struct hfi1_packet *packet)
{
struct hfi1_devdata *dd = packet->rcd->dd;
if (hfi1_is_vnic_packet(packet)) {
hfi1_vnic_bypass_rcv(packet);
return;
}
if (hfi1_setup_bypass_packet(packet))
return;
......
@@ -1047,23 +1047,10 @@ struct hfi1_asic_data {
#define NUM_MAP_ENTRIES 256
#define NUM_MAP_REGS 32
/*
* Number of VNIC contexts used. Ensure it is less than or equal to
* max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
*/
#define HFI1_NUM_VNIC_CTXT 8
/* Number of VNIC RSM entries */
#define NUM_VNIC_MAP_ENTRIES 8
/* Virtual NIC information */
struct hfi1_vnic_data {
struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
struct kmem_cache *txreq_cache;
struct xarray vesws;
u8 num_vports;
u8 rmt_start;
u8 num_ctxt;
};
struct hfi1_vnic_vport_info;
@@ -1419,6 +1406,7 @@ struct hfi1_devdata {
struct hfi1_vnic_data vnic;
/* Lock to protect IRQ SRC register access */
spinlock_t irq_src_lock;
int vnic_num_vports;
struct net_device *dummy_netdev;
/* Keeps track of IPoIB RSM rule users */
......
@@ -69,6 +69,7 @@
#include "affinity.h"
#include "vnic.h"
#include "exp_rcv.h"
#include "netdev.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -1665,9 +1666,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* do the generic initialization */
initfail = hfi1_init(dd, 0);
/* setup vnic */
hfi1_vnic_setup(dd);
ret = hfi1_register_ib_device(dd);
/*
@@ -1706,7 +1704,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
hfi1_device_remove(dd);
if (!ret)
hfi1_unregister_ib_device(dd);
hfi1_vnic_cleanup(dd);
postinit_cleanup(dd);
if (initfail)
ret = initfail;
@@ -1751,8 +1748,8 @@ static void remove_one(struct pci_dev *pdev)
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
/* cleanup vnic */
hfi1_vnic_cleanup(dd);
/* free netdev data */
hfi1_netdev_free(dd);
/*
* Disable the IB link, disable interrupts on the device,
......
@@ -74,8 +74,15 @@ int hfi1_ipoib_rxq_init(struct net_device *netdev)
{
struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev);
struct hfi1_devdata *dd = ipoib_priv->dd;
int ret;
return hfi1_netdev_rx_init(dd);
ret = hfi1_netdev_rx_init(dd);
if (ret)
return ret;
hfi1_init_aip_rsm(dd);
return ret;
}
void hfi1_ipoib_rxq_deinit(struct net_device *netdev)
@@ -83,5 +90,6 @@ void hfi1_ipoib_rxq_deinit(struct net_device *netdev)
struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev);
struct hfi1_devdata *dd = ipoib_priv->dd;
hfi1_deinit_aip_rsm(dd);
hfi1_netdev_rx_destroy(dd);
}
@@ -172,7 +172,8 @@ static int msix_request_rcd_irq_common(struct hfi1_ctxtdata *rcd,
const char *name)
{
int nr = msix_request_irq(rcd->dd, rcd, handler, thread,
IRQ_RCVCTXT, name);
rcd->is_vnic ? IRQ_NETDEVCTXT : IRQ_RCVCTXT,
name);
if (nr < 0)
return nr;
@@ -371,15 +372,16 @@ void msix_clean_up_interrupts(struct hfi1_devdata *dd)
}
/**
* msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize
* msix_netdev_syncrhonize_irq() - netdev IRQ synchronize
* @dd: valid devdata
*/
void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
void msix_netdev_synchronize_irq(struct hfi1_devdata *dd)
{
int i;
int ctxt_count = hfi1_netdev_ctxt_count(dd);
for (i = 0; i < dd->vnic.num_ctxt; i++) {
struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
for (i = 0; i < ctxt_count; i++) {
struct hfi1_ctxtdata *rcd = hfi1_netdev_get_ctxt(dd, i);
struct hfi1_msix_entry *me;
me = &dd->msix_info.msix_entries[rcd->msix_intr];
......
@@ -60,7 +60,7 @@ int msix_request_sdma_irq(struct sdma_engine *sde);
void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
/* Netdev interface */
void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
void msix_netdev_synchronize_irq(struct hfi1_devdata *dd);
int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd);
#endif
@@ -82,6 +82,25 @@ struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt)
return priv->rxq[ctxt].rcd;
}
static inline
int hfi1_netdev_get_free_rmt_idx(struct hfi1_devdata *dd)
{
struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
return priv->rmt_start;
}
static inline
void hfi1_netdev_set_free_rmt_idx(struct hfi1_devdata *dd, int rmt_idx)
{
struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
priv->rmt_start = rmt_idx;
}
u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
struct cpumask *cpu_mask);
void hfi1_netdev_enable_queues(struct hfi1_devdata *dd);
void hfi1_netdev_disable_queues(struct hfi1_devdata *dd);
int hfi1_netdev_rx_init(struct hfi1_devdata *dd);
......
@@ -140,6 +140,50 @@ static int hfi1_netdev_allot_ctxt(struct hfi1_netdev_priv *priv,
return rc;
}
/**
* hfi1_num_netdev_contexts - Count of netdev recv contexts to use.
* @dd: device on which to allocate netdev contexts
* @available_contexts: count of available receive contexts
* @cpu_mask: mask of possible cpus to include for contexts
*
* Return: count of physical cores on a node or the remaining available recv
* contexts for netdev recv context usage up to the maximum of
* HFI1_MAX_NETDEV_CTXTS.
* A value of 0 can be returned when acceleration is explicitly turned off,
* a memory allocation error occurs or when there are no available contexts.
*
*/
u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
struct cpumask *cpu_mask)
{
cpumask_var_t node_cpu_mask;
unsigned int available_cpus;
if (!HFI1_CAP_IS_KSET(AIP))
return 0;
/* Always give user contexts priority over netdev contexts */
if (available_contexts == 0) {
dd_dev_info(dd, "No receive contexts available for netdevs.\n");
return 0;
}
if (!zalloc_cpumask_var(&node_cpu_mask, GFP_KERNEL)) {
dd_dev_err(dd, "Unable to allocate cpu_mask for netdevs.\n");
return 0;
}
cpumask_and(node_cpu_mask, cpu_mask,
cpumask_of_node(pcibus_to_node(dd->pcidev->bus)));
available_cpus = cpumask_weight(node_cpu_mask);
free_cpumask_var(node_cpu_mask);
return min3(available_cpus, available_contexts,
(u32)HFI1_MAX_NETDEV_CTXTS);
}
static int hfi1_netdev_rxq_init(struct net_device *dev)
{
int i;
@@ -238,7 +282,7 @@ static void disable_queues(struct hfi1_netdev_priv *priv)
{
int i;
msix_vnic_synchronize_irq(priv->dd);
msix_netdev_synchronize_irq(priv->dd);
for (i = 0; i < priv->num_rx_q; i++) {
struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
......
#ifndef _HFI1_VNIC_H
#define _HFI1_VNIC_H
/*
* Copyright(c) 2017 Intel Corporation.
* Copyright(c) 2017 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -69,6 +69,7 @@
#define HFI1_VNIC_SC_SHIFT 4
#define HFI1_VNIC_MAX_QUEUE 16
#define HFI1_NUM_VNIC_CTXT 8
/**
* struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
@@ -104,7 +105,6 @@ struct hfi1_vnic_rx_queue {
struct hfi1_vnic_vport_info *vinfo;
struct net_device *netdev;
struct napi_struct napi;
struct sk_buff_head skbq;
};
/**
@@ -146,7 +146,6 @@ struct hfi1_vnic_vport_info {
/* vnic hfi1 internal functions */
void hfi1_vnic_setup(struct hfi1_devdata *dd);
void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
......