Commit c01035f1 authored by David S. Miller's avatar David S. Miller

Merge branch 'cxgb4-next'

Hariprasad Shenai says:

====================
cxgb4: Use new BAR2 GTS for T5, adds adaptive rx and few Device ID's

This patch series adds support to use new BAR2 GTS for T5 adapter.
Adds support for adaptive rx. Remove redundant variable from a macro of
cxgb4vf driver. Adds Device ID for new adapters.

The patches series is created against 'net-next' tree.
And includes patches on cxgb4 and cxgb4vf driver.

We have included all the maintainers of respective drivers. Kindly review the
change and let us know in case of any review comments.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 825bae5d e553ec3f
......@@ -431,6 +431,7 @@ struct sge_fl { /* SGE free-buffer queue state */
struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */
__be64 *desc; /* address of HW Rx descriptor ring */
dma_addr_t addr; /* bus address of HW ring start */
u64 udb; /* BAR2 offset of User Doorbell area */
};
/* A packet gather list */
......@@ -451,6 +452,7 @@ struct sge_rspq { /* state for an SGE response queue */
u8 gen; /* current generation bit */
u8 intr_params; /* interrupt holdoff parameters */
u8 next_intr_params; /* holdoff params for next interrupt */
u8 adaptive_rx;
u8 pktcnt_idx; /* interrupt packet threshold */
u8 uld; /* ULD handling this queue */
u8 idx; /* queue index within its group */
......@@ -459,6 +461,7 @@ struct sge_rspq { /* state for an SGE response queue */
u16 abs_id; /* absolute SGE id for the response q */
__be64 *desc; /* address of HW response ring */
dma_addr_t phys_addr; /* physical address of the ring */
u64 udb; /* BAR2 offset of User Doorbell area */
unsigned int iqe_len; /* entry size */
unsigned int size; /* capacity of response queue */
struct adapter *adap;
......@@ -516,7 +519,7 @@ struct sge_txq {
int db_disabled;
unsigned short db_pidx;
unsigned short db_pidx_inc;
u64 udb;
u64 udb; /* BAR2 offset of User Doorbell area */
};
struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */
......
......@@ -284,6 +284,8 @@ static const struct pci_device_id cxgb4_pci_tbl[] = {
CH_DEVICE(0x5084, 4),
CH_DEVICE(0x5085, 4),
CH_DEVICE(0x5086, 4),
CH_DEVICE(0x5087, 4),
CH_DEVICE(0x5088, 4),
CH_DEVICE(0x5401, 4),
CH_DEVICE(0x5402, 4),
CH_DEVICE(0x5403, 4),
......@@ -312,6 +314,8 @@ static const struct pci_device_id cxgb4_pci_tbl[] = {
CH_DEVICE(0x5484, 4),
CH_DEVICE(0x5485, 4),
CH_DEVICE(0x5486, 4),
CH_DEVICE(0x5487, 4),
CH_DEVICE(0x5488, 4),
{ 0, }
};
......@@ -2749,8 +2753,31 @@ static int set_rx_intr_params(struct net_device *dev,
return 0;
}
static int set_adaptive_rx_setting(struct net_device *dev, int adaptive_rx)
{
int i;
struct port_info *pi = netdev_priv(dev);
struct adapter *adap = pi->adapter;
struct sge_eth_rxq *q = &adap->sge.ethrxq[pi->first_qset];
for (i = 0; i < pi->nqsets; i++, q++)
q->rspq.adaptive_rx = adaptive_rx;
return 0;
}
static int get_adaptive_rx_setting(struct net_device *dev)
{
struct port_info *pi = netdev_priv(dev);
struct adapter *adap = pi->adapter;
struct sge_eth_rxq *q = &adap->sge.ethrxq[pi->first_qset];
return q->rspq.adaptive_rx;
}
static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
{
set_adaptive_rx_setting(dev, c->use_adaptive_rx_coalesce);
return set_rx_intr_params(dev, c->rx_coalesce_usecs,
c->rx_max_coalesced_frames);
}
......@@ -2764,6 +2791,7 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
c->rx_coalesce_usecs = qtimer_val(adap, rq);
c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN) ?
adap->sge.counter_val[rq->pktcnt_idx] : 0;
c->use_adaptive_rx_coalesce = get_adaptive_rx_setting(dev);
return 0;
}
......
......@@ -203,6 +203,9 @@ enum {
RX_LARGE_MTU_BUF = 0x3, /* large MTU buffer */
};
static int timer_pkt_quota[] = {1, 1, 2, 3, 4, 5};
#define MIN_NAPI_WORK 1
static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *d)
{
return d->dma_addr & ~(dma_addr_t)RX_BUF_FLAGS;
......@@ -521,9 +524,23 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
val = PIDX(q->pend_cred / 8);
if (!is_t4(adap->params.chip))
val |= DBTYPE(1);
val |= DBPRIO(1);
wmb();
t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) |
QID(q->cntxt_id) | val);
/* If we're on T4, use the old doorbell mechanism; otherwise
* use the new BAR2 mechanism.
*/
if (is_t4(adap->params.chip)) {
t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
val | QID(q->cntxt_id));
} else {
writel(val, adap->bar2 + q->udb + SGE_UDB_KDOORBELL);
/* This Write memory Barrier will force the write to
* the User Doorbell area to be flushed.
*/
wmb();
}
q->pend_cred &= 7;
}
}
......@@ -859,30 +876,66 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src)
*/
static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
{
unsigned int *wr, index;
wmb(); /* write descriptors before telling HW */
if (is_t4(adap->params.chip)) {
u32 val = PIDX(n);
unsigned long flags;
wmb(); /* write descriptors before telling HW */
/* For T4 we need to participate in the Doorbell Recovery
* mechanism.
*/
spin_lock_irqsave(&q->db_lock, flags);
if (!q->db_disabled) {
if (is_t4(adap->params.chip)) {
if (!q->db_disabled)
t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
QID(q->cntxt_id) | PIDX(n));
QID(q->cntxt_id) | val);
else
q->db_pidx_inc += n;
q->db_pidx = q->pidx;
spin_unlock_irqrestore(&q->db_lock, flags);
} else {
u32 val = PIDX_T5(n);
/* T4 and later chips share the same PIDX field offset within
* the doorbell, but T5 and later shrank the field in order to
* gain a bit for Doorbell Priority. The field was absurdly
* large in the first place (14 bits) so we just use the T5
* and later limits and warn if a Queue ID is too large.
*/
WARN_ON(val & DBPRIO(1));
/* For T5 and later we use the Write-Combine mapped BAR2 User
* Doorbell mechanism. If we're only writing a single TX
* Descriptor and TX Write Combining hasn't been disabled, we
* can use the Write Combining Gather Buffer; otherwise we use
* the simple doorbell.
*/
if (n == 1) {
index = q->pidx ? (q->pidx - 1) : (q->size - 1);
wr = (unsigned int *)&q->desc[index];
int index = (q->pidx
? (q->pidx - 1)
: (q->size - 1));
unsigned int *wr = (unsigned int *)&q->desc[index];
cxgb_pio_copy((u64 __iomem *)
(adap->bar2 + q->udb + 64),
(adap->bar2 + q->udb +
SGE_UDB_WCDOORBELL),
(u64 *)wr);
} else
writel(n, adap->bar2 + q->udb + 8);
} else {
writel(val, adap->bar2 + q->udb + SGE_UDB_KDOORBELL);
}
/* This Write Memory Barrier will force the write to the User
* Doorbell area to be flushed. This is needed to prevent
* writes on different CPUs for the same queue from hitting
* the adapter out of order. This is required when some Work
* Requests take the Write Combine Gather Buffer path (user
* doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some
* take the traditional path where we simply increment the
* PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the
* hardware DMA read the actual Work Request.
*/
wmb();
}
} else
q->db_pidx_inc += n;
q->db_pidx = q->pidx;
spin_unlock_irqrestore(&q->db_lock, flags);
}
/**
......@@ -1916,16 +1969,40 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
unsigned int params;
struct sge_rspq *q = container_of(napi, struct sge_rspq, napi);
int work_done = process_responses(q, budget);
u32 val;
if (likely(work_done < budget)) {
int timer_index;
napi_complete(napi);
timer_index = QINTR_TIMER_IDX_GET(q->next_intr_params);
if (q->adaptive_rx) {
if (work_done > max(timer_pkt_quota[timer_index],
MIN_NAPI_WORK))
timer_index = (timer_index + 1);
else
timer_index = timer_index - 1;
timer_index = clamp(timer_index, 0, SGE_TIMERREGS - 1);
q->next_intr_params = QINTR_TIMER_IDX(timer_index) |
V_QINTR_CNT_EN;
params = q->next_intr_params;
} else {
params = q->next_intr_params;
q->next_intr_params = q->intr_params;
}
} else
params = QINTR_TIMER_IDX(7);
t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), CIDXINC(work_done) |
INGRESSQID((u32)q->cntxt_id) | SEINTARM(params));
val = CIDXINC(work_done) | SEINTARM(params);
if (is_t4(q->adap->params.chip)) {
t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS),
val | INGRESSQID((u32)q->cntxt_id));
} else {
writel(val, q->adap->bar2 + q->udb + SGE_UDB_GTS);
wmb();
}
return work_done;
}
......@@ -1949,6 +2026,7 @@ static unsigned int process_intrq(struct adapter *adap)
unsigned int credits;
const struct rsp_ctrl *rc;
struct sge_rspq *q = &adap->sge.intrq;
u32 val;
spin_lock(&adap->sge.intrq_lock);
for (credits = 0; ; credits++) {
......@@ -1967,8 +2045,14 @@ static unsigned int process_intrq(struct adapter *adap)
rspq_next(q);
}
t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), CIDXINC(credits) |
INGRESSQID(q->cntxt_id) | SEINTARM(q->intr_params));
val = CIDXINC(credits) | SEINTARM(q->intr_params);
if (is_t4(adap->params.chip)) {
t4_write_reg(adap, MYPF_REG(SGE_PF_GTS),
val | INGRESSQID(q->cntxt_id));
} else {
writel(val, adap->bar2 + q->udb + SGE_UDB_GTS);
wmb();
}
spin_unlock(&adap->sge.intrq_lock);
return credits;
}
......@@ -2149,6 +2233,51 @@ static void sge_tx_timer_cb(unsigned long data)
mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
}
/**
* udb_address - return the BAR2 User Doorbell address for a Queue
* @adap: the adapter
* @cntxt_id: the Queue Context ID
* @qpp: Queues Per Page (for all PFs)
*
* Returns the BAR2 address of the user Doorbell associated with the
* indicated Queue Context ID. Note that this is only applicable
* for T5 and later.
*/
static u64 udb_address(struct adapter *adap, unsigned int cntxt_id,
unsigned int qpp)
{
u64 udb;
unsigned int s_qpp;
unsigned short udb_density;
unsigned long qpshift;
int page;
BUG_ON(is_t4(adap->params.chip));
s_qpp = (QUEUESPERPAGEPF0 +
(QUEUESPERPAGEPF1 - QUEUESPERPAGEPF0) * adap->fn);
udb_density = 1 << ((qpp >> s_qpp) & QUEUESPERPAGEPF0_MASK);
qpshift = PAGE_SHIFT - ilog2(udb_density);
udb = cntxt_id << qpshift;
udb &= PAGE_MASK;
page = udb / PAGE_SIZE;
udb += (cntxt_id - (page * udb_density)) * SGE_UDB_SIZE;
return udb;
}
static u64 udb_address_eq(struct adapter *adap, unsigned int cntxt_id)
{
return udb_address(adap, cntxt_id,
t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF));
}
static u64 udb_address_iq(struct adapter *adap, unsigned int cntxt_id)
{
return udb_address(adap, cntxt_id,
t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF));
}
int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
struct net_device *dev, int intr_idx,
struct sge_fl *fl, rspq_handler_t hnd)
......@@ -2214,6 +2343,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
iq->next_intr_params = iq->intr_params;
iq->cntxt_id = ntohs(c.iqid);
iq->abs_id = ntohs(c.physiqid);
if (!is_t4(adap->params.chip))
iq->udb = udb_address_iq(adap, iq->cntxt_id);
iq->size--; /* subtract status entry */
iq->netdev = dev;
iq->handler = hnd;
......@@ -2229,6 +2360,12 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
fl->pidx = fl->cidx = 0;
fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0;
adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl;
/* Note, we must initialize the Free List User Doorbell
* address before refilling the Free List!
*/
if (!is_t4(adap->params.chip))
fl->udb = udb_address_eq(adap, fl->cntxt_id);
refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL);
}
return 0;
......@@ -2254,21 +2391,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
{
q->cntxt_id = id;
if (!is_t4(adap->params.chip)) {
unsigned int s_qpp;
unsigned short udb_density;
unsigned long qpshift;
int page;
s_qpp = QUEUESPERPAGEPF1 * adap->fn;
udb_density = 1 << QUEUESPERPAGEPF0_GET((t4_read_reg(adap,
SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp));
qpshift = PAGE_SHIFT - ilog2(udb_density);
q->udb = q->cntxt_id << qpshift;
q->udb &= PAGE_MASK;
page = q->udb / PAGE_SIZE;
q->udb += (q->cntxt_id - (page * udb_density)) * 128;
}
if (!is_t4(adap->params.chip))
q->udb = udb_address_eq(adap, q->cntxt_id);
q->in_use = 0;
q->cidx = q->pidx = 0;
......
......@@ -135,6 +135,7 @@ struct rsp_ctrl {
#define RSPD_GEN(x) ((x) >> 7)
#define RSPD_TYPE(x) (((x) >> 4) & 3)
#define V_QINTR_CNT_EN 0x0
#define QINTR_CNT_EN 0x1
#define QINTR_TIMER_IDX(x) ((x) << 1)
#define QINTR_TIMER_IDX_GET(x) (((x) >> 1) & 0x7)
......
......@@ -77,6 +77,7 @@
#define PIDX_T5(x) (((x) >> S_PIDX_T5) & M_PIDX_T5)
#define SGE_TIMERREGS 6
#define SGE_PF_GTS 0x4
#define INGRESSQID_MASK 0xffff0000U
#define INGRESSQID_SHIFT 16
......@@ -157,8 +158,27 @@
#define QUEUESPERPAGEPF0_MASK 0x0000000fU
#define QUEUESPERPAGEPF0_GET(x) ((x) & QUEUESPERPAGEPF0_MASK)
#define QUEUESPERPAGEPF0 0
#define QUEUESPERPAGEPF1 4
/* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues.
* The User Doorbells are each 128 bytes in length with a Simple Doorbell at
* offsets 8x and a Write Combining single 64-byte Egress Queue Unit
* (X_IDXSIZE_UNIT) Gather Buffer interface at offset 64. For Ingress Queues,
* we have a Going To Sleep register at offsets 8x+4.
*
* As noted above, we have many instances of the Simple Doorbell and Going To
* Sleep registers at offsets 8x and 8x+4, respectively. We want to use a
* non-64-byte aligned offset for the Simple Doorbell in order to attempt to
* avoid buffering of the writes to the Simple Doorbell and we want to use a
* non-contiguous offset for the Going To Sleep writes in order to avoid
* possible combining between them.
*/
#define SGE_UDB_SIZE 128
#define SGE_UDB_KDOORBELL 8
#define SGE_UDB_GTS 20
#define SGE_UDB_WCDOORBELL 64
#define SGE_INT_CAUSE1 0x1024
#define SGE_INT_CAUSE2 0x1030
#define SGE_INT_CAUSE3 0x103c
......
......@@ -2907,60 +2907,62 @@ static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
/*
* PCI Device registration data structures.
*/
#define CH_DEVICE(devid, idx) \
{ PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, PCI_ANY_ID, 0, 0, idx }
#define CH_DEVICE(devid) \
{ PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }
static const struct pci_device_id cxgb4vf_pci_tbl[] = {
CH_DEVICE(0xb000, 0), /* PE10K FPGA */
CH_DEVICE(0x4801, 0), /* T420-cr */
CH_DEVICE(0x4802, 0), /* T422-cr */
CH_DEVICE(0x4803, 0), /* T440-cr */
CH_DEVICE(0x4804, 0), /* T420-bch */
CH_DEVICE(0x4805, 0), /* T440-bch */
CH_DEVICE(0x4806, 0), /* T460-ch */
CH_DEVICE(0x4807, 0), /* T420-so */
CH_DEVICE(0x4808, 0), /* T420-cx */
CH_DEVICE(0x4809, 0), /* T420-bt */
CH_DEVICE(0x480a, 0), /* T404-bt */
CH_DEVICE(0x480d, 0), /* T480-cr */
CH_DEVICE(0x480e, 0), /* T440-lp-cr */
CH_DEVICE(0x4880, 0),
CH_DEVICE(0x4880, 1),
CH_DEVICE(0x4880, 2),
CH_DEVICE(0x4880, 3),
CH_DEVICE(0x4880, 4),
CH_DEVICE(0x4880, 5),
CH_DEVICE(0x4880, 6),
CH_DEVICE(0x4880, 7),
CH_DEVICE(0x4880, 8),
CH_DEVICE(0x5801, 0), /* T520-cr */
CH_DEVICE(0x5802, 0), /* T522-cr */
CH_DEVICE(0x5803, 0), /* T540-cr */
CH_DEVICE(0x5804, 0), /* T520-bch */
CH_DEVICE(0x5805, 0), /* T540-bch */
CH_DEVICE(0x5806, 0), /* T540-ch */
CH_DEVICE(0x5807, 0), /* T520-so */
CH_DEVICE(0x5808, 0), /* T520-cx */
CH_DEVICE(0x5809, 0), /* T520-bt */
CH_DEVICE(0x580a, 0), /* T504-bt */
CH_DEVICE(0x580b, 0), /* T520-sr */
CH_DEVICE(0x580c, 0), /* T504-bt */
CH_DEVICE(0x580d, 0), /* T580-cr */
CH_DEVICE(0x580e, 0), /* T540-lp-cr */
CH_DEVICE(0x580f, 0), /* Amsterdam */
CH_DEVICE(0x5810, 0), /* T580-lp-cr */
CH_DEVICE(0x5811, 0), /* T520-lp-cr */
CH_DEVICE(0x5812, 0), /* T560-cr */
CH_DEVICE(0x5813, 0), /* T580-cr */
CH_DEVICE(0x5814, 0), /* T580-so-cr */
CH_DEVICE(0x5815, 0), /* T502-bt */
CH_DEVICE(0x5880, 0),
CH_DEVICE(0x5881, 0),
CH_DEVICE(0x5882, 0),
CH_DEVICE(0x5883, 0),
CH_DEVICE(0x5884, 0),
CH_DEVICE(0x5885, 0),
CH_DEVICE(0x5886, 0),
CH_DEVICE(0xb000), /* PE10K FPGA */
CH_DEVICE(0x4801), /* T420-cr */
CH_DEVICE(0x4802), /* T422-cr */
CH_DEVICE(0x4803), /* T440-cr */
CH_DEVICE(0x4804), /* T420-bch */
CH_DEVICE(0x4805), /* T440-bch */
CH_DEVICE(0x4806), /* T460-ch */
CH_DEVICE(0x4807), /* T420-so */
CH_DEVICE(0x4808), /* T420-cx */
CH_DEVICE(0x4809), /* T420-bt */
CH_DEVICE(0x480a), /* T404-bt */
CH_DEVICE(0x480d), /* T480-cr */
CH_DEVICE(0x480e), /* T440-lp-cr */
CH_DEVICE(0x4880),
CH_DEVICE(0x4880),
CH_DEVICE(0x4880),
CH_DEVICE(0x4880),
CH_DEVICE(0x4880),
CH_DEVICE(0x4880),
CH_DEVICE(0x4880),
CH_DEVICE(0x4880),
CH_DEVICE(0x4880),
CH_DEVICE(0x5801), /* T520-cr */
CH_DEVICE(0x5802), /* T522-cr */
CH_DEVICE(0x5803), /* T540-cr */
CH_DEVICE(0x5804), /* T520-bch */
CH_DEVICE(0x5805), /* T540-bch */
CH_DEVICE(0x5806), /* T540-ch */
CH_DEVICE(0x5807), /* T520-so */
CH_DEVICE(0x5808), /* T520-cx */
CH_DEVICE(0x5809), /* T520-bt */
CH_DEVICE(0x580a), /* T504-bt */
CH_DEVICE(0x580b), /* T520-sr */
CH_DEVICE(0x580c), /* T504-bt */
CH_DEVICE(0x580d), /* T580-cr */
CH_DEVICE(0x580e), /* T540-lp-cr */
CH_DEVICE(0x580f), /* Amsterdam */
CH_DEVICE(0x5810), /* T580-lp-cr */
CH_DEVICE(0x5811), /* T520-lp-cr */
CH_DEVICE(0x5812), /* T560-cr */
CH_DEVICE(0x5813), /* T580-cr */
CH_DEVICE(0x5814), /* T580-so-cr */
CH_DEVICE(0x5815), /* T502-bt */
CH_DEVICE(0x5880),
CH_DEVICE(0x5881),
CH_DEVICE(0x5882),
CH_DEVICE(0x5883),
CH_DEVICE(0x5884),
CH_DEVICE(0x5885),
CH_DEVICE(0x5886),
CH_DEVICE(0x5887),
CH_DEVICE(0x5888),
{ 0, }
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment