Commit d63a6dcf authored by Hariprasad Shenai, committed by David S. Miller

cxgb4: Use BAR2 Going To Sleep (GTS) for T5 and later.

Use BAR2 GTS for T5 and later. If we are on T4, use the old doorbell
mechanism; otherwise use the new BAR2 mechanism. Also use BAR2 doorbells
for refilling Free Lists.

Based on original work by Casey Leedom <leedom@chelsio.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 825bae5d
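
For orientation, a minimal sketch (not part of the patch) of the write pattern the diff below introduces for T5 and later: each queue owns a small User Doorbell region in BAR2, located by the per-queue q->udb offset, with individual doorbell registers at fixed offsets inside that region. The udb_reg() helper is hypothetical; adap->bar2, q->udb and the SGE_UDB_* offsets all come from the patch itself.

/* Hypothetical helper, for illustration only: resolve the MMIO address
 * of one doorbell register inside a queue's 128-byte BAR2 region.
 */
static inline void __iomem *udb_reg(void __iomem *bar2, u64 udb,
                                    unsigned int reg)
{
        return bar2 + udb + reg;   /* reg is one of the SGE_UDB_* offsets */
}

/* Egress: bump the producer index without touching SGE_PF_KDOORBELL. */
writel(val, udb_reg(adap->bar2, q->udb, SGE_UDB_KDOORBELL));
/* Ingress: return credits / re-arm interrupts without SGE_PF_GTS. */
writel(val, udb_reg(adap->bar2, q->udb, SGE_UDB_GTS));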
@@ -431,6 +431,7 @@ struct sge_fl {                     /* SGE free-buffer queue state */
         struct rx_sw_desc *sdesc;   /* address of SW Rx descriptor ring */
         __be64 *desc;               /* address of HW Rx descriptor ring */
         dma_addr_t addr;            /* bus address of HW ring start */
+        u64 udb;                    /* BAR2 offset of User Doorbell area */
 };
 
 /* A packet gather list */
@@ -459,6 +460,7 @@ struct sge_rspq {                   /* state for an SGE response queue */
         u16 abs_id;                 /* absolute SGE id for the response q */
         __be64 *desc;               /* address of HW response ring */
         dma_addr_t phys_addr;       /* physical address of the ring */
+        u64 udb;                    /* BAR2 offset of User Doorbell area */
         unsigned int iqe_len;       /* entry size */
         unsigned int size;          /* capacity of response queue */
         struct adapter *adap;
@@ -516,7 +518,7 @@ struct sge_txq {
         int db_disabled;
         unsigned short db_pidx;
         unsigned short db_pidx_inc;
-        u64 udb;
+        u64 udb;                    /* BAR2 offset of User Doorbell area */
 };
 
 struct sge_eth_txq {                /* state for an SGE Ethernet Tx queue */
...
@@ -521,9 +521,23 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
                 val = PIDX(q->pend_cred / 8);
                 if (!is_t4(adap->params.chip))
                         val |= DBTYPE(1);
+                val |= DBPRIO(1);
                 wmb();
-                t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) |
-                             QID(q->cntxt_id) | val);
+
+                /* If we're on T4, use the old doorbell mechanism; otherwise
+                 * use the new BAR2 mechanism.
+                 */
+                if (is_t4(adap->params.chip)) {
+                        t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
+                                     val | QID(q->cntxt_id));
+                } else {
+                        writel(val, adap->bar2 + q->udb + SGE_UDB_KDOORBELL);
+
+                        /* This Write memory Barrier will force the write to
+                         * the User Doorbell area to be flushed.
+                         */
+                        wmb();
+                }
                 q->pend_cred &= 7;
         }
 }
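
A short worked example may help with the credit arithmetic above (illustrative only, not part of the patch): free-list credits are posted to the hardware in whole units of 8 descriptors, and the remainder is carried over to the next doorbell.

/* Suppose 21 descriptors have been replenished since the last doorbell. */
q->pend_cred = 21;
val = PIDX(q->pend_cred / 8);   /* PIDX(2): post 2 units = 16 descriptors */
q->pend_cred &= 7;              /* 5 descriptors stay pending for next time */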
@@ -859,30 +873,66 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src)
  */
 static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
 {
-        unsigned int *wr, index;
-        unsigned long flags;
-
         wmb();            /* write descriptors before telling HW */
-        spin_lock_irqsave(&q->db_lock, flags);
-        if (!q->db_disabled) {
-                if (is_t4(adap->params.chip)) {
-                        t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
-                                     QID(q->cntxt_id) | PIDX(n));
-                } else {
-                        if (n == 1) {
-                                index = q->pidx ? (q->pidx - 1) : (q->size - 1);
-                                wr = (unsigned int *)&q->desc[index];
-                                cxgb_pio_copy((u64 __iomem *)
-                                              (adap->bar2 + q->udb + 64),
-                                              (u64 *)wr);
-                        } else
-                                writel(n, adap->bar2 + q->udb + 8);
-                        wmb();
-                }
-        } else
-                q->db_pidx_inc += n;
-        q->db_pidx = q->pidx;
-        spin_unlock_irqrestore(&q->db_lock, flags);
+
+        if (is_t4(adap->params.chip)) {
+                u32 val = PIDX(n);
+                unsigned long flags;
+
+                /* For T4 we need to participate in the Doorbell Recovery
+                 * mechanism.
+                 */
+                spin_lock_irqsave(&q->db_lock, flags);
+                if (!q->db_disabled)
+                        t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
+                                     QID(q->cntxt_id) | val);
+                else
+                        q->db_pidx_inc += n;
+                q->db_pidx = q->pidx;
+                spin_unlock_irqrestore(&q->db_lock, flags);
+        } else {
+                u32 val = PIDX_T5(n);
+
+                /* T4 and later chips share the same PIDX field offset within
+                 * the doorbell, but T5 and later shrank the field in order to
+                 * gain a bit for Doorbell Priority.  The field was absurdly
+                 * large in the first place (14 bits) so we just use the T5
+                 * and later limits and warn if a Queue ID is too large.
+                 */
+                WARN_ON(val & DBPRIO(1));
+
+                /* For T5 and later we use the Write-Combine mapped BAR2 User
+                 * Doorbell mechanism.  If we're only writing a single TX
+                 * Descriptor and TX Write Combining hasn't been disabled, we
+                 * can use the Write Combining Gather Buffer; otherwise we use
+                 * the simple doorbell.
+                 */
+                if (n == 1) {
+                        int index = (q->pidx
+                                     ? (q->pidx - 1)
+                                     : (q->size - 1));
+                        unsigned int *wr = (unsigned int *)&q->desc[index];
+
+                        cxgb_pio_copy((u64 __iomem *)
+                                      (adap->bar2 + q->udb +
+                                       SGE_UDB_WCDOORBELL),
+                                      (u64 *)wr);
+                } else {
+                        writel(val, adap->bar2 + q->udb + SGE_UDB_KDOORBELL);
+                }
+
+                /* This Write Memory Barrier will force the write to the User
+                 * Doorbell area to be flushed.  This is needed to prevent
+                 * writes on different CPUs for the same queue from hitting
+                 * the adapter out of order.  This is required when some Work
+                 * Requests take the Write Combine Gather Buffer path (user
+                 * doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some
+                 * take the traditional path where we simply increment the
+                 * PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the
+                 * hardware DMA read the actual Work Request.
+                 */
+                wmb();
+        }
 }
 
 /**
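
The single-descriptor fast path above relies on cxgb_pio_copy(), an existing driver helper declared just above this hunk. A plausible sketch of what such a 64-byte gather-buffer copy looks like (an assumption for illustration; the driver's real implementation may differ, for instance by falling back to pairs of 32-bit writes on 32-bit kernels):

/* Sketch only: push one 64-byte Work Request into the Write Combining
 * Gather Buffer as eight 64-bit MMIO stores.  With a WC mapping these
 * coalesce into a single 64-byte burst, so the adapter receives the
 * Work Request together with the doorbell and skips the DMA read.
 */
static void cxgb_pio_copy_sketch(u64 __iomem *dst, const u64 *src)
{
        int count = 64 / sizeof(u64);   /* one Egress Queue Unit */

        while (count--)
                writeq(*src++, dst++);
}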
@@ -1916,6 +1966,7 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
         unsigned int params;
         struct sge_rspq *q = container_of(napi, struct sge_rspq, napi);
         int work_done = process_responses(q, budget);
+        u32 val;
 
         if (likely(work_done < budget)) {
                 napi_complete(napi);
@@ -1924,8 +1975,14 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
         } else
                 params = QINTR_TIMER_IDX(7);
 
-        t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), CIDXINC(work_done) |
-                     INGRESSQID((u32)q->cntxt_id) | SEINTARM(params));
+        val = CIDXINC(work_done) | SEINTARM(params);
+        if (is_t4(q->adap->params.chip)) {
+                t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS),
+                             val | INGRESSQID((u32)q->cntxt_id));
+        } else {
+                writel(val, q->adap->bar2 + q->udb + SGE_UDB_GTS);
+                wmb();
+        }
         return work_done;
 }
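
One detail worth calling out in the hunk above: the legacy SGE_PF_GTS register is shared, so the target ingress queue must ride in the data word, whereas on the BAR2 path the doorbell address itself selects the queue. An illustrative restatement of the patch's own logic, with that distinction annotated:

val = CIDXINC(work_done) | SEINTARM(params);
if (is_t4(q->adap->params.chip))
        /* Shared register: the queue identity travels in the data. */
        t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS),
                     val | INGRESSQID((u32)q->cntxt_id));
else
        /* Per-queue register: q->udb already encodes the queue. */
        writel(val, q->adap->bar2 + q->udb + SGE_UDB_GTS);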
@@ -1949,6 +2006,7 @@ static unsigned int process_intrq(struct adapter *adap)
         unsigned int credits;
         const struct rsp_ctrl *rc;
         struct sge_rspq *q = &adap->sge.intrq;
+        u32 val;
 
         spin_lock(&adap->sge.intrq_lock);
         for (credits = 0; ; credits++) {
@@ -1967,8 +2025,14 @@ static unsigned int process_intrq(struct adapter *adap)
                 rspq_next(q);
         }
 
-        t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), CIDXINC(credits) |
-                     INGRESSQID(q->cntxt_id) | SEINTARM(q->intr_params));
+        val = CIDXINC(credits) | SEINTARM(q->intr_params);
+        if (is_t4(adap->params.chip)) {
+                t4_write_reg(adap, MYPF_REG(SGE_PF_GTS),
+                             val | INGRESSQID(q->cntxt_id));
+        } else {
+                writel(val, adap->bar2 + q->udb + SGE_UDB_GTS);
+                wmb();
+        }
         spin_unlock(&adap->sge.intrq_lock);
         return credits;
 }
@@ -2149,6 +2213,51 @@ static void sge_tx_timer_cb(unsigned long data)
         mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
 }
 
+/**
+ * udb_address - return the BAR2 User Doorbell address for a Queue
+ * @adap: the adapter
+ * @cntxt_id: the Queue Context ID
+ * @qpp: Queues Per Page (for all PFs)
+ *
+ * Returns the BAR2 address of the user Doorbell associated with the
+ * indicated Queue Context ID.  Note that this is only applicable
+ * for T5 and later.
+ */
+static u64 udb_address(struct adapter *adap, unsigned int cntxt_id,
+                       unsigned int qpp)
+{
+        u64 udb;
+        unsigned int s_qpp;
+        unsigned short udb_density;
+        unsigned long qpshift;
+        int page;
+
+        BUG_ON(is_t4(adap->params.chip));
+
+        s_qpp = (QUEUESPERPAGEPF0 +
+                (QUEUESPERPAGEPF1 - QUEUESPERPAGEPF0) * adap->fn);
+        udb_density = 1 << ((qpp >> s_qpp) & QUEUESPERPAGEPF0_MASK);
+        qpshift = PAGE_SHIFT - ilog2(udb_density);
+        udb = cntxt_id << qpshift;
+        udb &= PAGE_MASK;
+        page = udb / PAGE_SIZE;
+        udb += (cntxt_id - (page * udb_density)) * SGE_UDB_SIZE;
+        return udb;
+}
+
+static u64 udb_address_eq(struct adapter *adap, unsigned int cntxt_id)
+{
+        return udb_address(adap, cntxt_id,
+                           t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF));
+}
+
+static u64 udb_address_iq(struct adapter *adap, unsigned int cntxt_id)
+{
+        return udb_address(adap, cntxt_id,
+                           t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF));
+}
+
 int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
                      struct net_device *dev, int intr_idx,
                      struct sge_fl *fl, rspq_handler_t hnd)
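
To make the address arithmetic in udb_address() concrete, a worked example with assumed values: PF 0, PAGE_SIZE = 4096, and a Queues-Per-Page register field of 1, i.e. udb_density = 1 << 1 = 2 queues per BAR2 page.

/* qpshift = PAGE_SHIFT - ilog2(2) = 11.  For cntxt_id = 51:
 *
 *      udb   = 51 << 11       = 104448
 *      udb  &= PAGE_MASK      = 102400   (base of page 25)
 *      page  = 102400 / 4096  = 25       (holds queues 50 and 51)
 *      udb  += (51 - 25 * 2) * SGE_UDB_SIZE = 102400 + 128 = 102528
 *
 * Queue 51 is thus the second queue on its BAR2 page, and its 128-byte
 * User Doorbell region starts 128 bytes past the page base.
 */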
@@ -2214,6 +2323,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
         iq->next_intr_params = iq->intr_params;
         iq->cntxt_id = ntohs(c.iqid);
         iq->abs_id = ntohs(c.physiqid);
+        if (!is_t4(adap->params.chip))
+                iq->udb = udb_address_iq(adap, iq->cntxt_id);
         iq->size--;                           /* subtract status entry */
         iq->netdev = dev;
         iq->handler = hnd;
@@ -2229,6 +2340,12 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
                 fl->pidx = fl->cidx = 0;
                 fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0;
                 adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl;
+
+                /* Note, we must initialize the Free List User Doorbell
+                 * address before refilling the Free List!
+                 */
+                if (!is_t4(adap->params.chip))
+                        fl->udb = udb_address_eq(adap, fl->cntxt_id);
                 refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL);
         }
         return 0;
@@ -2254,21 +2371,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
 {
         q->cntxt_id = id;
-        if (!is_t4(adap->params.chip)) {
-                unsigned int s_qpp;
-                unsigned short udb_density;
-                unsigned long qpshift;
-                int page;
-
-                s_qpp = QUEUESPERPAGEPF1 * adap->fn;
-                udb_density = 1 << QUEUESPERPAGEPF0_GET((t4_read_reg(adap,
-                                SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp));
-                qpshift = PAGE_SHIFT - ilog2(udb_density);
-                q->udb = q->cntxt_id << qpshift;
-                q->udb &= PAGE_MASK;
-                page = q->udb / PAGE_SIZE;
-                q->udb += (q->cntxt_id - (page * udb_density)) * 128;
-        }
+        if (!is_t4(adap->params.chip))
+                q->udb = udb_address_eq(adap, q->cntxt_id);
+
         q->in_use = 0;
         q->cidx = q->pidx = 0;
...
@@ -157,8 +157,27 @@
 #define  QUEUESPERPAGEPF0_MASK   0x0000000fU
 #define  QUEUESPERPAGEPF0_GET(x) ((x) & QUEUESPERPAGEPF0_MASK)
 
+#define QUEUESPERPAGEPF0    0
 #define QUEUESPERPAGEPF1    4
 
+/* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues.
+ * The User Doorbells are each 128 bytes in length with a Simple Doorbell at
+ * offsets 8x and a Write Combining single 64-byte Egress Queue Unit
+ * (X_IDXSIZE_UNIT) Gather Buffer interface at offset 64.  For Ingress Queues,
+ * we have a Going To Sleep register at offsets 8x+4.
+ *
+ * As noted above, we have many instances of the Simple Doorbell and Going To
+ * Sleep registers at offsets 8x and 8x+4, respectively.  We want to use a
+ * non-64-byte aligned offset for the Simple Doorbell in order to attempt to
+ * avoid buffering of the writes to the Simple Doorbell and we want to use a
+ * non-contiguous offset for the Going To Sleep writes in order to avoid
+ * possible combining between them.
+ */
+#define SGE_UDB_SIZE        128
+#define SGE_UDB_KDOORBELL   8
+#define SGE_UDB_GTS         20
+#define SGE_UDB_WCDOORBELL  64
+
 #define SGE_INT_CAUSE1 0x1024
 #define SGE_INT_CAUSE2 0x1030
 #define SGE_INT_CAUSE3 0x103c
...
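
Putting the new constants together, the implied layout of a single 128-byte User Doorbell region (a sketch inferred from the defines and the comment above; the 8x / 8x+4 register instances repeat through the region, and these are merely the offsets the driver elects to use):

/*      offset  constant                purpose
 *      0x08    SGE_UDB_KDOORBELL       Simple Doorbell (egress PIDX write)
 *      0x14    SGE_UDB_GTS             Going To Sleep (ingress, 8x+4, x = 2)
 *      0x40    SGE_UDB_WCDOORBELL      64-byte Write Combining Gather Buffer
 *      0x80    SGE_UDB_SIZE            start of the next queue's region
 */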