Commit c042f0c7 authored by Johannes Berg's avatar Johannes Berg Committed by Luca Coelho

iwlwifi: allocate more receive buffers for HE devices

For HE-capable devices, we need to allocate more receive buffers as
there could be 256 frames aggregated into a single A-MPDU, and then
they might contain A-MSDUs as well. Until 22000 family, the devices
are able to put multiple frames into a single RB and the default RB
size is 4k, but starting from AX210 family this is no longer true.
On the other hand, those newer devices only use 2k receive buffers
(by default).

Modify the code and configuration to allocate an appropriate number
of RBs depending on the device capabilities:

 * 4096 for AX210 HE devices, which use 2k buffers by default,
 * 2048 for 22000 family devices which use 4k buffers by default,
 * 512 for existing 9000 family devices, which doesn't really
   change anything since that's the default before this patch,
 * 512 also for AX210/22000 family devices that don't do HE.

Theoretically, for devices lower than AX210, we wouldn't have to
allocate that many RBs if the RB size was manually increased, but
to support that the code got more complex, and it didn't really
seem necessary as that's a use case for monitor mode only, where
hopefully the wasted memory isn't really much of a concern.

Note that AX210 devices actually support bigger than 12-bit VID,
which is required here as we want to allocate 4096 buffers plus
some for quick recycling, so adjust the code for that as well.
Signed-off-by: default avatarJohannes Berg <johannes.berg@intel.com>
Signed-off-by: default avatarLuca Coelho <luciano.coelho@intel.com>
parent b1a6db13
......@@ -138,6 +138,7 @@ static const struct iwl_tt_params iwl9000_tt_params = {
.thermal_params = &iwl9000_tt_params, \
.apmg_not_supported = true, \
.trans.mq_rx_supported = true, \
.num_rbds = 512, \
.vht_mu_mimo_supported = true, \
.mac_addr_from_csr = true, \
.trans.rf_id = true, \
......
......@@ -431,6 +431,8 @@ struct iwl_fw_mon_regs {
* @uhb_supported: ultra high band channels supported
* @min_256_ba_txq_size: minimum number of slots required in a TX queue which
* supports 256 BA aggregation
* @num_rbds: number of receive buffer descriptors to use
* (only used for multi-queue capable devices)
*
* We enable the driver to be backward compatible wrt. hardware features.
* API differences in uCode shouldn't be handled here but through TLVs
......@@ -485,6 +487,7 @@ struct iwl_cfg {
u8 max_vht_ampdu_exponent;
u8 ucode_api_max;
u8 ucode_api_min;
u16 num_rbds;
u32 min_umac_error_event_table;
u32 extra_phy_cfg_flags;
u32 d3_debug_data_base_addr;
......
......@@ -6,7 +6,7 @@
* GPL LICENSE SUMMARY
*
* Copyright(c) 2017 Intel Deutschland GmbH
* Copyright(c) 2018 Intel Corporation
* Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
......@@ -20,7 +20,7 @@
* BSD LICENSE
*
* Copyright(c) 2017 Intel Deutschland GmbH
* Copyright(c) 2018 Intel Corporation
* Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
......@@ -64,12 +64,12 @@
* the init done for driver command that configures several system modes
* @IWL_CTXT_INFO_EARLY_DEBUG: enable early debug
* @IWL_CTXT_INFO_ENABLE_CDMP: enable core dump
* @IWL_CTXT_INFO_RB_CB_SIZE_POS: position of the RBD Cyclic Buffer Size
* @IWL_CTXT_INFO_RB_CB_SIZE: mask of the RBD Cyclic Buffer Size
* exponent, the actual size is 2**value, valid sizes are 8-2048.
* The value is four bits long. Maximum valid exponent is 12
* @IWL_CTXT_INFO_TFD_FORMAT_LONG: use long TFD Format (the
* default is short format - not supported by the driver)
* @IWL_CTXT_INFO_RB_SIZE_POS: RB size position
* @IWL_CTXT_INFO_RB_SIZE: RB size mask
* (values are IWL_CTXT_INFO_RB_SIZE_*K)
* @IWL_CTXT_INFO_RB_SIZE_1K: Value for 1K RB size
* @IWL_CTXT_INFO_RB_SIZE_2K: Value for 2K RB size
......@@ -83,12 +83,12 @@
* @IWL_CTXT_INFO_RB_SIZE_32K: Value for 32K RB size
*/
enum iwl_context_info_flags {
IWL_CTXT_INFO_AUTO_FUNC_INIT = BIT(0),
IWL_CTXT_INFO_EARLY_DEBUG = BIT(1),
IWL_CTXT_INFO_ENABLE_CDMP = BIT(2),
IWL_CTXT_INFO_RB_CB_SIZE_POS = 4,
IWL_CTXT_INFO_TFD_FORMAT_LONG = BIT(8),
IWL_CTXT_INFO_RB_SIZE_POS = 9,
IWL_CTXT_INFO_AUTO_FUNC_INIT = 0x0001,
IWL_CTXT_INFO_EARLY_DEBUG = 0x0002,
IWL_CTXT_INFO_ENABLE_CDMP = 0x0004,
IWL_CTXT_INFO_RB_CB_SIZE = 0x00f0,
IWL_CTXT_INFO_TFD_FORMAT_LONG = 0x0100,
IWL_CTXT_INFO_RB_SIZE = 0x1e00,
IWL_CTXT_INFO_RB_SIZE_1K = 0x1,
IWL_CTXT_INFO_RB_SIZE_2K = 0x2,
IWL_CTXT_INFO_RB_SIZE_4K = 0x4,
......
......@@ -611,10 +611,7 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans,
*/
#define FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN (0x00000002)
#define MQ_RX_TABLE_SIZE 512
#define MQ_RX_TABLE_MASK (MQ_RX_TABLE_SIZE - 1)
#define MQ_RX_NUM_RBDS (MQ_RX_TABLE_SIZE - 1)
#define RX_POOL_SIZE (MQ_RX_NUM_RBDS + \
#define RX_POOL_SIZE(rbds) ((rbds) - 1 + \
IWL_MAX_RX_HW_QUEUES * \
(RX_CLAIM_REQ_ALLOC - RX_POST_REQ_ALLOC))
/* cb size is the exponent */
......
......@@ -206,7 +206,7 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
ctxt_info_gen3->mtr_size =
cpu_to_le16(TFD_QUEUE_CB_SIZE(cmdq_size));
ctxt_info_gen3->mcr_size =
cpu_to_le16(RX_QUEUE_CB_SIZE(MQ_RX_TABLE_SIZE));
cpu_to_le16(RX_QUEUE_CB_SIZE(trans->cfg->num_rbds));
trans_pcie->ctxt_info_gen3 = ctxt_info_gen3;
trans_pcie->prph_info = prph_info;
......
......@@ -193,11 +193,12 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans,
rb_size = IWL_CTXT_INFO_RB_SIZE_4K;
}
BUILD_BUG_ON(RX_QUEUE_CB_SIZE(MQ_RX_TABLE_SIZE) > 0xF);
control_flags = IWL_CTXT_INFO_TFD_FORMAT_LONG |
(RX_QUEUE_CB_SIZE(MQ_RX_TABLE_SIZE) <<
IWL_CTXT_INFO_RB_CB_SIZE_POS) |
(rb_size << IWL_CTXT_INFO_RB_SIZE_POS);
WARN_ON(RX_QUEUE_CB_SIZE(trans->cfg->num_rbds) > 12);
control_flags = IWL_CTXT_INFO_TFD_FORMAT_LONG;
control_flags |=
u32_encode_bits(RX_QUEUE_CB_SIZE(trans->cfg->num_rbds),
IWL_CTXT_INFO_RB_CB_SIZE);
control_flags |= u32_encode_bits(rb_size, IWL_CTXT_INFO_RB_SIZE);
ctxt_info->control.control_flags = cpu_to_le32(control_flags);
/* initialize RX default queue */
......
......@@ -995,6 +995,7 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
const struct iwl_cfg *cfg = (struct iwl_cfg *)(ent->driver_data);
const struct iwl_cfg *cfg_7265d __maybe_unused = NULL;
struct iwl_trans *iwl_trans;
struct iwl_trans_pcie *trans_pcie;
unsigned long flags;
int ret;
......@@ -1002,6 +1003,8 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (IS_ERR(iwl_trans))
return PTR_ERR(iwl_trans);
trans_pcie = IWL_TRANS_GET_PCIE_TRANS(iwl_trans);
/* the trans_cfg should never change, so set it now */
iwl_trans->trans_cfg = &cfg->trans;
......@@ -1129,6 +1132,16 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* now set the real cfg we decided to use */
iwl_trans->cfg = cfg;
if (cfg->trans.mq_rx_supported) {
if (WARN_ON(!cfg->num_rbds)) {
ret = -EINVAL;
goto out_free_trans;
}
trans_pcie->num_rx_bufs = cfg->num_rbds;
} else {
trans_pcie->num_rx_bufs = RX_QUEUE_SIZE;
}
if (iwl_trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_8000 &&
iwl_trans_grab_nic_access(iwl_trans, &flags)) {
u32 hw_step;
......
......@@ -513,8 +513,8 @@ struct cont_rec {
*/
struct iwl_trans_pcie {
struct iwl_rxq *rxq;
struct iwl_rx_mem_buffer rx_pool[RX_POOL_SIZE];
struct iwl_rx_mem_buffer *global_table[RX_POOL_SIZE];
struct iwl_rx_mem_buffer *rx_pool;
struct iwl_rx_mem_buffer **global_table;
struct iwl_rb_allocator rba;
union {
struct iwl_context_info *ctxt_info;
......@@ -573,6 +573,7 @@ struct iwl_trans_pcie {
u8 no_reclaim_cmds[MAX_NO_RECLAIM_CMDS];
u8 max_tbs;
u16 tfd_size;
u16 num_rx_bufs;
enum iwl_amsdu_size rx_buf_size;
bool bc_table_dword;
......
......@@ -510,7 +510,7 @@ void iwl_pcie_free_rbs_pool(struct iwl_trans *trans)
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
int i;
for (i = 0; i < RX_POOL_SIZE; i++) {
for (i = 0; i < RX_POOL_SIZE(trans_pcie->num_rx_bufs); i++) {
if (!trans_pcie->rx_pool[i].page)
continue;
dma_unmap_page(trans->dev, trans_pcie->rx_pool[i].page_dma,
......@@ -738,7 +738,7 @@ static int iwl_pcie_alloc_rxq_dma(struct iwl_trans *trans,
spin_lock_init(&rxq->lock);
if (trans->trans_cfg->mq_rx_supported)
rxq->queue_size = MQ_RX_TABLE_SIZE;
rxq->queue_size = trans->cfg->num_rbds;
else
rxq->queue_size = RX_QUEUE_SIZE;
......@@ -807,8 +807,18 @@ static int iwl_pcie_rx_alloc(struct iwl_trans *trans)
trans_pcie->rxq = kcalloc(trans->num_rx_queues, sizeof(struct iwl_rxq),
GFP_KERNEL);
if (!trans_pcie->rxq)
return -ENOMEM;
trans_pcie->rx_pool = kcalloc(RX_POOL_SIZE(trans_pcie->num_rx_bufs),
sizeof(trans_pcie->rx_pool[0]),
GFP_KERNEL);
trans_pcie->global_table =
kcalloc(RX_POOL_SIZE(trans_pcie->num_rx_bufs),
sizeof(trans_pcie->global_table[0]),
GFP_KERNEL);
if (!trans_pcie->rxq || !trans_pcie->rx_pool ||
!trans_pcie->global_table) {
ret = -ENOMEM;
goto err;
}
spin_lock_init(&rba->lock);
......@@ -845,6 +855,8 @@ static int iwl_pcie_rx_alloc(struct iwl_trans *trans)
trans_pcie->base_rb_stts = NULL;
trans_pcie->base_rb_stts_dma = 0;
}
kfree(trans_pcie->rx_pool);
kfree(trans_pcie->global_table);
kfree(trans_pcie->rxq);
return ret;
......@@ -1081,12 +1093,11 @@ static int _iwl_pcie_rx_init(struct iwl_trans *trans)
/* move the pool to the default queue and allocator ownerships */
queue_size = trans->trans_cfg->mq_rx_supported ?
MQ_RX_NUM_RBDS : RX_QUEUE_SIZE;
trans_pcie->num_rx_bufs - 1 : RX_QUEUE_SIZE;
allocator_pool_size = trans->num_rx_queues *
(RX_CLAIM_REQ_ALLOC - RX_POST_REQ_ALLOC);
num_alloc = queue_size + allocator_pool_size;
BUILD_BUG_ON(ARRAY_SIZE(trans_pcie->global_table) !=
ARRAY_SIZE(trans_pcie->rx_pool));
for (i = 0; i < num_alloc; i++) {
struct iwl_rx_mem_buffer *rxb = &trans_pcie->rx_pool[i];
......@@ -1177,6 +1188,8 @@ void iwl_pcie_rx_free(struct iwl_trans *trans)
if (rxq->napi.poll)
netif_napi_del(&rxq->napi);
}
kfree(trans_pcie->rx_pool);
kfree(trans_pcie->global_table);
kfree(trans_pcie->rxq);
}
......@@ -1390,13 +1403,12 @@ static struct iwl_rx_mem_buffer *iwl_pcie_get_rxb(struct iwl_trans *trans,
return rxb;
}
/* used_bd is a 32/16 bit but only 12 are used to retrieve the vid */
if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
vid = le16_to_cpu(rxq->cd[i].rbid) & 0x0FFF;
vid = le16_to_cpu(rxq->cd[i].rbid);
else
vid = le32_to_cpu(rxq->bd_32[i]) & 0x0FFF;
vid = le32_to_cpu(rxq->bd_32[i]) & 0x0FFF; /* 12-bit VID */
if (!vid || vid > ARRAY_SIZE(trans_pcie->global_table))
if (!vid || vid > RX_POOL_SIZE(trans_pcie->num_rx_bufs))
goto out_err;
rxb = trans_pcie->global_table[vid - 1];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment