Commit 0492b71c authored by David S. Miller's avatar David S. Miller

Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue

Jeff Kirsher says:

====================
40GbE Intel Wired LAN Driver Updates 2017-04-08

This series contains updates to i40e and i40evf only.

Mitch fixes an issue where the client driver (i40iw) was attempting to
load on x710 devices (which do not support iWARP), so only register with
the client if iWARP is supported.

Jake fixes up error messages to better clarify to the user when adding a
invalid flow type.  Updates the driver to look up the MAC address from
eth_get_platform_mac_address() first before checking what the firmware
provides.  Cleans up code so we are not repeating a duplicate loop, by
checking both transmit and receive queues in a single loop.  Also cleans
up flags never used, so remove the definitions.

Alex does cleanup so that we are always updating pf->flags when a change
is made to the private flags.  Adds support for 3K buffers to the receive
path so that we can provide the additional padding needed in the event
of NET_IP_ALIGN being non-zero or a cache line being greater than 64.
Adds support for build_skb() to i40e/i40evf.

Maciej adjusts the scope of the rtnl lock held during reset because it
was stopping other PFs from running their reset procedures.

Alan reduces code complexity in i40e_detect_recover_hung_queue().
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 417d978f f8b45b74
......@@ -389,13 +389,9 @@ struct i40e_pf {
#define I40E_FLAG_MSIX_ENABLED BIT_ULL(3)
#define I40E_FLAG_RSS_ENABLED BIT_ULL(6)
#define I40E_FLAG_VMDQ_ENABLED BIT_ULL(7)
#define I40E_FLAG_NEED_LINK_UPDATE BIT_ULL(9)
#define I40E_FLAG_IWARP_ENABLED BIT_ULL(10)
#define I40E_FLAG_CLEAN_ADMINQ BIT_ULL(14)
#define I40E_FLAG_FILTER_SYNC BIT_ULL(15)
#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT_ULL(16)
#define I40E_FLAG_PROCESS_MDD_EVENT BIT_ULL(17)
#define I40E_FLAG_PROCESS_VFLR_EVENT BIT_ULL(18)
#define I40E_FLAG_SRIOV_ENABLED BIT_ULL(19)
#define I40E_FLAG_DCB_ENABLED BIT_ULL(20)
#define I40E_FLAG_FD_SB_ENABLED BIT_ULL(21)
......@@ -617,7 +613,6 @@ struct i40e_vsi {
u32 tx_busy;
u64 tx_linearize;
u64 tx_force_wb;
u64 tx_lost_interrupt;
u32 rx_buf_failed;
u32 rx_page_failed;
......@@ -703,9 +698,6 @@ struct i40e_q_vector {
u8 num_ringpairs; /* total number of ring pairs in vector */
#define I40E_Q_VECTOR_HUNG_DETECT 0 /* Bit Index for hung detection logic */
unsigned long hung_detected; /* Set/Reset for hung_detection logic */
cpumask_t affinity_mask;
struct irq_affinity_notify affinity_notify;
......@@ -837,7 +829,7 @@ void i40e_down(struct i40e_vsi *vsi);
extern const char i40e_driver_name[];
extern const char i40e_driver_version_str[];
void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags);
void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags);
void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired);
int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
......
......@@ -89,7 +89,6 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = {
I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
I40E_VSI_STAT("tx_linearize", tx_linearize),
I40E_VSI_STAT("tx_force_wb", tx_force_wb),
I40E_VSI_STAT("tx_lost_interrupt", tx_lost_interrupt),
I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed),
I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
};
......@@ -1852,7 +1851,7 @@ static void i40e_diag_test(struct net_device *netdev,
* link then the following link test would have
* to be moved to before the reset
*/
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED));
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
if (i40e_link_test(netdev, &data[I40E_ETH_TEST_LINK]))
eth_test->flags |= ETH_TEST_FL_FAILED;
......@@ -1868,7 +1867,7 @@ static void i40e_diag_test(struct net_device *netdev,
eth_test->flags |= ETH_TEST_FL_FAILED;
clear_bit(__I40E_TESTING, &pf->state);
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED));
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
if (if_running)
i40e_open(netdev);
......@@ -4099,7 +4098,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
*/
if ((changed_flags & I40E_FLAG_VEB_STATS_ENABLED) ||
((changed_flags & I40E_FLAG_LEGACY_RX) && netif_running(dev)))
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED));
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
return 0;
}
......
......@@ -50,13 +50,16 @@ static const char i40e_copyright[] = "Copyright (c) 2013 - 2014 Intel Corporatio
/* a bit of forward declarations */
static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi);
static void i40e_handle_reset_warning(struct i40e_pf *pf);
static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired);
static int i40e_add_vsi(struct i40e_vsi *vsi);
static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi);
static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit);
static int i40e_setup_misc_vector(struct i40e_pf *pf);
static void i40e_determine_queue_usage(struct i40e_pf *pf);
static int i40e_setup_pf_filter_control(struct i40e_pf *pf);
static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired);
static int i40e_reset(struct i40e_pf *pf);
static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
static void i40e_fdir_sb_setup(struct i40e_pf *pf);
static int i40e_veb_get_bw_info(struct i40e_veb *veb);
......@@ -734,7 +737,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
struct i40e_eth_stats *oes;
struct i40e_eth_stats *es; /* device's eth stats */
u32 tx_restart, tx_busy;
u64 tx_lost_interrupt;
struct i40e_ring *p;
u32 rx_page, rx_buf;
u64 bytes, packets;
......@@ -760,7 +762,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
rx_b = rx_p = 0;
tx_b = tx_p = 0;
tx_restart = tx_busy = tx_linearize = tx_force_wb = 0;
tx_lost_interrupt = 0;
rx_page = 0;
rx_buf = 0;
rcu_read_lock();
......@@ -779,7 +780,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
tx_busy += p->tx_stats.tx_busy;
tx_linearize += p->tx_stats.tx_linearize;
tx_force_wb += p->tx_stats.tx_force_wb;
tx_lost_interrupt += p->tx_stats.tx_lost_interrupt;
/* Rx queue is part of the same block as Tx queue */
p = &p[1];
......@@ -798,7 +798,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
vsi->tx_busy = tx_busy;
vsi->tx_linearize = tx_linearize;
vsi->tx_force_wb = tx_force_wb;
vsi->tx_lost_interrupt = tx_lost_interrupt;
vsi->rx_page_failed = rx_page;
vsi->rx_buf_failed = rx_buf;
......@@ -3039,6 +3038,12 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
return -ENOMEM;
}
/* configure Rx buffer alignment */
if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
clear_ring_build_skb_enabled(ring);
else
set_ring_build_skb_enabled(ring);
/* cache tail for quicker writes, and clear the reg before use */
ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
writel(0, ring->tail);
......@@ -3080,13 +3085,15 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
vsi->max_frame = I40E_MAX_RXBUFFER;
vsi->rx_buf_len = I40E_RXBUFFER_2048;
#if (PAGE_SIZE < 8192)
} else if (vsi->netdev->mtu <= ETH_DATA_LEN) {
} else if (!I40E_2K_TOO_SMALL_WITH_PADDING &&
(vsi->netdev->mtu <= ETH_DATA_LEN)) {
vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
#endif
} else {
vsi->max_frame = I40E_MAX_RXBUFFER;
vsi->rx_buf_len = I40E_RXBUFFER_2048;
vsi->rx_buf_len = (PAGE_SIZE < 8192) ? I40E_RXBUFFER_3072 :
I40E_RXBUFFER_2048;
}
/* set up individual rings */
......@@ -4441,7 +4448,7 @@ static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf)
* i40e_vsi_wait_queues_disabled - Wait for VSI's queues to be disabled
* @vsi: the VSI being configured
*
* This function waits for the given VSI's queues to be disabled.
* Wait until all queues on a given VSI have been disabled.
**/
static int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi)
{
......@@ -4450,7 +4457,7 @@ static int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi)
pf_q = vsi->base_queue;
for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
/* Check and wait for the disable status of the queue */
/* Check and wait for the Tx queue */
ret = i40e_pf_txq_wait(pf, pf_q, false);
if (ret) {
dev_info(&pf->pdev->dev,
......@@ -4458,11 +4465,7 @@ static int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi)
vsi->seid, pf_q);
return ret;
}
}
pf_q = vsi->base_queue;
for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
/* Check and wait for the disable status of the queue */
/* Check and wait for the Tx queue */
ret = i40e_pf_rxq_wait(pf, pf_q, false);
if (ret) {
dev_info(&pf->pdev->dev,
......@@ -4505,16 +4508,15 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf)
* @vsi: Pointer to VSI struct
*
* This function checks specified queue for given VSI. Detects hung condition.
* Sets hung bit since it is two step process. Before next run of service task
* if napi_poll runs, it reset 'hung' bit for respective q_vector. If not,
* hung condition remain unchanged and during subsequent run, this function
* issues SW interrupt to recover from hung condition.
* We proactively detect hung TX queues by checking if interrupts are disabled
* but there are pending descriptors. If it appears hung, attempt to recover
* by triggering a SW interrupt.
**/
static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi)
{
struct i40e_ring *tx_ring = NULL;
struct i40e_pf *pf;
u32 head, val, tx_pending_hw;
u32 val, tx_pending;
int i;
pf = vsi->back;
......@@ -4540,47 +4542,15 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi)
else
val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
head = i40e_get_head(tx_ring);
tx_pending = i40e_get_tx_pending(tx_ring);
tx_pending_hw = i40e_get_tx_pending(tx_ring, false);
/* HW is done executing descriptors, updated HEAD write back,
* but SW hasn't processed those descriptors. If interrupt is
* not generated from this point ON, it could result into
* dev_watchdog detecting timeout on those netdev_queue,
* hence proactively trigger SW interrupt.
*/
if (tx_pending_hw && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) {
/* NAPI Poll didn't run and clear since it was set */
if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT,
&tx_ring->q_vector->hung_detected)) {
netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending_hw: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n",
vsi->seid, q_idx, tx_pending_hw,
tx_ring->next_to_clean, head,
tx_ring->next_to_use,
readl(tx_ring->tail));
netdev_info(vsi->netdev, "VSI_seid %d, Issuing force_wb for TX queue %d, Interrupt Reg: 0x%x\n",
vsi->seid, q_idx, val);
i40e_force_wb(vsi, tx_ring->q_vector);
} else {
/* First Chance - detected possible hung */
set_bit(I40E_Q_VECTOR_HUNG_DETECT,
&tx_ring->q_vector->hung_detected);
}
}
/* This is the case where we have interrupts missing,
* so the tx_pending in HW will most likely be 0, but we
* will have tx_pending in SW since the WB happened but the
* interrupt got lost.
/* Interrupts are disabled and TX pending is non-zero,
* trigger the SW interrupt (don't wait). Worst case
* there will be one extra interrupt which may result
* into not cleaning any queues because queues are cleaned.
*/
if ((!tx_pending_hw) && i40e_get_tx_pending(tx_ring, true) &&
(!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) {
local_bh_disable();
if (napi_reschedule(&tx_ring->q_vector->napi))
tx_ring->tx_stats.tx_lost_interrupt++;
local_bh_enable();
}
if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)))
i40e_force_wb(vsi, tx_ring->q_vector);
}
/**
......@@ -5537,6 +5507,8 @@ int i40e_open(struct net_device *netdev)
* Finish initialization of the VSI.
*
* Returns 0 on success, negative value on failure
*
* Note: expects to be called while under rtnl_lock()
**/
int i40e_vsi_open(struct i40e_vsi *vsi)
{
......@@ -5600,7 +5572,7 @@ int i40e_vsi_open(struct i40e_vsi *vsi)
err_setup_tx:
i40e_vsi_free_tx_resources(vsi);
if (vsi == pf->vsi[pf->lan_vsi])
i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED));
i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true);
return err;
}
......@@ -5686,12 +5658,14 @@ int i40e_close(struct net_device *netdev)
* i40e_do_reset - Start a PF or Core Reset sequence
* @pf: board private structure
* @reset_flags: which reset is requested
* @lock_acquired: indicates whether or not the lock has been acquired
* before this function was called.
*
* The essential difference in resets is that the PF Reset
* doesn't clear the packet buffers, doesn't reset the PE
* firmware, and doesn't bother the other PFs on the chip.
**/
void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags)
void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
{
u32 val;
......@@ -5737,7 +5711,7 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags)
* for the Core Reset.
*/
dev_dbg(&pf->pdev->dev, "PFR requested\n");
i40e_handle_reset_warning(pf);
i40e_handle_reset_warning(pf, lock_acquired);
} else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) {
int v;
......@@ -5946,7 +5920,7 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags)
{
rtnl_lock();
i40e_do_reset(pf, reset_flags);
i40e_do_reset(pf, reset_flags, true);
rtnl_unlock();
}
......@@ -6348,7 +6322,6 @@ static void i40e_reset_subtask(struct i40e_pf *pf)
{
u32 reset_flags = 0;
rtnl_lock();
if (test_bit(__I40E_REINIT_REQUESTED, &pf->state)) {
reset_flags |= BIT(__I40E_REINIT_REQUESTED);
clear_bit(__I40E_REINIT_REQUESTED, &pf->state);
......@@ -6374,18 +6347,19 @@ static void i40e_reset_subtask(struct i40e_pf *pf)
* precedence before starting a new reset sequence.
*/
if (test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) {
i40e_handle_reset_warning(pf);
goto unlock;
i40e_prep_for_reset(pf, false);
i40e_reset(pf);
i40e_rebuild(pf, false, false);
}
/* If we're already down or resetting, just bail */
if (reset_flags &&
!test_bit(__I40E_DOWN, &pf->state) &&
!test_bit(__I40E_CONFIG_BUSY, &pf->state))
i40e_do_reset(pf, reset_flags);
unlock:
!test_bit(__I40E_CONFIG_BUSY, &pf->state)) {
rtnl_lock();
i40e_do_reset(pf, reset_flags, true);
rtnl_unlock();
}
}
/**
......@@ -6873,10 +6847,12 @@ static void i40e_fdir_teardown(struct i40e_pf *pf)
/**
* i40e_prep_for_reset - prep for the core to reset
* @pf: board private structure
* @lock_acquired: indicates whether or not the lock has been acquired
* before this function was called.
*
* Close up the VFs and other things in prep for PF Reset.
**/
static void i40e_prep_for_reset(struct i40e_pf *pf)
static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired)
{
struct i40e_hw *hw = &pf->hw;
i40e_status ret = 0;
......@@ -6891,7 +6867,12 @@ static void i40e_prep_for_reset(struct i40e_pf *pf)
dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n");
/* quiesce the VSIs and their queues that are not already DOWN */
/* pf_quiesce_all_vsi modifies netdev structures -rtnl_lock needed */
if (!lock_acquired)
rtnl_lock();
i40e_pf_quiesce_all_vsi(pf);
if (!lock_acquired)
rtnl_unlock();
for (v = 0; v < pf->num_alloc_vsi; v++) {
if (pf->vsi[v])
......@@ -6926,29 +6907,39 @@ static void i40e_send_version(struct i40e_pf *pf)
}
/**
* i40e_reset_and_rebuild - reset and rebuild using a saved config
* i40e_reset - wait for core reset to finish reset, reset pf if corer not seen
* @pf: board private structure
* @reinit: if the Main VSI needs to re-initialized.
**/
static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit)
static int i40e_reset(struct i40e_pf *pf)
{
struct i40e_hw *hw = &pf->hw;
u8 set_fc_aq_fail = 0;
i40e_status ret;
u32 val;
u32 v;
/* Now we wait for GRST to settle out.
* We don't have to delete the VEBs or VSIs from the hw switch
* because the reset will make them disappear.
*/
ret = i40e_pf_reset(hw);
if (ret) {
dev_info(&pf->pdev->dev, "PF reset failed, %d\n", ret);
set_bit(__I40E_RESET_FAILED, &pf->state);
goto clear_recovery;
}
clear_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state);
} else {
pf->pfr_count++;
}
return ret;
}
/**
* i40e_rebuild - rebuild using a saved config
* @pf: board private structure
* @reinit: if the Main VSI needs to re-initialized.
* @lock_acquired: indicates whether or not the lock has been acquired
* before this function was called.
**/
static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
{
struct i40e_hw *hw = &pf->hw;
u8 set_fc_aq_fail = 0;
i40e_status ret;
u32 val;
int v;
if (test_bit(__I40E_DOWN, &pf->state))
goto clear_recovery;
......@@ -6993,9 +6984,11 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit)
}
#endif /* CONFIG_I40E_DCB */
/* do basic switch setup */
if (!lock_acquired)
rtnl_lock();
ret = i40e_setup_pf_switch(pf, reinit);
if (ret)
goto end_core_reset;
goto end_unlock;
/* The driver only wants link up/down and module qualification
* reports from firmware. Note the negative logic.
......@@ -7066,7 +7059,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit)
if (ret) {
dev_info(&pf->pdev->dev,
"rebuild of Main VSI failed: %d\n", ret);
goto end_core_reset;
goto end_unlock;
}
}
......@@ -7117,23 +7110,48 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit)
/* tell the firmware that we're starting */
i40e_send_version(pf);
end_unlock:
if (!lock_acquired)
rtnl_unlock();
end_core_reset:
clear_bit(__I40E_RESET_FAILED, &pf->state);
clear_recovery:
clear_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state);
}
/**
* i40e_reset_and_rebuild - reset and rebuild using a saved config
* @pf: board private structure
* @reinit: if the Main VSI needs to re-initialized.
* @lock_acquired: indicates whether or not the lock has been acquired
* before this function was called.
**/
static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
bool lock_acquired)
{
int ret;
/* Now we wait for GRST to settle out.
* We don't have to delete the VEBs or VSIs from the hw switch
* because the reset will make them disappear.
*/
ret = i40e_reset(pf);
if (!ret)
i40e_rebuild(pf, reinit, lock_acquired);
}
/**
* i40e_handle_reset_warning - prep for the PF to reset, reset and rebuild
* @pf: board private structure
*
* Close up the VFs and other things in prep for a Core Reset,
* then get ready to rebuild the world.
* @lock_acquired: indicates whether or not the lock has been acquired
* before this function was called.
**/
static void i40e_handle_reset_warning(struct i40e_pf *pf)
static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
{
i40e_prep_for_reset(pf);
i40e_reset_and_rebuild(pf, false);
i40e_prep_for_reset(pf, lock_acquired);
i40e_reset_and_rebuild(pf, false, lock_acquired);
}
/**
......@@ -8430,6 +8448,7 @@ static int i40e_pf_config_rss(struct i40e_pf *pf)
*
* returns 0 if rss is not enabled, if enabled returns the final rss queue
* count which may be different from the requested queue count.
* Note: expects to be called while under rtnl_lock()
**/
int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
{
......@@ -8445,11 +8464,11 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
u16 qcount;
vsi->req_queue_pairs = queue_count;
i40e_prep_for_reset(pf);
i40e_prep_for_reset(pf, true);
pf->alloc_rss_size = new_rss_size;
i40e_reset_and_rebuild(pf, true);
i40e_reset_and_rebuild(pf, true, true);
/* Discard the user configured hash keys and lut, if less
* queues are enabled.
......@@ -8825,6 +8844,7 @@ static void i40e_clear_rss_lut(struct i40e_vsi *vsi)
* i40e_set_features - set the netdev feature flags
* @netdev: ptr to the netdev being adjusted
* @features: the feature set that the stack is suggesting
* Note: expects to be called while under rtnl_lock()
**/
static int i40e_set_features(struct net_device *netdev,
netdev_features_t features)
......@@ -8848,7 +8868,7 @@ static int i40e_set_features(struct net_device *netdev,
need_reset = i40e_set_ntuple(pf, features);
if (need_reset)
i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED));
i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true);
return 0;
}
......@@ -9043,6 +9063,8 @@ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
* is to change the mode then that requires a PF reset to
* allow rebuild of the components with required hardware
* bridge mode enabled.
*
* Note: expects to be called while under rtnl_lock()
**/
static int i40e_ndo_bridge_setlink(struct net_device *dev,
struct nlmsghdr *nlh,
......@@ -9098,7 +9120,8 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev,
pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
else
pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED));
i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED),
true);
break;
}
}
......@@ -9307,10 +9330,15 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
if (vsi->type == I40E_VSI_MAIN) {
SET_NETDEV_DEV(netdev, &pf->pdev->dev);
ether_addr_copy(mac_addr, hw->mac.perm_addr);
/* The following steps are necessary to properly keep track of
* MAC-VLAN filters loaded into firmware - first we remove
* filter that is automatically generated by firmware and then
* add new filter both to the driver hash table and firmware.
/* The following steps are necessary for two reasons. First,
* some older NVM configurations load a default MAC-VLAN
* filter that will accept any tagged packet, and we want to
* replace this with a normal filter. Additionally, it is
* possible our MAC address was provided by the platform using
* Open Firmware or similar.
*
* Thus, we need to remove the default filter and install one
* specific to the MAC address.
*/
i40e_rm_default_mac_filter(vsi, mac_addr);
spin_lock_bh(&vsi->mac_filter_hash_lock);
......@@ -10815,20 +10843,18 @@ static void i40e_print_features(struct i40e_pf *pf)
/**
* i40e_get_platform_mac_addr - get platform-specific MAC address
*
* @pdev: PCI device information struct
* @pf: board private structure
*
* Look up the MAC address in Open Firmware on systems that support it,
* and use IDPROM on SPARC if no OF address is found. On return, the
* I40E_FLAG_PF_MAC will be wset in pf->flags if a platform-specific value
* has been selected.
* Look up the MAC address for the device. First we'll try
* eth_platform_get_mac_address, which will check Open Firmware, or arch
* specific fallback. Otherwise, we'll default to the stored value in
* firmware.
**/
static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
{
pf->flags &= ~I40E_FLAG_PF_MAC;
if (!eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr))
pf->flags |= I40E_FLAG_PF_MAC;
if (eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr))
i40e_get_mac_addr(&pf->hw, pf->hw.mac.addr);
}
/**
......@@ -11042,9 +11068,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
i40e_aq_stop_lldp(hw, true, NULL);
}
i40e_get_mac_addr(hw, hw->mac.addr);
/* allow a platform config to override the HW addr */
i40e_get_platform_mac_addr(pdev, pf);
if (!is_valid_ether_addr(hw->mac.addr)) {
dev_info(&pdev->dev, "invalid MAC address %pM\n", hw->mac.addr);
err = -EIO;
......@@ -11073,7 +11099,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&pf->service_task, i40e_service_task);
clear_bit(__I40E_SERVICE_SCHED, &pf->state);
pf->flags |= I40E_FLAG_NEED_LINK_UPDATE;
/* NVM bit on means WoL disabled for the port */
i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits);
......@@ -11245,10 +11270,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
round_jiffies(jiffies + pf->service_timer_period));
/* add this PF to client device list and launch a client service task */
if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
err = i40e_lan_add_device(pf);
if (err)
dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
err);
}
#define PCI_SPEED_SIZE 8
#define PCI_WIDTH_SIZE 8
......@@ -11426,8 +11453,9 @@ static void i40e_remove(struct pci_dev *pdev)
i40e_vsi_release(pf->vsi[pf->lan_vsi]);
/* remove attached clients */
if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
ret_code = i40e_lan_del_device(pf);
if (ret_code) {
if (ret_code)
dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
ret_code);
}
......@@ -11498,7 +11526,7 @@ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev,
/* shutdown all operations */
if (!test_bit(__I40E_SUSPENDED, &pf->state)) {
rtnl_lock();
i40e_prep_for_reset(pf);
i40e_prep_for_reset(pf, true);
rtnl_unlock();
}
......@@ -11567,7 +11595,7 @@ static void i40e_pci_error_resume(struct pci_dev *pdev)
return;
rtnl_lock();
i40e_handle_reset_warning(pf);
i40e_handle_reset_warning(pf, true);
rtnl_unlock();
}
......@@ -11630,7 +11658,7 @@ static void i40e_shutdown(struct pci_dev *pdev)
set_bit(__I40E_SUSPENDED, &pf->state);
set_bit(__I40E_DOWN, &pf->state);
rtnl_lock();
i40e_prep_for_reset(pf);
i40e_prep_for_reset(pf, true);
rtnl_unlock();
wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
......@@ -11649,7 +11677,7 @@ static void i40e_shutdown(struct pci_dev *pdev)
i40e_enable_mc_magic_wake(pf);
rtnl_lock();
i40e_prep_for_reset(pf);
i40e_prep_for_reset(pf, true);
rtnl_unlock();
wr32(hw, I40E_PFPM_APM,
......@@ -11683,7 +11711,7 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
i40e_enable_mc_magic_wake(pf);
rtnl_lock();
i40e_prep_for_reset(pf);
i40e_prep_for_reset(pf, true);
rtnl_unlock();
wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
......@@ -11731,7 +11759,7 @@ static int i40e_resume(struct pci_dev *pdev)
if (test_and_clear_bit(__I40E_SUSPENDED, &pf->state)) {
clear_bit(__I40E_DOWN, &pf->state);
rtnl_lock();
i40e_reset_and_rebuild(pf, false);
i40e_reset_and_rebuild(pf, false, true);
rtnl_unlock();
}
......
......@@ -533,14 +533,15 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
break;
default:
/* We cannot support masking based on protocol */
goto unsupported_flow;
dev_info(&pf->pdev->dev, "Unsupported IPv4 protocol 0x%02x\n",
input->ip4_proto);
return -EINVAL;
}
break;
default:
unsupported_flow:
dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
dev_info(&pf->pdev->dev, "Unsupported flow type 0x%02x\n",
input->flow_type);
ret = -EINVAL;
return -EINVAL;
}
/* The buffer allocated here will be normally be freed by
......@@ -710,19 +711,15 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
/**
* i40e_get_tx_pending - how many tx descriptors not processed
* @tx_ring: the ring of descriptors
* @in_sw: is tx_pending being checked in SW or HW
*
* Since there is no access to the ring head register
* in XL710, we need to use our local copies
**/
u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
u32 i40e_get_tx_pending(struct i40e_ring *ring)
{
u32 head, tail;
if (!in_sw)
head = i40e_get_head(ring);
else
head = ring->next_to_clean;
tail = readl(ring->tail);
if (head != tail)
......@@ -845,7 +842,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
* them to be written back in case we stay in NAPI.
* In this mode on X722 we do not enable Interrupt.
*/
unsigned int j = i40e_get_tx_pending(tx_ring, false);
unsigned int j = i40e_get_tx_pending(tx_ring);
if (budget &&
((j / WB_STRIDE) == 0) && (j > 0) &&
......@@ -1141,14 +1138,15 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_bi->dma,
rx_bi->page_offset,
I40E_RXBUFFER_2048,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* free resources associated with mapping */
dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,
PAGE_SIZE,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
I40E_RX_DMA_ATTR);
__page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
rx_bi->page = NULL;
......@@ -1249,6 +1247,17 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
writel(val, rx_ring->tail);
}
/**
* i40e_rx_offset - Return expected offset into page to access data
* @rx_ring: Ring we are requesting offset of
*
* Returns the offset value for ring into the data buffer.
*/
static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
{
return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
}
/**
* i40e_alloc_mapped_page - recycle or make a new page
* @rx_ring: ring to use
......@@ -1270,7 +1279,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
}
/* alloc new page for storage */
page = dev_alloc_page();
page = dev_alloc_pages(i40e_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_page_failed++;
return false;
......@@ -1278,7 +1287,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
/* map page for use */
dma = dma_map_page_attrs(rx_ring->dev, page, 0,
PAGE_SIZE,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
I40E_RX_DMA_ATTR);
......@@ -1286,14 +1295,14 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
__free_pages(page, 0);
__free_pages(page, i40e_rx_pg_order(rx_ring));
rx_ring->rx_stats.alloc_page_failed++;
return false;
}
bi->dma = dma;
bi->page = page;
bi->page_offset = 0;
bi->page_offset = i40e_rx_offset(rx_ring);
/* initialize pagecnt_bias to 1 representing we fully own page */
bi->pagecnt_bias = 1;
......@@ -1346,7 +1355,7 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
bi->page_offset,
I40E_RXBUFFER_2048,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* Refresh the desc even if buffer_addrs didn't change
......@@ -1648,9 +1657,6 @@ static inline bool i40e_page_is_reusable(struct page *page)
**/
static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
{
#if (PAGE_SIZE >= 8192)
unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
#endif
unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
struct page *page = rx_buffer->page;
......@@ -1663,7 +1669,9 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
if (unlikely((page_count(page) - pagecnt_bias) > 1))
return false;
#else
if (rx_buffer->page_offset > last_offset)
#define I40E_LAST_OFFSET \
(SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048)
if (rx_buffer->page_offset > I40E_LAST_OFFSET)
return false;
#endif
......@@ -1697,9 +1705,9 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
unsigned int size)
{
#if (PAGE_SIZE < 8192)
unsigned int truesize = I40E_RXBUFFER_2048;
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring));
#endif
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
......@@ -1758,7 +1766,7 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
{
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
unsigned int truesize = I40E_RXBUFFER_2048;
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
#endif
......@@ -1807,6 +1815,51 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
return skb;
}
/**
* i40e_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buffer: Rx buffer to pull data from
* @size: size of buffer to add to skb
*
* This function builds an skb around an existing Rx buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead.
*/
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
unsigned int size)
{
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
#endif
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
#endif
/* build an skb around the page buffer */
skb = build_skb(va - I40E_SKB_PAD, truesize);
if (unlikely(!skb))
return NULL;
/* update pointers within the skb to store the data */
skb_reserve(skb, I40E_SKB_PAD);
__skb_put(skb, size);
/* buffer is used by skb, update page_offset */
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
rx_buffer->page_offset += truesize;
#endif
return skb;
}
/**
* i40e_put_rx_buffer - Clean up used buffer and either recycle or free
* @rx_ring: rx descriptor ring to transact packets on
......@@ -1824,7 +1877,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
rx_ring->rx_stats.page_reuse_count++;
} else {
/* we are not reusing the buffer so unmap it */
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buffer->page,
rx_buffer->pagecnt_bias);
......@@ -1930,6 +1984,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
/* retrieve a buffer from the ring */
if (skb)
i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
else if (ring_uses_build_skb(rx_ring))
skb = i40e_build_skb(rx_ring, rx_buffer, size);
else
skb = i40e_construct_skb(rx_ring, rx_buffer, size);
......@@ -2125,8 +2181,6 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
return 0;
}
/* Clear hung_detected bit */
clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected);
/* Since the actual Tx work is minimal, we can give the Tx a larger
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
......@@ -2262,8 +2316,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
/* Due to lack of space, no more new filters can be programmed */
if (th->syn && (pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED))
return;
if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
(!(pf->hw_disabled_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) {
if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
/* HW ATR eviction will take care of removing filters on FIN
* and RST packets.
*/
......@@ -2325,8 +2378,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
(!(pf->hw_disabled_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
......
......@@ -119,6 +119,7 @@ enum i40e_dyn_idx_t {
#define I40E_RXBUFFER_256 256
#define I40E_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */
#define I40E_RXBUFFER_2048 2048
#define I40E_RXBUFFER_3072 3072 /* Used for large frames w/ padding */
#define I40E_MAX_RXBUFFER 9728 /* largest size for single descriptor */
/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
......@@ -134,6 +135,58 @@ enum i40e_dyn_idx_t {
#define I40E_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
/* Attempt to maximize the headroom available for incoming frames. We
* use a 2K buffer for receives and need 1536/1534 to store the data for
* the frame. This leaves us with 512 bytes of room. From that we need
* to deduct the space needed for the shared info and the padding needed
* to IP align the frame.
*
* Note: For cache line sizes 256 or larger this value is going to end
* up negative. In these cases we should fall back to the legacy
* receive path.
*/
#if (PAGE_SIZE < 8192)
#define I40E_2K_TOO_SMALL_WITH_PADDING \
((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048))
static inline int i40e_compute_pad(int rx_buf_len)
{
int page_size, pad_size;
page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
return pad_size;
}
static inline int i40e_skb_pad(void)
{
int rx_buf_len;
/* If a 2K buffer cannot handle a standard Ethernet frame then
* optimize padding for a 3K buffer instead of a 1.5K buffer.
*
* For a 3K buffer we need to add enough padding to allow for
* tailroom due to NET_IP_ALIGN possibly shifting us out of
* cache-line alignment.
*/
if (I40E_2K_TOO_SMALL_WITH_PADDING)
rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
else
rx_buf_len = I40E_RXBUFFER_1536;
/* if needed make room for NET_IP_ALIGN */
rx_buf_len -= NET_IP_ALIGN;
return i40e_compute_pad(rx_buf_len);
}
#define I40E_SKB_PAD i40e_skb_pad()
#else
#define I40E_2K_TOO_SMALL_WITH_PADDING false
#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
#endif
/**
* i40e_test_staterr - tests bits in Rx descriptor status and error fields
* @rx_desc: pointer to receive descriptor (in le64 format)
......@@ -275,7 +328,6 @@ struct i40e_tx_queue_stats {
u64 tx_done_old;
u64 tx_linearize;
u64 tx_force_wb;
u64 tx_lost_interrupt;
};
struct i40e_rx_queue_stats {
......@@ -342,6 +394,7 @@ struct i40e_ring {
u16 flags;
#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1)
/* stats structs */
struct i40e_queue_stats stats;
......@@ -369,6 +422,21 @@ struct i40e_ring {
*/
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
{
return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED);
}
static inline void set_ring_build_skb_enabled(struct i40e_ring *ring)
{
ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
{
ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
enum i40e_latency_range {
I40E_LOWEST_LATENCY = 0,
I40E_LOW_LATENCY = 1,
......@@ -390,6 +458,17 @@ struct i40e_ring_container {
#define i40e_for_each_ring(pos, head) \
for (pos = (head).ring; pos != NULL; pos = pos->next)
static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring)
{
#if (PAGE_SIZE < 8192)
if (ring->rx_buf_len > (PAGE_SIZE / 2))
return 1;
#endif
return 0;
}
#define i40e_rx_pg_size(_ring) (PAGE_SIZE << i40e_rx_pg_order(_ring))
bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
......@@ -400,7 +479,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring);
void i40e_free_rx_resources(struct i40e_ring *rx_ring);
int i40e_napi_poll(struct napi_struct *napi, int budget);
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
u32 i40e_get_tx_pending(struct i40e_ring *ring);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
......
......@@ -509,14 +509,15 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring)
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_bi->dma,
rx_bi->page_offset,
I40E_RXBUFFER_2048,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* free resources associated with mapping */
dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,
PAGE_SIZE,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
I40E_RX_DMA_ATTR);
__page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
rx_bi->page = NULL;
......@@ -617,6 +618,17 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
writel(val, rx_ring->tail);
}
/**
* i40e_rx_offset - Return expected offset into page to access data
* @rx_ring: Ring we are requesting offset of
*
* Returns the offset value for ring into the data buffer.
*/
static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
{
return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
}
/**
* i40e_alloc_mapped_page - recycle or make a new page
* @rx_ring: ring to use
......@@ -638,7 +650,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
}
/* alloc new page for storage */
page = dev_alloc_page();
page = dev_alloc_pages(i40e_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_page_failed++;
return false;
......@@ -646,7 +658,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
/* map page for use */
dma = dma_map_page_attrs(rx_ring->dev, page, 0,
PAGE_SIZE,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
I40E_RX_DMA_ATTR);
......@@ -654,14 +666,14 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
__free_pages(page, 0);
__free_pages(page, i40e_rx_pg_order(rx_ring));
rx_ring->rx_stats.alloc_page_failed++;
return false;
}
bi->dma = dma;
bi->page = page;
bi->page_offset = 0;
bi->page_offset = i40e_rx_offset(rx_ring);
/* initialize pagecnt_bias to 1 representing we fully own page */
bi->pagecnt_bias = 1;
......@@ -714,7 +726,7 @@ bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
bi->page_offset,
I40E_RXBUFFER_2048,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* Refresh the desc even if buffer_addrs didn't change
......@@ -1006,9 +1018,6 @@ static inline bool i40e_page_is_reusable(struct page *page)
**/
static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
{
#if (PAGE_SIZE >= 8192)
unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
#endif
unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
struct page *page = rx_buffer->page;
......@@ -1021,7 +1030,9 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
if (unlikely((page_count(page) - pagecnt_bias) > 1))
return false;
#else
if (rx_buffer->page_offset > last_offset)
#define I40E_LAST_OFFSET \
(SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048)
if (rx_buffer->page_offset > I40E_LAST_OFFSET)
return false;
#endif
......@@ -1055,9 +1066,9 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
unsigned int size)
{
#if (PAGE_SIZE < 8192)
unsigned int truesize = I40E_RXBUFFER_2048;
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring));
#endif
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
......@@ -1116,7 +1127,7 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
{
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
unsigned int truesize = I40E_RXBUFFER_2048;
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
#endif
......@@ -1165,6 +1176,51 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
return skb;
}
/**
* i40e_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buffer: Rx buffer to pull data from
* @size: size of buffer to add to skb
*
* This function builds an skb around an existing Rx buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead.
*/
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
unsigned int size)
{
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
#endif
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
#endif
/* build an skb around the page buffer */
skb = build_skb(va - I40E_SKB_PAD, truesize);
if (unlikely(!skb))
return NULL;
/* update pointers within the skb to store the data */
skb_reserve(skb, I40E_SKB_PAD);
__skb_put(skb, size);
/* buffer is used by skb, update page_offset */
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
rx_buffer->page_offset += truesize;
#endif
return skb;
}
/**
* i40e_put_rx_buffer - Clean up used buffer and either recycle or free
* @rx_ring: rx descriptor ring to transact packets on
......@@ -1182,7 +1238,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
rx_ring->rx_stats.page_reuse_count++;
} else {
/* we are not reusing the buffer so unmap it */
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buffer->page,
rx_buffer->pagecnt_bias);
......@@ -1283,6 +1340,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
/* retrieve a buffer from the ring */
if (skb)
i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
else if (ring_uses_build_skb(rx_ring))
skb = i40e_build_skb(rx_ring, rx_buffer, size);
else
skb = i40e_construct_skb(rx_ring, rx_buffer, size);
......
......@@ -106,6 +106,7 @@ enum i40e_dyn_idx_t {
#define I40E_RXBUFFER_256 256
#define I40E_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */
#define I40E_RXBUFFER_2048 2048
#define I40E_RXBUFFER_3072 3072 /* Used for large frames w/ padding */
#define I40E_MAX_RXBUFFER 9728 /* largest size for single descriptor */
/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
......@@ -121,6 +122,58 @@ enum i40e_dyn_idx_t {
#define I40E_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
/* Attempt to maximize the headroom available for incoming frames. We
* use a 2K buffer for receives and need 1536/1534 to store the data for
* the frame. This leaves us with 512 bytes of room. From that we need
* to deduct the space needed for the shared info and the padding needed
* to IP align the frame.
*
* Note: For cache line sizes 256 or larger this value is going to end
* up negative. In these cases we should fall back to the legacy
* receive path.
*/
#if (PAGE_SIZE < 8192)
#define I40E_2K_TOO_SMALL_WITH_PADDING \
((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048))
static inline int i40e_compute_pad(int rx_buf_len)
{
int page_size, pad_size;
page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
return pad_size;
}
static inline int i40e_skb_pad(void)
{
int rx_buf_len;
/* If a 2K buffer cannot handle a standard Ethernet frame then
* optimize padding for a 3K buffer instead of a 1.5K buffer.
*
* For a 3K buffer we need to add enough padding to allow for
* tailroom due to NET_IP_ALIGN possibly shifting us out of
* cache-line alignment.
*/
if (I40E_2K_TOO_SMALL_WITH_PADDING)
rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
else
rx_buf_len = I40E_RXBUFFER_1536;
/* if needed make room for NET_IP_ALIGN */
rx_buf_len -= NET_IP_ALIGN;
return i40e_compute_pad(rx_buf_len);
}
#define I40E_SKB_PAD i40e_skb_pad()
#else
#define I40E_2K_TOO_SMALL_WITH_PADDING false
#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
#endif
/**
* i40e_test_staterr - tests bits in Rx descriptor status and error fields
* @rx_desc: pointer to receive descriptor (in le64 format)
......@@ -328,6 +381,7 @@ struct i40e_ring {
u16 flags;
#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1)
/* stats structs */
struct i40e_queue_stats stats;
......@@ -355,6 +409,21 @@ struct i40e_ring {
*/
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
{
return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED);
}
static inline void set_ring_build_skb_enabled(struct i40e_ring *ring)
{
ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
{
ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
enum i40e_latency_range {
I40E_LOWEST_LATENCY = 0,
I40E_LOW_LATENCY = 1,
......@@ -376,6 +445,17 @@ struct i40e_ring_container {
#define i40e_for_each_ring(pos, head) \
for (pos = (head).ring; pos != NULL; pos = pos->next)
static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring)
{
#if (PAGE_SIZE < 8192)
if (ring->rx_buf_len > (PAGE_SIZE / 2))
return 1;
#endif
return 0;
}
#define i40e_rx_pg_size(_ring) (PAGE_SIZE << i40e_rx_pg_order(_ring))
bool i40evf_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
void i40evf_clean_tx_ring(struct i40e_ring *tx_ring);
......
......@@ -205,7 +205,6 @@ struct i40evf_adapter {
#define I40EVF_FLAG_IN_NETPOLL BIT(4)
#define I40EVF_FLAG_IMIR_ENABLED BIT(5)
#define I40EVF_FLAG_MQ_CAPABLE BIT(6)
#define I40EVF_FLAG_NEED_LINK_UPDATE BIT(7)
#define I40EVF_FLAG_PF_COMMS_FAILED BIT(8)
#define I40EVF_FLAG_RESET_PENDING BIT(9)
#define I40EVF_FLAG_RESET_NEEDED BIT(10)
......
......@@ -694,11 +694,18 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter)
/* Legacy Rx will always default to a 2048 buffer size. */
#if (PAGE_SIZE < 8192)
if (!(adapter->flags & I40EVF_FLAG_LEGACY_RX)) {
/* For jumbo frames on systems with 4K pages we have to use
* an order 1 page, so we might as well increase the size
* of our Rx buffer to make better use of the available space
*/
rx_buf_len = I40E_RXBUFFER_3072;
/* We use a 1536 buffer size for configurations with
* standard Ethernet mtu. On x86 this gives us enough room
* for shared info and 192 bytes of padding.
*/
if (netdev->mtu <= ETH_DATA_LEN)
if (!I40E_2K_TOO_SMALL_WITH_PADDING &&
(netdev->mtu <= ETH_DATA_LEN))
rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
}
#endif
......@@ -706,6 +713,11 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter)
for (i = 0; i < adapter->num_active_queues; i++) {
adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i);
adapter->rx_rings[i].rx_buf_len = rx_buf_len;
if (adapter->flags & I40EVF_FLAG_LEGACY_RX)
clear_ring_build_skb_enabled(&adapter->rx_rings[i]);
else
set_ring_build_skb_enabled(&adapter->rx_rings[i]);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment