Commit 6818c4d5 authored by Joshua Hay, committed by Tony Nguyen

idpf: add splitq start_xmit

Add start_xmit support for the split queue model. To start with, add
the necessary checks to linearize the skb if it uses more buffers than
the hardware-supported limit. Stop the transmit queue if there are not
enough descriptors available for the skb to use, or if we are going to
potentially overrun the completion queue. Finally, prepare the
descriptor with all the required information and update the tail.
Signed-off-by: Joshua Hay <joshua.a.hay@intel.com>
Co-developed-by: Alan Brady <alan.brady@intel.com>
Signed-off-by: Alan Brady <alan.brady@intel.com>
Co-developed-by: Madhu Chittim <madhu.chittim@intel.com>
Signed-off-by: Madhu Chittim <madhu.chittim@intel.com>
Co-developed-by: Phani Burra <phani.r.burra@intel.com>
Signed-off-by: Phani Burra <phani.r.burra@intel.com>
Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Co-developed-by: Pavan Kumar Linga <pavan.kumar.linga@intel.com>
Signed-off-by: Pavan Kumar Linga <pavan.kumar.linga@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
parent d4d55871
@@ -13,6 +13,7 @@ struct idpf_vport_max_q;
#include <linux/aer.h>
#include <linux/etherdevice.h>
#include <linux/pci.h>
#include <linux/bitfield.h>
#include "virtchnl2.h"
#include "idpf_lan_txrx.h"
@@ -56,6 +56,64 @@ enum idpf_rss_hash {
	 BIT_ULL(IDPF_HASH_NONF_UNICAST_IPV6_UDP) | \
	 BIT_ULL(IDPF_HASH_NONF_MULTICAST_IPV6_UDP))
#define IDPF_TXD_CTX_QW1_MSS_S 50
#define IDPF_TXD_CTX_QW1_MSS_M GENMASK_ULL(63, 50)
#define IDPF_TXD_CTX_QW1_TSO_LEN_S 30
#define IDPF_TXD_CTX_QW1_TSO_LEN_M GENMASK_ULL(47, 30)
#define IDPF_TXD_CTX_QW1_CMD_S 4
#define IDPF_TXD_CTX_QW1_CMD_M GENMASK_ULL(15, 4)
#define IDPF_TXD_CTX_QW1_DTYPE_S 0
#define IDPF_TXD_CTX_QW1_DTYPE_M GENMASK_ULL(3, 0)
#define IDPF_TXD_QW1_L2TAG1_S 48
#define IDPF_TXD_QW1_L2TAG1_M GENMASK_ULL(63, 48)
#define IDPF_TXD_QW1_TX_BUF_SZ_S 34
#define IDPF_TXD_QW1_TX_BUF_SZ_M GENMASK_ULL(47, 34)
#define IDPF_TXD_QW1_OFFSET_S 16
#define IDPF_TXD_QW1_OFFSET_M GENMASK_ULL(33, 16)
#define IDPF_TXD_QW1_CMD_S 4
#define IDPF_TXD_QW1_CMD_M GENMASK_ULL(15, 4)
#define IDPF_TXD_QW1_DTYPE_S 0
#define IDPF_TXD_QW1_DTYPE_M GENMASK_ULL(3, 0)
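The <linux/bitfield.h> include added above is what makes these shift/mask pairs convenient to use. As a minimal sketch (not code from this diff; the helper name is hypothetical), QW1 of a base TX data descriptor could be packed with FIELD_PREP like so:

/* Sketch only: pack QW1 of a base TX data descriptor from the masks
 * above; the helper name is illustrative, not from the driver.
 */
static __le64 idpf_tx_base_qw1_pack(u64 dtype, u64 cmd, u64 offset,
				    u64 buf_size)
{
	u64 qw1 = FIELD_PREP(IDPF_TXD_QW1_DTYPE_M, dtype) |
		  FIELD_PREP(IDPF_TXD_QW1_CMD_M, cmd) |
		  FIELD_PREP(IDPF_TXD_QW1_OFFSET_M, offset) |
		  FIELD_PREP(IDPF_TXD_QW1_TX_BUF_SZ_M, buf_size);

	return cpu_to_le64(qw1);
}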
enum idpf_tx_desc_dtype_value {
	IDPF_TX_DESC_DTYPE_DATA			= 0,
	IDPF_TX_DESC_DTYPE_CTX			= 1,
	/* DTYPE 2 is reserved
	 * DTYPE 3 is free for future use
	 * DTYPE 4 is reserved
	 */
	IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX		= 5,
	/* DTYPE 6 is reserved */
	IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2	= 7,
	/* DTYPE 8, 9 are free for future use
	 * DTYPE 10 is reserved
	 * DTYPE 11 is free for future use
	 */
	IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE	= 12,
	/* DTYPE 13, 14 are free for future use */
	/* DESC_DONE - HW has completed write-back of descriptor */
	IDPF_TX_DESC_DTYPE_DESC_DONE		= 15,
};
enum idpf_tx_base_desc_cmd_bits {
	IDPF_TX_DESC_CMD_EOP			= BIT(0),
	IDPF_TX_DESC_CMD_RS			= BIT(1),
	/* only on VFs else RSVD */
	IDPF_TX_DESC_CMD_ICRC			= BIT(2),
	IDPF_TX_DESC_CMD_IL2TAG1		= BIT(3),
	IDPF_TX_DESC_CMD_RSVD1			= BIT(4),
	IDPF_TX_DESC_CMD_IIPT_IPV6		= BIT(5),
	IDPF_TX_DESC_CMD_IIPT_IPV4		= BIT(6),
	IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM		= GENMASK(6, 5),
	IDPF_TX_DESC_CMD_RSVD2			= BIT(7),
	IDPF_TX_DESC_CMD_L4T_EOFT_TCP		= BIT(8),
	IDPF_TX_DESC_CMD_L4T_EOFT_SCTP		= BIT(9),
	IDPF_TX_DESC_CMD_L4T_EOFT_UDP		= GENMASK(9, 8),
	IDPF_TX_DESC_CMD_RSVD3			= BIT(10),
	IDPF_TX_DESC_CMD_RSVD4			= BIT(11),
};
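Note that IIPT and L4T above are two-bit fields rather than independent flags: IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM (GENMASK(6, 5)) sets both IIPT bits, and IDPF_TX_DESC_CMD_L4T_EOFT_UDP (GENMASK(9, 8)) sets both L4T bits, so these values form small enumerations inside the cmd field rather than OR-able single-bit options.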
/* Transmit descriptors */
/* splitq tx buf, singleq tx buf and singleq compl desc */
struct idpf_base_tx_desc {
@@ -74,4 +132,89 @@ struct idpf_splitq_tx_compl_desc {
	u8 rsvd; /* Reserved */
}; /* writeback used with completion queues */
/* Common cmd field defines for all desc except Flex Flow Scheduler (0x0C) */
enum idpf_tx_flex_desc_cmd_bits {
	IDPF_TX_FLEX_DESC_CMD_EOP		= BIT(0),
	IDPF_TX_FLEX_DESC_CMD_RS		= BIT(1),
	IDPF_TX_FLEX_DESC_CMD_RE		= BIT(2),
	IDPF_TX_FLEX_DESC_CMD_IL2TAG1		= BIT(3),
	IDPF_TX_FLEX_DESC_CMD_DUMMY		= BIT(4),
	IDPF_TX_FLEX_DESC_CMD_CS_EN		= BIT(5),
	IDPF_TX_FLEX_DESC_CMD_FILT_AU_EN	= BIT(6),
	IDPF_TX_FLEX_DESC_CMD_FILT_AU_EVICT	= BIT(7),
};
struct idpf_flex_tx_desc {
	__le64 buf_addr;	/* Packet buffer address */
	struct {
#define IDPF_FLEX_TXD_QW1_DTYPE_S	0
#define IDPF_FLEX_TXD_QW1_DTYPE_M	GENMASK(4, 0)
#define IDPF_FLEX_TXD_QW1_CMD_S		5
#define IDPF_FLEX_TXD_QW1_CMD_M		GENMASK(15, 5)
		__le16 cmd_dtype;
		/* DTYPE=IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2 (0x07) */
		struct {
			__le16 l2tag1;
			__le16 l2tag2;
		} l2tags;
		__le16 buf_size;
	} qw1;
};
struct idpf_flex_tx_sched_desc {
	__le64 buf_addr;	/* Packet buffer address */

	/* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE_16B (0x0C) */
	struct {
		u8 cmd_dtype;
#define IDPF_TXD_FLEX_FLOW_DTYPE_M	GENMASK(4, 0)
#define IDPF_TXD_FLEX_FLOW_CMD_EOP	BIT(5)
#define IDPF_TXD_FLEX_FLOW_CMD_CS_EN	BIT(6)
#define IDPF_TXD_FLEX_FLOW_CMD_RE	BIT(7)

		/* [23:23] Horizon Overflow bit, [22:0] timestamp */
		u8 ts[3];
#define IDPF_TXD_FLOW_SCH_HORIZON_OVERFLOW_M	BIT(7)

		__le16 compl_tag;
		__le16 rxr_bufsize;
#define IDPF_TXD_FLEX_FLOW_RXR		BIT(14)
#define IDPF_TXD_FLEX_FLOW_BUFSIZE_M	GENMASK(13, 0)
	} qw1;
};
/* Common cmd fields for all flex context descriptors
* Note: these defines already account for the 5 bit dtype in the cmd_dtype
* field
*/
enum idpf_tx_flex_ctx_desc_cmd_bits {
	IDPF_TX_FLEX_CTX_DESC_CMD_TSO			= BIT(5),
	IDPF_TX_FLEX_CTX_DESC_CMD_TSYN_EN		= BIT(6),
	IDPF_TX_FLEX_CTX_DESC_CMD_L2TAG2		= BIT(7),
	IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_UPLNK		= BIT(9),
	IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_LOCAL		= BIT(10),
	IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_TARGETVSI	= GENMASK(10, 9),
};
/* Standard flex descriptor TSO context quad word */
struct idpf_flex_tx_tso_ctx_qw {
	__le32 flex_tlen;
#define IDPF_TXD_FLEX_CTX_TLEN_M	GENMASK(17, 0)
#define IDPF_TXD_FLEX_TSO_CTX_FLEX_S	24
	__le16 mss_rt;
#define IDPF_TXD_FLEX_CTX_MSS_RT_M	GENMASK(13, 0)
	u8 hdr_len;
	u8 flex;
};
struct idpf_flex_tx_ctx_desc {
	/* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX (0x05) */
	struct {
		struct idpf_flex_tx_tso_ctx_qw qw0;
		struct {
			__le16 cmd_dtype;
			u8 flex[6];
		} qw1;
	} tso;
};
#endif /* _IDPF_LAN_TXRX_H_ */
@@ -1557,6 +1557,7 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem)
static const struct net_device_ops idpf_netdev_ops_splitq = {
	.ndo_open = idpf_open,
	.ndo_stop = idpf_stop,
	.ndo_start_xmit = idpf_tx_splitq_start,
};
static const struct net_device_ops idpf_netdev_ops_singleq = {
This diff is collapsed.
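The collapsed file is presumably idpf_txrx.c, which carries the start_xmit implementation itself. Based on the commit message, a minimal sketch of the entry point might look like the following; idpf_netdev_to_vport() and idpf_tx_splitq_frame() are assumptions here, not helpers confirmed by the visible hunks:

/* Sketch of the splitq start_xmit flow described in the commit message. */
netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
				 struct net_device *netdev)
{
	struct idpf_vport *vport = idpf_netdev_to_vport(netdev); /* assumed helper */
	struct idpf_queue *tx_q;

	tx_q = vport->txqs[skb_get_queue_mapping(skb)];

	/* Hardware cannot handle really short frames; pad to the queue's
	 * minimum supported packet length before doing anything else.
	 */
	if (skb_put_padto(skb, tx_q->tx_min_pkt_len))
		return NETDEV_TX_OK;

	/* The frame path (assumed here) linearizes an skb that needs more
	 * buffers than tx_max_bufs, stops the queue when descriptors or
	 * completion-queue space run short, then maps buffers, fills the
	 * descriptors and bumps the tail.
	 */
	return idpf_tx_splitq_frame(skb, tx_q);
}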
@@ -5,6 +5,8 @@
#define _IDPF_TXRX_H_
#include <net/page_pool/helpers.h>
#include <net/tcp.h>
#include <net/netdev_queues.h>
#define IDPF_LARGE_MAX_Q 256
#define IDPF_MAX_Q 16
@@ -67,18 +69,62 @@
#define IDPF_PACKET_HDR_PAD \
	(ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN * 2)
/* Minimum number of descriptors between 2 descriptors with the RE bit set;
* only relevant in flow scheduling mode
*/
#define IDPF_TX_SPLITQ_RE_MIN_GAP 64
#define IDPF_SINGLEQ_RX_BUF_DESC(rxq, i)	\
	(&(((struct virtchnl2_singleq_rx_buf_desc *)((rxq)->desc_ring))[i]))
#define IDPF_SPLITQ_RX_BUF_DESC(rxq, i)	\
	(&(((struct virtchnl2_splitq_rx_buf_desc *)((rxq)->desc_ring))[i]))

#define IDPF_FLEX_TX_DESC(txq, i) \
	(&(((union idpf_tx_flex_desc *)((txq)->desc_ring))[i]))
#define IDPF_FLEX_TX_CTX_DESC(txq, i)	\
	(&(((struct idpf_flex_tx_ctx_desc *)((txq)->desc_ring))[i]))

#define IDPF_DESC_UNUSED(txq)	\
	((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \
	(txq)->next_to_clean - (txq)->next_to_use - 1)
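For example, with desc_count = 512, next_to_clean = 10 and next_to_use = 500, IDPF_DESC_UNUSED evaluates to 512 + 10 - 500 - 1 = 21 free descriptors; the trailing -1 keeps next_to_use from ever catching up to next_to_clean.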
#define IDPF_TX_BUF_RSV_UNUSED(txq)	((txq)->buf_stack.top)
#define IDPF_TX_BUF_RSV_LOW(txq)	(IDPF_TX_BUF_RSV_UNUSED(txq) < \
					 (txq)->desc_count >> 2)
#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq)	((txcq)->desc_count >> 1)
/* Determine the absolute number of completions pending, i.e. the number of
 * completions that are expected to arrive on the TX completion queue.
 */
#define IDPF_TX_COMPLQ_PENDING(txq)	\
	(((txq)->num_completions_pending >= (txq)->complq->num_completions ? \
	0 : U64_MAX) + \
	(txq)->num_completions_pending - (txq)->complq->num_completions)
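Together these macros back the queue-stop behavior named in the commit message. A hedged sketch of how they might combine (the helper name and the field wiring through the TX queue group are assumptions; the visible hunks only show that the group owns num_completions_pending):

/* Sketch only: stop the TX queue when descriptors run short or when
 * pending completions could overrun the completion queue.
 */
static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q,
				     unsigned int descs_needed)
{
	if (IDPF_DESC_UNUSED(tx_q) < descs_needed ||
	    IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) >
			IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) ||
	    IDPF_TX_BUF_RSV_LOW(tx_q)) {
		netif_stop_subqueue(tx_q->vport->netdev, tx_q->idx);
		return -EBUSY;
	}

	return 0;
}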
#define IDPF_TX_SPLITQ_COMPL_TAG_WIDTH 16
#define IDPF_SPLITQ_TX_INVAL_COMPL_TAG -1
/* Adjust the generation for the completion tag and wrap if necessary */
#define IDPF_TX_ADJ_COMPL_TAG_GEN(txq) \
	((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? \
	0 : (txq)->compl_tag_cur_gen)
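For example, with compl_tag_gen_max = 4 and the result assigned back to compl_tag_cur_gen, successive evaluations yield 1, 2, 3, 0, 1, ..., keeping the generation in the range [0, compl_tag_gen_max).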
#define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS)
#define IDPF_TX_FLAGS_TSO BIT(0)
union idpf_tx_flex_desc {
	struct idpf_flex_tx_desc q; /* queue based scheduling */
	struct idpf_flex_tx_sched_desc flow; /* flow based scheduling */
};
/**
* struct idpf_tx_buf
* @next_to_watch: Next descriptor to clean
* @skb: Pointer to the skb
* @dma: DMA address
* @len: DMA length
* @bytecount: Number of bytes
* @gso_segs: Number of GSO segments
* @compl_tag: Splitq only, unique identifier for a buffer. Used to compare
* with completion tag returned in buffer completion event.
* Because the completion tag is expected to be the same in all
@@ -94,6 +140,13 @@
* this buffer entry should be skipped.
*/
struct idpf_tx_buf {
	void *next_to_watch;
	struct sk_buff *skb;
	DEFINE_DMA_UNMAP_ADDR(dma);
	DEFINE_DMA_UNMAP_LEN(len);
	unsigned int bytecount;
	unsigned short gso_segs;

	union {
		int compl_tag;
@@ -117,6 +170,64 @@ struct idpf_buf_lifo {
	struct idpf_tx_stash **bufs;
};
/**
* struct idpf_tx_offload_params - Offload parameters for a given packet
* @tx_flags: Feature flags enabled for this packet
* @tso_len: Total length of payload to segment
* @mss: Segment size
* @tso_segs: Number of segments to be sent
* @tso_hdr_len: Length of headers to be duplicated
* @td_cmd: Command field to be inserted into descriptor
*/
struct idpf_tx_offload_params {
	u32 tx_flags;

	u32 tso_len;
	u16 mss;
	u16 tso_segs;
	u16 tso_hdr_len;

	u16 td_cmd;
};
/**
* struct idpf_tx_splitq_params
* @dtype: General descriptor info
* @eop_cmd: Type of EOP
* @compl_tag: Associated tag for completion
* @td_tag: Descriptor tunneling tag
* @offload: Offload parameters
*/
struct idpf_tx_splitq_params {
	enum idpf_tx_desc_dtype_value dtype;
	u16 eop_cmd;
	union {
		u16 compl_tag;
		u16 td_tag;
	};

	struct idpf_tx_offload_params offload;
};
#define IDPF_TX_MIN_PKT_LEN 17
#define IDPF_TX_DESCS_FOR_SKB_DATA_PTR 1
#define IDPF_TX_DESCS_PER_CACHE_LINE	(L1_CACHE_BYTES / \
					 sizeof(struct idpf_flex_tx_desc))
#define IDPF_TX_DESCS_FOR_CTX		1
/* TX descriptors needed, worst case */
#define IDPF_TX_DESC_NEEDED (MAX_SKB_FRAGS + IDPF_TX_DESCS_FOR_CTX + \
			     IDPF_TX_DESCS_PER_CACHE_LINE + \
			     IDPF_TX_DESCS_FOR_SKB_DATA_PTR)
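On a typical configuration (MAX_SKB_FRAGS = 17, 64-byte cache lines, and the 16-byte struct idpf_flex_tx_desc), IDPF_TX_DESCS_PER_CACHE_LINE is 4, so the worst case comes to 17 + 1 + 4 + 1 = 23 descriptors.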
/* The size limit for a transmit buffer in a descriptor is (16K - 1).
* In order to align with the read requests we will align the value to
* the nearest 4K which represents our maximum read request size.
*/
#define IDPF_TX_MAX_READ_REQ_SIZE SZ_4K
#define IDPF_TX_MAX_DESC_DATA (SZ_16K - 1)
#define IDPF_TX_MAX_DESC_DATA_ALIGNED \
	ALIGN_DOWN(IDPF_TX_MAX_DESC_DATA, IDPF_TX_MAX_READ_REQ_SIZE)
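Concretely, ALIGN_DOWN(SZ_16K - 1, SZ_4K) = ALIGN_DOWN(16383, 4096) = 12288, so each data descriptor carries at most 12K of a large buffer.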
#define IDPF_RX_DMA_ATTR \
	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
@@ -344,6 +455,23 @@ struct idpf_q_vector {
	char *name;
};
struct idpf_rx_queue_stats {
	/* stub */
};

struct idpf_tx_queue_stats {
	u64_stats_t lso_pkts;
	u64_stats_t linearize;
	u64_stats_t q_busy;
	u64_stats_t skb_drops;
	u64_stats_t dma_map_errs;
};

union idpf_queue_stats {
	struct idpf_rx_queue_stats rx;
	struct idpf_tx_queue_stats tx;
};
#define IDPF_ITR_DYNAMIC 1
#define IDPF_ITR_20K 0x0032
#define IDPF_ITR_TX_DEF IDPF_ITR_20K
@@ -382,6 +510,8 @@ struct idpf_q_vector {
* @next_to_alloc: RX buffer to allocate at. Used only for RX. In splitq model
* only relevant to RX queue.
* @flags: See enum idpf_queue_flags_t
* @q_stats: See union idpf_queue_stats
* @stats_sync: See struct u64_stats_sync
* @rx_hsplit_en: RX headsplit enable
* @rx_hbuf_size: Header buffer size
* @rx_buf_size: Buffer size
@@ -395,6 +525,10 @@ struct idpf_q_vector {
* @desc_ring: Descriptor ring memory
* @tx_max_bufs: Max buffers that can be transmitted with scatter-gather
* @tx_min_pkt_len: Min supported packet length
* @num_completions: Only relevant for TX completion queue. It tracks the
* number of completions received to compare against the
* number of completions pending, as accumulated by the
* TX queues.
* @buf_stack: Stack of empty buffers to store buffer info for out of order
* buffer completions. See struct idpf_buf_lifo.
* @compl_tag_bufid_m: Completion tag buffer id mask
@@ -450,6 +584,9 @@ struct idpf_queue {
	u16 next_to_alloc;
	DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);

	union idpf_queue_stats q_stats;
	struct u64_stats_sync stats_sync;

	bool rx_hsplit_en;
	u16 rx_hbuf_size;
	u16 rx_buf_size;
@@ -465,6 +602,8 @@ struct idpf_queue {
	u16 tx_max_bufs;
	u8 tx_min_pkt_len;

	u32 num_completions;

	struct idpf_buf_lifo buf_stack;

	u16 compl_tag_bufid_m;
@@ -587,6 +726,42 @@ struct idpf_txq_group {
	u32 num_completions_pending;
};
/**
* idpf_size_to_txd_count - Get number of descriptors needed for large Tx frag
* @size: transmit request size in bytes
*
* In the case where a large frag (>= 16K) needs to be split across multiple
* descriptors, we need to assume that we can have no more than 12K of data
* per descriptor due to hardware alignment restrictions (4K alignment).
*/
static inline u32 idpf_size_to_txd_count(unsigned int size)
{
	return DIV_ROUND_UP(size, IDPF_TX_MAX_DESC_DATA_ALIGNED);
}
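For example, a 64KB TSO fragment needs DIV_ROUND_UP(65536, 12288) = 6 descriptors.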
void idpf_tx_splitq_build_ctb(union idpf_tx_flex_desc *desc,
			      struct idpf_tx_splitq_params *params,
			      u16 td_cmd, u16 size);
void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
				    struct idpf_tx_splitq_params *params,
				    u16 td_cmd, u16 size);
/**
* idpf_tx_splitq_build_desc - determine which type of data descriptor to build
* @desc: descriptor to populate
* @params: pointer to tx params struct
* @td_cmd: command to be filled in desc
* @size: size of buffer
*/
static inline void idpf_tx_splitq_build_desc(union idpf_tx_flex_desc *desc,
					     struct idpf_tx_splitq_params *params,
					     u16 td_cmd, u16 size)
{
	if (params->dtype == IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2)
		idpf_tx_splitq_build_ctb(desc, params, td_cmd, size);
	else
		idpf_tx_splitq_build_flow_desc(desc, params, td_cmd, size);
}
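The dtype thus discriminates the union: the L2TAG1/L2TAG2 context-buffer layout serves queue-based (in-order) scheduling, while any other dtype falls through to the flow-scheduling descriptor with its completion tag.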
/**
* idpf_alloc_page - Allocate a new RX buffer from the page pool
* @pool: page_pool to allocate from
@@ -634,6 +809,8 @@ void idpf_deinit_rss(struct idpf_vport *vport);
int idpf_rx_bufs_init_all(struct idpf_vport *vport);
bool idpf_init_rx_buf_hw_alloc(struct idpf_queue *rxq, struct idpf_rx_buf *buf);
void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val);
netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
				 struct net_device *netdev);
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rxq,
				      u16 cleaned_count);