Commit 410a619a authored by David S. Miller's avatar David S. Miller

Merge branch 'qed-Add-iWARP-support-for-unaligned-MPA-packets'

Michal Kalderon says:

====================
qed: Add iWARP support for unaligned MPA packets

This patch series adds support for handling unaligned MPA packets.
(FPDUs split over more than one tcp packet).
When FW detects that a packet is unaligned, it forwards the packet to
the driver via a light l2 dedicated connection. The driver then
stores this packet until the remainder of the packet is received.
Once the driver reconstructs the full FPDU, it sends it down
to fw via the ll2 connection. Driver also breaks down any packed
PDUs into separate packets for FW.

Patches 1-6 are all slight modifications to ll2 to support additional
requirements for the unaligned MPA ll2 client.

Patch 7 opens the additional ll2 connection for iWARP.
Patches 8-12 contain the algorithm for aligning packets.
====================
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 90561843 1e28eaad
......@@ -1415,7 +1415,12 @@ int qed_iwarp_alloc(struct qed_hwfn *p_hwfn)
/* Free the iWARP resources held in p_hwfn->p_rdma_info: the tcp cid bitmap
 * and the three buffers used by the unaligned-MPA path.
 */
void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn)
{
	struct qed_iwarp_info *iwarp = &p_hwfn->p_rdma_info->iwarp;

	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1);

	/* kfree(NULL) is a no-op, so no guards are needed here */
	kfree(iwarp->mpa_bufs);
	kfree(iwarp->partial_fpdus);
	kfree(iwarp->mpa_intermediate_buf);
}
int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams)
......@@ -1713,6 +1718,569 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn,
return 0;
}
/* Look up the partial-FPDU tracking slot that belongs to @cid.
 *
 * The slot index is the cid relative to the first iWARP protocol cid.
 * Returns a pointer into iwarp.partial_fpdus[], or NULL (after logging an
 * error) when the index falls outside max_num_partial_fpdus.
 */
static struct qed_iwarp_fpdu *qed_iwarp_get_curr_fpdu(struct qed_hwfn *p_hwfn,
						      u16 cid)
{
	struct qed_iwarp_info *iwarp = &p_hwfn->p_rdma_info->iwarp;
	u32 slot;

	slot = cid - qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_IWARP);
	if (slot < iwarp->max_num_partial_fpdus)
		return &iwarp->partial_fpdus[slot];

	DP_ERR(p_hwfn, "Invalid cid %x max_num_partial_fpdus=%x\n", cid,
	       iwarp->max_num_partial_fpdus);
	return NULL;
}
/* Classification of a tcp segment received on the unaligned-MPA ll2
 * connection, as decided by qed_iwarp_mpa_classify(). The numeric values
 * are used as indices into pkt_type_str[], so keep both in the same order.
 */
enum qed_iwarp_mpa_pkt_type {
	/* segment starts with an FPDU that fits entirely in the payload */
	QED_IWARP_MPA_PKT_PACKED,
	/* segment starts an FPDU whose remainder has not arrived yet */
	QED_IWARP_MPA_PKT_PARTIAL,
	/* segment continues an FPDU that still has incomplete_bytes pending */
	QED_IWARP_MPA_PKT_UNALIGNED
};
/* Stored in fpdu->incomplete_bytes when only the first byte of the MPA
 * length field has been received, i.e. the real FPDU length is not known yet.
 */
#define QED_IWARP_INVALID_FPDU_LENGTH 0xffff

/* Size in bytes of the MPA length field that precedes the ULPDU */
#define QED_IWARP_MPA_FPDU_LENGTH_SIZE (2)
/* Size in bytes of the CRC32 digest that terminates every FPDU */
#define QED_IWARP_MPA_CRC32_DIGEST_SIZE (4)

/* Pad to multiple of 4 */
#define QED_IWARP_PDU_DATA_LEN_WITH_PAD(data_len) ALIGN((data_len), 4)

/* Total on-wire FPDU size for an MPA length field value of _mpa_len:
 * length field + data, padded to 4 bytes, followed by the CRC.
 */
#define QED_IWARP_FPDU_LEN_WITH_PAD(_mpa_len) \
	(QED_IWARP_PDU_DATA_LEN_WITH_PAD((_mpa_len) + \
					 QED_IWARP_MPA_FPDU_LENGTH_SIZE) + \
	 QED_IWARP_MPA_CRC32_DIGEST_SIZE)

/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
#define QED_IWARP_MAX_BDS_PER_FPDU 3
/* Printable names for enum qed_iwarp_mpa_pkt_type, indexed by the enum
 * value. Only used for debug prints in this file, so it is kept static and
 * read-only rather than exported as a writable global with a generic name.
 */
static const char * const pkt_type_str[] = {
	"QED_IWARP_MPA_PKT_PACKED",
	"QED_IWARP_MPA_PKT_PARTIAL",
	"QED_IWARP_MPA_PKT_UNALIGNED"
};
static int
qed_iwarp_recycle_pkt(struct qed_hwfn *p_hwfn,
struct qed_iwarp_fpdu *fpdu,
struct qed_iwarp_ll2_buff *buf);
/* Decide how the payload that starts at @mpa_data relates to the FPDU state
 * tracked in @fpdu.
 *
 * @fpdu:            per-connection partial FPDU state; fpdu_length is updated
 *                   unless the segment continues an already stored FPDU
 * @tcp_payload_len: number of payload bytes available at @mpa_data
 * @mpa_data:        first payload byte (start of the MPA length field when a
 *                   new FPDU begins here)
 *
 * Returns UNALIGNED when @fpdu still has incomplete bytes pending, PACKED
 * when a whole FPDU fits in the payload, PARTIAL otherwise.
 */
static enum qed_iwarp_mpa_pkt_type
qed_iwarp_mpa_classify(struct qed_hwfn *p_hwfn,
		       struct qed_iwarp_fpdu *fpdu,
		       u16 tcp_payload_len, u8 *mpa_data)
{
	enum qed_iwarp_mpa_pkt_type pkt_type;
	u16 mpa_len;

	if (fpdu->incomplete_bytes) {
		pkt_type = QED_IWARP_MPA_PKT_UNALIGNED;
		goto out;
	}

	/* special case of one byte remaining...
	 * lower byte will be read next packet
	 */
	if (tcp_payload_len == 1) {
		fpdu->fpdu_length = *mpa_data << BITS_PER_BYTE;
		pkt_type = QED_IWARP_MPA_PKT_PARTIAL;
		goto out;
	}

	/* The length field is big-endian and may sit at any byte offset in
	 * the buffer, so assemble it byte by byte instead of dereferencing a
	 * cast u16 pointer (avoids misaligned loads and aliasing issues).
	 */
	mpa_len = (u16)(((u16)mpa_data[0] << BITS_PER_BYTE) | mpa_data[1]);
	fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len);

	if (fpdu->fpdu_length <= tcp_payload_len)
		pkt_type = QED_IWARP_MPA_PKT_PACKED;
	else
		pkt_type = QED_IWARP_MPA_PKT_PARTIAL;

out:
	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
		   "MPA_ALIGN: %s: fpdu_length=0x%x tcp_payload_len:0x%x\n",
		   pkt_type_str[pkt_type], fpdu->fpdu_length, tcp_payload_len);

	return pkt_type;
}
/* Start tracking a new FPDU whose first bytes live in @buf.
 *
 * Records the buffer, the DMA address and size of the packet header, and
 * the DMA/virtual address of the first MPA byte. incomplete_bytes is set to
 * the invalid marker when only one payload byte is present, to the number of
 * bytes still missing when the payload is shorter than fpdu_length, and to
 * 0 otherwise. mpa_frag_len is then fpdu_length minus incomplete_bytes.
 *
 * fpdu->fpdu_length is read here and is expected to have been set by the
 * caller (see qed_iwarp_mpa_classify()).
 */
static void
qed_iwarp_init_fpdu(struct qed_iwarp_ll2_buff *buf,
		    struct qed_iwarp_fpdu *fpdu,
		    struct unaligned_opaque_data *pkt_data,
		    u16 tcp_payload_size, u8 placement_offset)
{
	u16 missing = 0; /* complete fpdu unless proven otherwise */

	if (tcp_payload_size == 1)
		missing = QED_IWARP_INVALID_FPDU_LENGTH;
	else if (tcp_payload_size < fpdu->fpdu_length)
		missing = fpdu->fpdu_length - tcp_payload_size;

	fpdu->mpa_buf = buf;

	/* packet header */
	fpdu->pkt_hdr_size = pkt_data->tcp_payload_offset;
	fpdu->pkt_hdr = buf->data_phys_addr + placement_offset;

	/* first mpa fragment */
	fpdu->mpa_frag_virt = (u8 *)(buf->data) + pkt_data->first_mpa_offset;
	fpdu->mpa_frag = buf->data_phys_addr + pkt_data->first_mpa_offset;

	fpdu->incomplete_bytes = missing;
	fpdu->mpa_frag_len = fpdu->fpdu_length - missing;
}
/* Append the payload of the current rx buffer to the partial FPDU stored in
 * @fpdu and make @buf the FPDU's new backing buffer.
 *
 * The stored fragment and the new payload are first staged in the per-hwfn
 * mpa_intermediate_buf, the previously stored buffer is handed to
 * qed_iwarp_recycle_pkt(), and only then is the merged data copied to the
 * start of @buf.
 *
 * Returns 0 on success, -EINVAL when the merged data would not fit in @buf,
 * or the error from qed_iwarp_recycle_pkt(); in both error cases the fields
 * of @fpdu are left untouched.
 */
static int
qed_iwarp_cp_pkt(struct qed_hwfn *p_hwfn,
		 struct qed_iwarp_fpdu *fpdu,
		 struct unaligned_opaque_data *pkt_data,
		 struct qed_iwarp_ll2_buff *buf, u16 tcp_payload_size)
{
	u8 *tmp_buf = p_hwfn->p_rdma_info->iwarp.mpa_intermediate_buf;
	int rc;

	/* need to copy the data from the partial packet stored in fpdu
	 * to the new buf, for this we also need to move the data currently
	 * placed on the buf. The assumption is that the buffer is big enough
	 * since fpdu_length <= mss, we use an intermediate buffer since
	 * we may need to copy the new data to an overlapping location
	 */
	/* NOTE(review): buff_size is a u32 truncated to u16 for this
	 * comparison - confirm buffers never exceed 64K.
	 */
	if ((fpdu->mpa_frag_len + tcp_payload_size) > (u16)buf->buff_size) {
		DP_ERR(p_hwfn,
		       "MPA ALIGN: Unexpected: buffer is not large enough for split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n",
		       buf->buff_size, fpdu->mpa_frag_len,
		       tcp_payload_size, fpdu->incomplete_bytes);
		return -EINVAL;
	}

	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
		   "MPA ALIGN Copying fpdu: [%p, %d] [%p, %d]\n",
		   fpdu->mpa_frag_virt, fpdu->mpa_frag_len,
		   (u8 *)(buf->data) + pkt_data->first_mpa_offset,
		   tcp_payload_size);

	/* stage: [previously stored fragment][payload of the new segment] */
	memcpy(tmp_buf, fpdu->mpa_frag_virt, fpdu->mpa_frag_len);
	memcpy(tmp_buf + fpdu->mpa_frag_len,
	       (u8 *)(buf->data) + pkt_data->first_mpa_offset,
	       tcp_payload_size);

	/* give the old buffer back through the ll2 drop path */
	rc = qed_iwarp_recycle_pkt(p_hwfn, fpdu, fpdu->mpa_buf);
	if (rc)
		return rc;

	/* If we managed to post the buffer copy the data to the new buffer
	 * o/w this will occur in the next round...
	 */
	memcpy((u8 *)(buf->data), tmp_buf,
	       fpdu->mpa_frag_len + tcp_payload_size);

	fpdu->mpa_buf = buf;
	/* fpdu->pkt_hdr remains as is */
	/* fpdu->mpa_frag is overridden with new buf */
	fpdu->mpa_frag = buf->data_phys_addr;
	fpdu->mpa_frag_virt = buf->data;
	/* keep mpa_frag_len + incomplete_bytes == fpdu_length */
	fpdu->mpa_frag_len += tcp_payload_size;
	fpdu->incomplete_bytes -= tcp_payload_size;

	DP_VERBOSE(p_hwfn,
		   QED_MSG_RDMA,
		   "MPA ALIGN: split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n",
		   buf->buff_size, fpdu->mpa_frag_len, tcp_payload_size,
		   fpdu->incomplete_bytes);

	return 0;
}
/* Complete the FPDU length once the second byte of the MPA length field is
 * available.
 *
 * Only acts when @fpdu was created from a segment that held a single payload
 * byte (incomplete_bytes == QED_IWARP_INVALID_FPDU_LENGTH). In that case
 * fpdu_length holds the high byte of the length and @mpa_data points at the
 * low byte. After the update exactly one byte of the FPDU is stored, so
 * mpa_frag_len is 1 and incomplete_bytes is fpdu_length - 1, which keeps
 * mpa_frag_len + incomplete_bytes == fpdu_length as elsewhere in this file.
 */
static void
qed_iwarp_update_fpdu_length(struct qed_hwfn *p_hwfn,
			     struct qed_iwarp_fpdu *fpdu, u8 *mpa_data)
{
	u16 mpa_len;

	/* Update incomplete packets if needed */
	if (fpdu->incomplete_bytes == QED_IWARP_INVALID_FPDU_LENGTH) {
		/* Missing lower byte is now available */
		mpa_len = fpdu->fpdu_length | *mpa_data;
		fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len);
		/* one byte of hdr is all that was stored so far */
		fpdu->mpa_frag_len = 1;
		fpdu->incomplete_bytes = fpdu->fpdu_length - 1;
		DP_VERBOSE(p_hwfn,
			   QED_MSG_RDMA,
			   "MPA_ALIGN: Partial header mpa_len=%x fpdu_length=%x incomplete_bytes=%x\n",
			   mpa_len, fpdu->fpdu_length, fpdu->incomplete_bytes);
	}
}
/* Non-zero when the PKT_REACHED_WIN_RIGHT_EDGE field is set in the flags of
 * the given unaligned_opaque_data.
 */
#define QED_IWARP_IS_RIGHT_EDGE(_curr_pkt) \
	(GET_FIELD((_curr_pkt)->flags, \
		   UNALIGNED_OPAQUE_DATA_PKT_REACHED_WIN_RIGHT_EDGE))
/* This function is used to recycle a buffer using the ll2 drop option. It
* uses the mechanism to ensure that all buffers posted to tx before this one
* were completed. The buffer sent here will be sent as a cookie in the tx
* completion function and can then be reposted to rx chain when done. The flow
* that requires this is the flow where a FPDU splits over more than 3 tcp
* segments. In this case the driver needs to re-post a rx buffer instead of
* the one received, but driver can't simply repost a buffer it copied from
* as there is a case where the buffer was originally a packed FPDU, and is
* partially posted to FW. Driver needs to ensure FW is done with it.
*/
static int
qed_iwarp_recycle_pkt(struct qed_hwfn *p_hwfn,
struct qed_iwarp_fpdu *fpdu,
struct qed_iwarp_ll2_buff *buf)
{
struct qed_ll2_tx_pkt_info tx_pkt;
u8 ll2_handle;
int rc;
memset(&tx_pkt, 0, sizeof(tx_pkt));
tx_pkt.num_of_bds = 1;
tx_pkt.tx_dest = QED_LL2_TX_DEST_DROP;
tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2;
tx_pkt.first_frag = fpdu->pkt_hdr;
tx_pkt.first_frag_len = fpdu->pkt_hdr_size;
buf->piggy_buf = NULL;
tx_pkt.cookie = buf;
ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle;
rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true);
if (rc)
DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
"Can't drop packet rc=%d\n", rc);
DP_VERBOSE(p_hwfn,
QED_MSG_RDMA,
"MPA_ALIGN: send drop tx packet [%lx, 0x%x], buf=%p, rc=%d\n",
(unsigned long int)tx_pkt.first_frag,
tx_pkt.first_frag_len, buf, rc);
return rc;
}
static int
qed_iwarp_win_right_edge(struct qed_hwfn *p_hwfn, struct qed_iwarp_fpdu *fpdu)
{
struct qed_ll2_tx_pkt_info tx_pkt;
u8 ll2_handle;
int rc;
memset(&tx_pkt, 0, sizeof(tx_pkt));
tx_pkt.num_of_bds = 1;
tx_pkt.tx_dest = QED_LL2_TX_DEST_LB;
tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2;
tx_pkt.first_frag = fpdu->pkt_hdr;
tx_pkt.first_frag_len = fpdu->pkt_hdr_size;
tx_pkt.enable_ip_cksum = true;
tx_pkt.enable_l4_cksum = true;
tx_pkt.calc_ip_len = true;
/* vlan overload with enum iwarp_ll2_tx_queues */
tx_pkt.vlan = IWARP_LL2_ALIGNED_RIGHT_TRIMMED_TX_QUEUE;
ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle;
rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true);
if (rc)
DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
"Can't send right edge rc=%d\n", rc);
DP_VERBOSE(p_hwfn,
QED_MSG_RDMA,
"MPA_ALIGN: Sent right edge FPDU num_bds=%d [%lx, 0x%x], rc=%d\n",
tx_pkt.num_of_bds,
(unsigned long int)tx_pkt.first_frag,
tx_pkt.first_frag_len, rc);
return rc;
}
/* Post one complete FPDU to FW over the MPA ll2 connection (loopback,
 * IWARP_LL2_ALIGNED_TX_QUEUE).
 *
 * @fpdu:             FPDU state: header, first fragment and missing bytes
 * @curr_pkt:         opaque data of the segment currently being processed
 * @buf:              rx buffer of the current segment (source of the third
 *                    fragment for an unaligned FPDU)
 * @tcp_payload_size: payload bytes remaining in the current segment
 * @pkt_type:         PACKED (2 bds) or UNALIGNED (3 bds)
 *
 * Returns 0 on success or the error from the ll2 tx calls.
 */
static int
qed_iwarp_send_fpdu(struct qed_hwfn *p_hwfn,
		    struct qed_iwarp_fpdu *fpdu,
		    struct unaligned_opaque_data *curr_pkt,
		    struct qed_iwarp_ll2_buff *buf,
		    u16 tcp_payload_size, enum qed_iwarp_mpa_pkt_type pkt_type)
{
	struct qed_ll2_tx_pkt_info tx_pkt;
	u8 ll2_handle;
	int rc;

	memset(&tx_pkt, 0, sizeof(tx_pkt));

	/* An unaligned packet means it's split over two tcp segments. So the
	 * complete packet requires 3 bds, one for the header, one for the
	 * part of the fpdu of the first tcp segment, and the last fragment
	 * will point to the remainder of the fpdu. A packed pdu, requires only
	 * two bds, one for the header and one for the data.
	 */
	tx_pkt.num_of_bds = (pkt_type == QED_IWARP_MPA_PKT_UNALIGNED) ? 3 : 2;
	tx_pkt.tx_dest = QED_LL2_TX_DEST_LB;
	tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; /* offset in words */

	/* Send the mpa_buf only with the last fpdu (in case of packed) */
	/* otherwise the cookie stays NULL from the memset above */
	if (pkt_type == QED_IWARP_MPA_PKT_UNALIGNED ||
	    tcp_payload_size <= fpdu->fpdu_length)
		tx_pkt.cookie = fpdu->mpa_buf;

	tx_pkt.first_frag = fpdu->pkt_hdr;
	tx_pkt.first_frag_len = fpdu->pkt_hdr_size;
	tx_pkt.enable_ip_cksum = true;
	tx_pkt.enable_l4_cksum = true;
	tx_pkt.calc_ip_len = true;
	/* vlan overload with enum iwarp_ll2_tx_queues */
	tx_pkt.vlan = IWARP_LL2_ALIGNED_TX_QUEUE;

	/* special case of unaligned packet and not packed, need to send
	 * both buffers as cookie to release.
	 */
	/* NOTE(review): piggy_buf is set before the tx calls below and is not
	 * cleared here if they fail - confirm this is intended.
	 */
	if (tcp_payload_size == fpdu->incomplete_bytes)
		fpdu->mpa_buf->piggy_buf = buf;

	ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle;

	/* Set first fragment to header */
	rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true);
	if (rc)
		goto out;

	/* Set second fragment to first part of packet */
	rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn, ll2_handle,
					       fpdu->mpa_frag,
					       fpdu->mpa_frag_len);
	if (rc)
		goto out;

	/* nothing missing: header + one data fragment is the whole fpdu */
	if (!fpdu->incomplete_bytes)
		goto out;

	/* Set third fragment to second part of the packet */
	rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn,
					       ll2_handle,
					       buf->data_phys_addr +
					       curr_pkt->first_mpa_offset,
					       fpdu->incomplete_bytes);
out:
	DP_VERBOSE(p_hwfn,
		   QED_MSG_RDMA,
		   "MPA_ALIGN: Sent FPDU num_bds=%d first_frag_len=%x, mpa_frag_len=0x%x, incomplete_bytes:0x%x rc=%d\n",
		   tx_pkt.num_of_bds,
		   tx_pkt.first_frag_len,
		   fpdu->mpa_frag_len,
		   fpdu->incomplete_bytes, rc);

	return rc;
}
/* Rebuild a struct unaligned_opaque_data from the two 32-bit opaque words
 * of a completion and normalise it for driver use.
 *
 * On return curr_pkt->first_mpa_offset holds tcp_payload_offset plus the
 * reported first_mpa_offset, and both it and curr_pkt->cid have been passed
 * through the little-endian-to-cpu helpers. @p_hwfn is not used here.
 */
static void
qed_iwarp_mpa_get_data(struct qed_hwfn *p_hwfn,
		       struct unaligned_opaque_data *curr_pkt,
		       u32 opaque_data0, u32 opaque_data1)
{
	u64 opaque_data;

	/* opaque_data1 is passed as the high half, opaque_data0 as the low */
	opaque_data = HILO_64(opaque_data1, opaque_data0);
	/* reinterpret the 64-bit value as the opaque data layout */
	*curr_pkt = *((struct unaligned_opaque_data *)&opaque_data);

	curr_pkt->first_mpa_offset = curr_pkt->tcp_payload_offset +
				     le16_to_cpu(curr_pkt->first_mpa_offset);
	curr_pkt->cid = le32_to_cpu(curr_pkt->cid);
}
/* This function is called when an unaligned or incomplete MPA packet arrives
 * driver needs to align the packet, perhaps using previous data and send
 * it down to FW once it is aligned.
 *
 * The payload of @mpa_buf is consumed in a loop: each iteration classifies
 * the data at first_mpa_offset and either stores it as the start of a
 * partial FPDU, sends a packed FPDU, or completes/extends a previously
 * stored FPDU. mpa_buf->tcp_payload_len and first_mpa_offset are advanced as
 * data is consumed, so a later call on the same @mpa_buf resumes where this
 * one stopped.
 *
 * Returns the last rc seen. When the cid does not map to a FPDU slot the rx
 * buffer is posted back and -EINVAL is returned.
 */
static int
qed_iwarp_process_mpa_pkt(struct qed_hwfn *p_hwfn,
			  struct qed_iwarp_ll2_mpa_buf *mpa_buf)
{
	struct unaligned_opaque_data *curr_pkt = &mpa_buf->data;
	struct qed_iwarp_ll2_buff *buf = mpa_buf->ll2_buf;
	enum qed_iwarp_mpa_pkt_type pkt_type;
	struct qed_iwarp_fpdu *fpdu;
	int rc = -EINVAL;
	u8 *mpa_data;

	fpdu = qed_iwarp_get_curr_fpdu(p_hwfn, curr_pkt->cid & 0xffff);
	if (!fpdu) { /* something corrupt with cid, post rx back */
		DP_ERR(p_hwfn, "Invalid cid, drop and post back to rx cid=%x\n",
		       curr_pkt->cid);
		goto err;
	}

	do {
		mpa_data = ((u8 *)(buf->data) + curr_pkt->first_mpa_offset);

		pkt_type = qed_iwarp_mpa_classify(p_hwfn, fpdu,
						  mpa_buf->tcp_payload_len,
						  mpa_data);

		switch (pkt_type) {
		case QED_IWARP_MPA_PKT_PARTIAL:
			/* remember the start of the fpdu; nothing is sent to
			 * FW unless the right edge was reached
			 */
			qed_iwarp_init_fpdu(buf, fpdu,
					    curr_pkt,
					    mpa_buf->tcp_payload_len,
					    mpa_buf->placement_offset);

			/* NOTE(review): when this is the first iteration rc
			 * still holds its initial -EINVAL here and is what the
			 * function returns - confirm this is intended.
			 */
			if (!QED_IWARP_IS_RIGHT_EDGE(curr_pkt)) {
				mpa_buf->tcp_payload_len = 0;
				break;
			}

			rc = qed_iwarp_win_right_edge(p_hwfn, fpdu);

			if (rc) {
				DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
					   "Can't send FPDU:reset rc=%d\n", rc);
				memset(fpdu, 0, sizeof(*fpdu));
				break;
			}

			mpa_buf->tcp_payload_len = 0;
			break;
		case QED_IWARP_MPA_PKT_PACKED:
			qed_iwarp_init_fpdu(buf, fpdu,
					    curr_pkt,
					    mpa_buf->tcp_payload_len,
					    mpa_buf->placement_offset);

			rc = qed_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf,
						 mpa_buf->tcp_payload_len,
						 pkt_type);
			if (rc) {
				DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
					   "Can't send FPDU:reset rc=%d\n", rc);
				memset(fpdu, 0, sizeof(*fpdu));
				break;
			}

			/* step over the fpdu that was just sent */
			mpa_buf->tcp_payload_len -= fpdu->fpdu_length;
			curr_pkt->first_mpa_offset += fpdu->fpdu_length;
			break;
		case QED_IWARP_MPA_PKT_UNALIGNED:
			qed_iwarp_update_fpdu_length(p_hwfn, fpdu, mpa_data);
			if (mpa_buf->tcp_payload_len < fpdu->incomplete_bytes) {
				/* special handling of fpdu split over more
				 * than 2 segments
				 */
				if (QED_IWARP_IS_RIGHT_EDGE(curr_pkt)) {
					rc = qed_iwarp_win_right_edge(p_hwfn,
								      fpdu);
					/* packet will be re-processed later */
					if (rc)
						return rc;
				}

				rc = qed_iwarp_cp_pkt(p_hwfn, fpdu, curr_pkt,
						      buf,
						      mpa_buf->tcp_payload_len);
				if (rc) /* packet will be re-processed later */
					return rc;

				mpa_buf->tcp_payload_len = 0;
				break;
			}

			rc = qed_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf,
						 mpa_buf->tcp_payload_len,
						 pkt_type);
			if (rc) {
				DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
					   "Can't send FPDU:delay rc=%d\n", rc);
				/* don't reset fpdu -> we need it for next
				 * classify
				 */
				break;
			}

			/* step over the bytes that completed the fpdu */
			mpa_buf->tcp_payload_len -= fpdu->incomplete_bytes;
			curr_pkt->first_mpa_offset += fpdu->incomplete_bytes;
			/* The framed PDU was sent - no more incomplete bytes */
			fpdu->incomplete_bytes = 0;
			break;
		}
	} while (mpa_buf->tcp_payload_len && !rc);

	return rc;

err:
	qed_iwarp_ll2_post_rx(p_hwfn,
			      buf,
			      p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle);
	return rc;
}
/* Drain mpa_buf_pending_list in FIFO order.
 *
 * Each entry is handed to qed_iwarp_process_mpa_pkt(). An -EBUSY result
 * leaves the entry at the head of the pending list and stops the drain so
 * it can be retried later. Any other result returns the entry to
 * mpa_buf_list; a non-zero one additionally logs a notice and stops.
 */
static void qed_iwarp_process_pending_pkts(struct qed_hwfn *p_hwfn)
{
	struct qed_iwarp_info *iwarp = &p_hwfn->p_rdma_info->iwarp;
	struct list_head *pending = &iwarp->mpa_buf_pending_list;
	struct qed_iwarp_ll2_mpa_buf *cur;
	int rc;

	for (;;) {
		if (list_empty(pending))
			return;

		cur = list_first_entry(pending,
				       struct qed_iwarp_ll2_mpa_buf,
				       list_entry);

		rc = qed_iwarp_process_mpa_pkt(p_hwfn, cur);

		/* busy means stop and continue processing later, keep the
		 * buf on the pending list.
		 */
		if (rc == -EBUSY)
			return;

		/* done with this entry (successfully or not): recycle it */
		list_del(&cur->list_entry);
		list_add_tail(&cur->list_entry, &iwarp->mpa_buf_list);

		if (rc) { /* different error, don't continue */
			DP_NOTICE(p_hwfn, "process pkts failed rc=%d\n", rc);
			return;
		}
	}
}
/* Rx completion callback of the unaligned-MPA ll2 connection.
 *
 * @cxt:  the qed_hwfn the connection belongs to
 * @data: rx completion data; data->cookie is the rx buffer
 *
 * Takes a free tracking entry from mpa_buf_list, fills it from the
 * completion (opaque data, payload length, placement offset), queues it on
 * mpa_buf_pending_list and kicks the pending-list processing. If no free
 * entry is available the rx buffer is posted straight back to the rx chain.
 */
static void
qed_iwarp_ll2_comp_mpa_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
{
	struct qed_iwarp_ll2_mpa_buf *mpa_buf;
	struct qed_iwarp_info *iwarp_info;
	struct qed_hwfn *p_hwfn = cxt;

	iwarp_info = &p_hwfn->p_rdma_info->iwarp;

	/* list_first_entry() never yields NULL - on an empty list it returns
	 * a bogus pointer derived from the list head - so the _or_null
	 * variant is required for the check below to be meaningful.
	 */
	mpa_buf = list_first_entry_or_null(&iwarp_info->mpa_buf_list,
					   struct qed_iwarp_ll2_mpa_buf,
					   list_entry);
	if (!mpa_buf) {
		DP_ERR(p_hwfn, "No free mpa buf\n");
		goto err;
	}

	list_del(&mpa_buf->list_entry);
	qed_iwarp_mpa_get_data(p_hwfn, &mpa_buf->data,
			       data->opaque_data_0, data->opaque_data_1);

	DP_VERBOSE(p_hwfn,
		   QED_MSG_RDMA,
		   "LL2 MPA CompRx payload_len:0x%x\tfirst_mpa_offset:0x%x\ttcp_payload_offset:0x%x\tflags:0x%x\tcid:0x%x\n",
		   data->length.packet_length, mpa_buf->data.first_mpa_offset,
		   mpa_buf->data.tcp_payload_offset, mpa_buf->data.flags,
		   mpa_buf->data.cid);

	mpa_buf->ll2_buf = data->cookie;
	/* payload is whatever follows the first mpa byte in the packet */
	mpa_buf->tcp_payload_len = data->length.packet_length -
				   mpa_buf->data.first_mpa_offset;
	/* from here on first_mpa_offset is relative to the buffer start */
	mpa_buf->data.first_mpa_offset += data->u.placement_offset;
	mpa_buf->placement_offset = data->u.placement_offset;

	list_add_tail(&mpa_buf->list_entry, &iwarp_info->mpa_buf_pending_list);

	qed_iwarp_process_pending_pkts(p_hwfn);
	return;
err:
	qed_iwarp_ll2_post_rx(p_hwfn, data->cookie,
			      iwarp_info->ll2_mpa_handle);
}
static void
qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
{
......@@ -1855,10 +2423,25 @@ static void qed_iwarp_ll2_comp_tx_pkt(void *cxt, u8 connection_handle,
bool b_last_fragment, bool b_last_packet)
{
struct qed_iwarp_ll2_buff *buffer = cookie;
struct qed_iwarp_ll2_buff *piggy;
struct qed_hwfn *p_hwfn = cxt;
if (!buffer) /* can happen in packed mpa unaligned... */
return;
/* this was originally an rx packet, post it back */
piggy = buffer->piggy_buf;
if (piggy) {
buffer->piggy_buf = NULL;
qed_iwarp_ll2_post_rx(p_hwfn, piggy, connection_handle);
}
qed_iwarp_ll2_post_rx(p_hwfn, buffer, connection_handle);
if (connection_handle == p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle)
qed_iwarp_process_pending_pkts(p_hwfn);
return;
}
static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle,
......@@ -1871,12 +2454,44 @@ static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle,
if (!buffer)
return;
if (buffer->piggy_buf) {
dma_free_coherent(&p_hwfn->cdev->pdev->dev,
buffer->piggy_buf->buff_size,
buffer->piggy_buf->data,
buffer->piggy_buf->data_phys_addr);
kfree(buffer->piggy_buf);
}
dma_free_coherent(&p_hwfn->cdev->pdev->dev, buffer->buff_size,
buffer->data, buffer->data_phys_addr);
kfree(buffer);
}
/* Slowpath callback of the iWARP ll2 connections. The only slowpath event
 * for iwarp ll2 is the unalign flush; on receiving it the partial FPDU state
 * of the connection named in the opaque data is reset to all zeroes.
 * @connection_handle is not used.
 */
void
qed_iwarp_ll2_slowpath(void *cxt,
		       u8 connection_handle,
		       u32 opaque_data_0, u32 opaque_data_1)
{
	struct qed_hwfn *p_hwfn = cxt;
	struct unaligned_opaque_data flush_data;
	struct qed_iwarp_fpdu *fpdu;

	qed_iwarp_mpa_get_data(p_hwfn, &flush_data,
			       opaque_data_0, opaque_data_1);

	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "(0x%x) Flush fpdu\n",
		   flush_data.cid);

	fpdu = qed_iwarp_get_curr_fpdu(p_hwfn, (u16)flush_data.cid);
	if (!fpdu)
		return;

	memset(fpdu, 0, sizeof(*fpdu));
}
static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
......@@ -1902,6 +2517,16 @@ static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL;
}
if (iwarp_info->ll2_mpa_handle != QED_IWARP_HANDLE_INVAL) {
rc = qed_ll2_terminate_connection(p_hwfn,
iwarp_info->ll2_mpa_handle);
if (rc)
DP_INFO(p_hwfn, "Failed to terminate mpa connection\n");
qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_mpa_handle);
iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL;
}
qed_llh_remove_mac_filter(p_hwfn,
p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr);
return rc;
......@@ -1953,12 +2578,15 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
struct qed_iwarp_info *iwarp_info;
struct qed_ll2_acquire_data data;
struct qed_ll2_cbs cbs;
u32 mpa_buff_size;
u16 n_ooo_bufs;
int rc = 0;
int i;
iwarp_info = &p_hwfn->p_rdma_info->iwarp;
iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL;
iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL;
iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL;
iwarp_info->max_mtu = params->max_mtu;
......@@ -2029,6 +2657,68 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
if (rc)
goto err;
/* Start Unaligned MPA connection */
cbs.rx_comp_cb = qed_iwarp_ll2_comp_mpa_pkt;
cbs.slowpath_cb = qed_iwarp_ll2_slowpath;
memset(&data, 0, sizeof(data));
data.input.conn_type = QED_LL2_TYPE_IWARP;
data.input.mtu = params->max_mtu;
/* FW requires that once a packet arrives OOO, it must have at
* least 2 rx buffers available on the unaligned connection
* for handling the case that it is a partial fpdu.
*/
data.input.rx_num_desc = n_ooo_bufs * 2;
data.input.tx_num_desc = data.input.rx_num_desc;
data.input.tx_max_bds_per_packet = QED_IWARP_MAX_BDS_PER_FPDU;
data.p_connection_handle = &iwarp_info->ll2_mpa_handle;
data.input.secondary_queue = true;
data.cbs = &cbs;
rc = qed_ll2_acquire_connection(p_hwfn, &data);
if (rc)
goto err;
rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_mpa_handle);
if (rc)
goto err;
mpa_buff_size = QED_IWARP_MAX_BUF_SIZE(params->max_mtu);
rc = qed_iwarp_ll2_alloc_buffers(p_hwfn,
data.input.rx_num_desc,
mpa_buff_size,
iwarp_info->ll2_mpa_handle);
if (rc)
goto err;
iwarp_info->partial_fpdus = kcalloc((u16)p_hwfn->p_rdma_info->num_qps,
sizeof(*iwarp_info->partial_fpdus),
GFP_KERNEL);
if (!iwarp_info->partial_fpdus)
goto err;
iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps;
iwarp_info->mpa_intermediate_buf = kzalloc(mpa_buff_size, GFP_KERNEL);
if (!iwarp_info->mpa_intermediate_buf)
goto err;
/* The mpa_bufs array serves for pending RX packets received on the
* mpa ll2 that don't have place on the tx ring and require later
* processing. We can't fail on allocation of such a struct therefore
* we allocate enough to take care of all rx packets
*/
iwarp_info->mpa_bufs = kcalloc(data.input.rx_num_desc,
sizeof(*iwarp_info->mpa_bufs),
GFP_KERNEL);
if (!iwarp_info->mpa_bufs)
goto err;
INIT_LIST_HEAD(&iwarp_info->mpa_buf_pending_list);
INIT_LIST_HEAD(&iwarp_info->mpa_buf_list);
for (i = 0; i < data.input.rx_num_desc; i++)
list_add_tail(&iwarp_info->mpa_bufs[i].list_entry,
&iwarp_info->mpa_buf_list);
return rc;
err:
qed_iwarp_ll2_stop(p_hwfn, p_ptt);
......
......@@ -55,15 +55,43 @@ enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state);
#define QED_IWARP_HANDLE_INVAL (0xff)
/* One DMA buffer used on the iWARP ll2 connections. */
struct qed_iwarp_ll2_buff {
	/* optional second buffer to be released together with this one;
	 * the tx completion/release callbacks post back or free it too
	 */
	struct qed_iwarp_ll2_buff *piggy_buf;
	void *data;			/* CPU address of the buffer */
	dma_addr_t data_phys_addr;	/* DMA address of the buffer */
	u32 buff_size;			/* size passed to dma_free_coherent() */
};
/* Tracking entry for one rx packet received on the unaligned-MPA ll2
 * connection; lives on either mpa_buf_list (free) or mpa_buf_pending_list.
 */
struct qed_iwarp_ll2_mpa_buf {
	struct list_head list_entry;
	struct qed_iwarp_ll2_buff *ll2_buf;	/* rx buffer (completion cookie) */
	struct unaligned_opaque_data data;	/* parsed completion opaque data */
	u16 tcp_payload_len;	/* payload bytes not yet consumed */
	u8 placement_offset;	/* placement offset from the rx completion */
};
/* In some cases a fpdu will arrive with only one byte of the header, in this
 * case the fpdu_length will be partial (contain only higher byte) and
 * incomplete bytes will contain the invalid value.
 *
 * NOTE(review): the .c code in this change tests incomplete_bytes against
 * QED_IWARP_INVALID_FPDU_LENGTH (same value) rather than this macro -
 * consider keeping only one of the two names.
 */
#define QED_IWARP_INVALID_INCOMPLETE_BYTES 0xffff

/* Per-connection state of a FPDU that is being reassembled. */
struct qed_iwarp_fpdu {
	struct qed_iwarp_ll2_buff *mpa_buf;	/* buffer holding the stored part */
	void *mpa_frag_virt;	/* CPU address of the stored mpa fragment */
	dma_addr_t mpa_frag;	/* DMA address of the stored mpa fragment */
	dma_addr_t pkt_hdr;	/* DMA address of the packet header */
	u16 mpa_frag_len;	/* bytes of the fpdu stored so far */
	u16 fpdu_length;	/* full fpdu length including pad and crc */
	u16 incomplete_bytes;	/* bytes still missing; 0 when complete */
	u8 pkt_hdr_size;	/* header size (tcp payload offset) */
};
struct qed_iwarp_info {
struct list_head listen_list; /* qed_iwarp_listener */
struct list_head ep_list; /* qed_iwarp_ep */
struct list_head ep_free_list; /* pre-allocated ep's */
struct list_head mpa_buf_list; /* list of mpa_bufs */
struct list_head mpa_buf_pending_list;
spinlock_t iw_lock; /* for iwarp resources */
spinlock_t qp_lock; /* for teardown races */
u32 rcv_wnd_scale;
......@@ -73,9 +101,14 @@ struct qed_iwarp_info {
u8 tcp_flags;
u8 ll2_syn_handle;
u8 ll2_ooo_handle;
u8 ll2_mpa_handle;
u8 peer2peer;
enum mpa_negotiation_mode mpa_rev;
enum mpa_rtr_type rtr_type;
struct qed_iwarp_fpdu *partial_fpdus;
struct qed_iwarp_ll2_mpa_buf *mpa_bufs;
u8 *mpa_intermediate_buf;
u16 max_num_partial_fpdus;
};
enum qed_iwarp_ep_state {
......
......@@ -422,6 +422,41 @@ static void qed_ll2_rxq_parse_reg(struct qed_hwfn *p_hwfn,
data->u.placement_offset = p_cqe->rx_cqe_fp.placement_offset;
}
/* Handle a slowpath CQE on an ll2 rx queue.
 *
 * Only CORE_RAMROD_RX_QUEUE_FLUSH is expected. It is forwarded to the
 * connection's slowpath callback together with the two opaque data words of
 * the CQE. The rx queue lock is dropped around the callback and re-taken
 * before returning.
 *
 * Returns 0 on success, -EINVAL for an unexpected ramrod id or when the
 * connection registered no slowpath callback.
 */
static int
qed_ll2_handle_slowpath(struct qed_hwfn *p_hwfn,
			struct qed_ll2_info *p_ll2_conn,
			union core_rx_cqe_union *p_cqe,
			unsigned long *p_lock_flags)
{
	struct core_rx_slow_path_cqe *sp_cqe = &p_cqe->rx_cqe_sp;
	struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;

	if (sp_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH) {
		DP_NOTICE(p_hwfn,
			  "LL2 - unexpected Rx CQE slowpath ramrod_cmd_id:%d\n",
			  sp_cqe->ramrod_cmd_id);
		return -EINVAL;
	}

	if (!p_ll2_conn->cbs.slowpath_cb) {
		DP_NOTICE(p_hwfn,
			  "LL2 - received RX_QUEUE_FLUSH but no callback was provided\n");
		return -EINVAL;
	}

	/* the callback runs without the rx lock held */
	spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags);

	p_ll2_conn->cbs.slowpath_cb(p_ll2_conn->cbs.cookie,
				    p_ll2_conn->my_id,
				    le32_to_cpu(sp_cqe->opaque_data.data[0]),
				    le32_to_cpu(sp_cqe->opaque_data.data[1]));

	spin_lock_irqsave(&p_rx->lock, *p_lock_flags);

	return 0;
}
static int
qed_ll2_rxq_handle_completion(struct qed_hwfn *p_hwfn,
struct qed_ll2_info *p_ll2_conn,
......@@ -495,8 +530,8 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
switch (cqe->rx_cqe_sp.type) {
case CORE_RX_CQE_TYPE_SLOW_PATH:
DP_NOTICE(p_hwfn, "LL2 - unexpected Rx CQE slowpath\n");
rc = -EINVAL;
rc = qed_ll2_handle_slowpath(p_hwfn, p_ll2_conn,
cqe, &flags);
break;
case CORE_RX_CQE_TYPE_GSI_OFFLOAD:
case CORE_RX_CQE_TYPE_REGULAR:
......@@ -894,7 +929,7 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg;
p_ramrod->inner_vlan_removal_en = p_ll2_conn->input.rx_vlan_removal_en;
p_ramrod->queue_id = p_ll2_conn->queue_id;
p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_OOO) ? 0 : 1;
p_ramrod->main_func_queue = p_ll2_conn->main_func_queue ? 1 : 0;
if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE) &&
......@@ -1105,6 +1140,7 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn,
struct qed_ll2_info *p_ll2_info)
{
struct qed_ll2_tx_packet *p_descq;
u32 desc_size;
u32 capacity;
int rc = 0;
......@@ -1122,13 +1158,17 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn,
goto out;
capacity = qed_chain_get_capacity(&p_ll2_info->tx_queue.txq_chain);
p_descq = kcalloc(capacity, sizeof(struct qed_ll2_tx_packet),
GFP_KERNEL);
/* First element is part of the packet, rest are flexibly added */
desc_size = (sizeof(*p_descq) +
(p_ll2_info->input.tx_max_bds_per_packet - 1) *
sizeof(p_descq->bds_set));
p_descq = kcalloc(capacity, desc_size, GFP_KERNEL);
if (!p_descq) {
rc = -ENOMEM;
goto out;
}
p_ll2_info->tx_queue.descq_array = p_descq;
p_ll2_info->tx_queue.descq_mem = p_descq;
DP_VERBOSE(p_hwfn, QED_MSG_LL2,
"Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n",
......@@ -1209,6 +1249,7 @@ qed_ll2_set_cbs(struct qed_ll2_info *p_ll2_info, const struct qed_ll2_cbs *cbs)
p_ll2_info->cbs.rx_release_cb = cbs->rx_release_cb;
p_ll2_info->cbs.tx_comp_cb = cbs->tx_comp_cb;
p_ll2_info->cbs.tx_release_cb = cbs->tx_release_cb;
p_ll2_info->cbs.slowpath_cb = cbs->slowpath_cb;
p_ll2_info->cbs.cookie = cbs->cookie;
return 0;
......@@ -1260,6 +1301,11 @@ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data)
p_ll2_info->tx_dest = (data->input.tx_dest == QED_LL2_TX_DEST_NW) ?
CORE_TX_DEST_NW : CORE_TX_DEST_LB;
if (data->input.conn_type == QED_LL2_TYPE_OOO ||
data->input.secondary_queue)
p_ll2_info->main_func_queue = false;
else
p_ll2_info->main_func_queue = true;
/* Correct maximum number of Tx BDs */
p_tx_max = &p_ll2_info->input.tx_max_bds_per_packet;
......@@ -1359,11 +1405,13 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
{
struct qed_hwfn *p_hwfn = cxt;
struct qed_ll2_info *p_ll2_conn;
struct qed_ll2_tx_packet *p_pkt;
struct qed_ll2_rx_queue *p_rx;
struct qed_ll2_tx_queue *p_tx;
struct qed_ptt *p_ptt;
int rc = -EINVAL;
u32 i, capacity;
u32 desc_size;
u8 qid;
p_ptt = qed_ptt_acquire(p_hwfn);
......@@ -1397,9 +1445,15 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
INIT_LIST_HEAD(&p_tx->sending_descq);
spin_lock_init(&p_tx->lock);
capacity = qed_chain_get_capacity(&p_tx->txq_chain);
for (i = 0; i < capacity; i++)
list_add_tail(&p_tx->descq_array[i].list_entry,
&p_tx->free_descq);
/* First element is part of the packet, rest are flexibly added */
desc_size = (sizeof(*p_pkt) +
(p_ll2_conn->input.tx_max_bds_per_packet - 1) *
sizeof(p_pkt->bds_set));
for (i = 0; i < capacity; i++) {
p_pkt = p_tx->descq_mem + desc_size * i;
list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
}
p_tx->cur_completing_bd_idx = 0;
p_tx->bds_idx = 0;
p_tx->b_completing_packet = false;
......@@ -1579,10 +1633,27 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
roce_flavor = (pkt->qed_roce_flavor == QED_LL2_ROCE) ? CORE_ROCE
: CORE_RROCE;
tx_dest = (pkt->tx_dest == QED_LL2_TX_DEST_NW) ? CORE_TX_DEST_NW
: CORE_TX_DEST_LB;
switch (pkt->tx_dest) {
case QED_LL2_TX_DEST_NW:
tx_dest = CORE_TX_DEST_NW;
break;
case QED_LL2_TX_DEST_LB:
tx_dest = CORE_TX_DEST_LB;
break;
case QED_LL2_TX_DEST_DROP:
tx_dest = CORE_TX_DEST_DROP;
break;
default:
tx_dest = CORE_TX_DEST_LB;
break;
}
start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain);
if (QED_IS_IWARP_PERSONALITY(p_hwfn) &&
p_ll2->input.conn_type == QED_LL2_TYPE_OOO)
start_bd->nw_vlan_or_lb_echo =
cpu_to_le16(IWARP_LL2_IN_ORDER_TX_QUEUE);
else
start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan);
SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W,
cpu_to_le16(pkt->l4_hdr_offset_w));
......@@ -1591,6 +1662,9 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
SET_FIELD(bd_data, CORE_TX_BD_DATA_START_BD, 0x1);
SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, pkt->num_of_bds);
SET_FIELD(bd_data, CORE_TX_BD_DATA_ROCE_FLAV, roce_flavor);
SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_CSUM, !!(pkt->enable_ip_cksum));
SET_FIELD(bd_data, CORE_TX_BD_DATA_L4_CSUM, !!(pkt->enable_l4_cksum));
SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_LEN, !!(pkt->calc_ip_len));
start_bd->bd_data.as_bitfield = cpu_to_le16(bd_data);
DMA_REGPAIR_LE(start_bd->addr, pkt->first_frag);
start_bd->nbytes = cpu_to_le16(pkt->first_frag_len);
......@@ -1698,7 +1772,7 @@ int qed_ll2_prepare_tx_packet(void *cxt,
p_tx = &p_ll2_conn->tx_queue;
p_tx_chain = &p_tx->txq_chain;
if (pkt->num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET)
if (pkt->num_of_bds > p_ll2_conn->input.tx_max_bds_per_packet)
return -EIO;
spin_lock_irqsave(&p_tx->lock, flags);
......@@ -1858,7 +1932,7 @@ void qed_ll2_release_connection(void *cxt, u8 connection_handle)
qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index);
}
kfree(p_ll2_conn->tx_queue.descq_array);
kfree(p_ll2_conn->tx_queue.descq_mem);
qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain);
kfree(p_ll2_conn->rx_queue.descq_array);
......
......@@ -63,17 +63,14 @@ struct qed_ll2_rx_packet {
struct qed_ll2_tx_packet {
struct list_head list_entry;
u16 bd_used;
u16 vlan;
u16 l4_hdr_offset_w;
u8 bd_flags;
bool notify_fw;
void *cookie;
/* Flexible Array of bds_set determined by max_bds_per_packet */
struct {
struct core_tx_bd *txq_bd;
dma_addr_t tx_frag;
u16 frag_len;
} bds_set[ETH_TX_MAX_BDS_PER_NON_LSO_PACKET];
} bds_set[1];
};
struct qed_ll2_rx_queue {
......@@ -101,7 +98,7 @@ struct qed_ll2_tx_queue {
struct list_head active_descq;
struct list_head free_descq;
struct list_head sending_descq;
struct qed_ll2_tx_packet *descq_array;
void *descq_mem; /* memory for variable sized qed_ll2_tx_packet*/
struct qed_ll2_tx_packet *cur_send_packet;
struct qed_ll2_tx_packet cur_completing_packet;
u16 cur_completing_bd_idx;
......@@ -124,6 +121,7 @@ struct qed_ll2_info {
bool b_active;
enum core_tx_dest tx_dest;
u8 tx_stats_en;
bool main_func_queue;
struct qed_ll2_rx_queue rx_queue;
struct qed_ll2_tx_queue tx_queue;
struct qed_ll2_cbs cbs;
......
......@@ -64,6 +64,7 @@ enum qed_ll2_roce_flavor_type {
/* Destination of a Light L2 tx packet, as requested by the ll2 client. */
enum qed_ll2_tx_dest {
	QED_LL2_TX_DEST_NW, /* Light L2 TX Destination to the Network */
	QED_LL2_TX_DEST_LB, /* Light L2 TX Destination to the Loopback */
	QED_LL2_TX_DEST_DROP, /* Light L2 Drop the TX packet */
	QED_LL2_TX_DEST_MAX /* number of destinations; not a valid value */
};
......@@ -150,11 +151,16 @@ void (*qed_ll2_release_tx_packet_cb)(void *cxt,
dma_addr_t first_frag_addr,
bool b_last_fragment, bool b_last_packet);
/* Slowpath completion callback: receives the callback cookie, the handle of
 * the connection the event arrived on and the two opaque data words taken
 * from the slowpath CQE.
 */
typedef
void (*qed_ll2_slowpath_cb)(void *cxt, u8 connection_handle,
			    u32 opaque_data_0, u32 opaque_data_1);

/* Callbacks an ll2 client registers for a connection. */
struct qed_ll2_cbs {
	qed_ll2_complete_rx_packet_cb rx_comp_cb;
	qed_ll2_release_rx_packet_cb rx_release_cb;
	qed_ll2_complete_tx_packet_cb tx_comp_cb;
	qed_ll2_release_tx_packet_cb tx_release_cb;
	qed_ll2_slowpath_cb slowpath_cb; /* may be NULL; checked before use */
	void *cookie; /* passed back as the first callback argument */
};
......@@ -171,6 +177,7 @@ struct qed_ll2_acquire_data_inputs {
enum qed_ll2_tx_dest tx_dest;
enum qed_ll2_error_handle ai_err_packet_too_big;
enum qed_ll2_error_handle ai_err_no_buf;
bool secondary_queue;
u8 gsi_enable;
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment