Commit c8334512, authored by Wen Gong, committed by Kalle Valo

ath10k: add htt TX bundle for sdio

The transmission utilization ratio of the SDIO bus is low for small
packets, because the space and time cost of a transfer on the SDIO bus
is the same for large and small packets. As a result, the data rate for
large packets is higher than for small packets.

Test result of different length of data:

data packet(byte)   cost time(us)   calculated rate(Mbps)
      256               28                73
      512               33               124
     1024               35               234
     1792               45               318
    14336              168               682
    28672              333               688
    57344              660               695

This patch changes the TX path from sending single packets to sending a
large bundle packet containing at most 32 packets, which results in a
significant performance improvement on the TX path.

Also there's a fourth thread "ath10k_tx_complete_wq" added to ath10k as it
improves TCP RX throughput (values in Mbps):

                                       TCP-RX    TCP-TX    UDP-RX      UDP-TX
use workqueue_tx_complete              423       357       448         412
change it to ar->workqueue             410       360       449         414
change it to ar->workqueue_aux         405       339       446         401

This patch only affects SDIO chips; it does not affect PCI, SNOC, etc.
Bundling is only enabled for SDIO chips.

Tested with QCA6174 SDIO with firmware
WLAN.RMH.4.4.1-00017-QCARMSWP-1.
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200410061400.14231-2-wgong@codeaurora.org
parent d8170934
...@@ -3288,6 +3288,11 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, ...@@ -3288,6 +3288,11 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
if (!ar->workqueue_aux) if (!ar->workqueue_aux)
goto err_free_wq; goto err_free_wq;
ar->workqueue_tx_complete =
create_singlethread_workqueue("ath10k_tx_complete_wq");
if (!ar->workqueue_tx_complete)
goto err_free_aux_wq;
mutex_init(&ar->conf_mutex); mutex_init(&ar->conf_mutex);
mutex_init(&ar->dump_mutex); mutex_init(&ar->dump_mutex);
spin_lock_init(&ar->data_lock); spin_lock_init(&ar->data_lock);
...@@ -3315,7 +3320,7 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, ...@@ -3315,7 +3320,7 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
ret = ath10k_coredump_create(ar); ret = ath10k_coredump_create(ar);
if (ret) if (ret)
goto err_free_aux_wq; goto err_free_tx_complete;
ret = ath10k_debug_create(ar); ret = ath10k_debug_create(ar);
if (ret) if (ret)
...@@ -3325,12 +3330,12 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, ...@@ -3325,12 +3330,12 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
err_free_coredump: err_free_coredump:
ath10k_coredump_destroy(ar); ath10k_coredump_destroy(ar);
err_free_tx_complete:
destroy_workqueue(ar->workqueue_tx_complete);
err_free_aux_wq: err_free_aux_wq:
destroy_workqueue(ar->workqueue_aux); destroy_workqueue(ar->workqueue_aux);
err_free_wq: err_free_wq:
destroy_workqueue(ar->workqueue); destroy_workqueue(ar->workqueue);
err_free_mac: err_free_mac:
ath10k_mac_destroy(ar); ath10k_mac_destroy(ar);
...@@ -3346,6 +3351,9 @@ void ath10k_core_destroy(struct ath10k *ar) ...@@ -3346,6 +3351,9 @@ void ath10k_core_destroy(struct ath10k *ar)
flush_workqueue(ar->workqueue_aux); flush_workqueue(ar->workqueue_aux);
destroy_workqueue(ar->workqueue_aux); destroy_workqueue(ar->workqueue_aux);
flush_workqueue(ar->workqueue_tx_complete);
destroy_workqueue(ar->workqueue_tx_complete);
ath10k_debug_destroy(ar); ath10k_debug_destroy(ar);
ath10k_coredump_destroy(ar); ath10k_coredump_destroy(ar);
ath10k_htt_tx_destroy(&ar->htt); ath10k_htt_tx_destroy(&ar->htt);
......
...@@ -1091,7 +1091,7 @@ struct ath10k { ...@@ -1091,7 +1091,7 @@ struct ath10k {
struct workqueue_struct *workqueue; struct workqueue_struct *workqueue;
/* Auxiliary workqueue */ /* Auxiliary workqueue */
struct workqueue_struct *workqueue_aux; struct workqueue_struct *workqueue_aux;
struct workqueue_struct *workqueue_tx_complete;
/* prevents concurrent FW reconfiguration */ /* prevents concurrent FW reconfiguration */
struct mutex conf_mutex; struct mutex conf_mutex;
...@@ -1132,6 +1132,8 @@ struct ath10k { ...@@ -1132,6 +1132,8 @@ struct ath10k {
struct work_struct register_work; struct work_struct register_work;
struct work_struct restart_work; struct work_struct restart_work;
struct work_struct bundle_tx_work;
struct work_struct tx_complete_work;
/* cycle count is reported twice for each visited channel during scan. /* cycle count is reported twice for each visited channel during scan.
* access protected by data_lock * access protected by data_lock
......
This diff is collapsed.
...@@ -83,8 +83,14 @@ struct ath10k_htc_hdr { ...@@ -83,8 +83,14 @@ struct ath10k_htc_hdr {
u8 seq_no; /* for tx */ u8 seq_no; /* for tx */
u8 control_byte1; u8 control_byte1;
} __packed; } __packed;
union {
__le16 pad_len;
struct {
u8 pad0; u8 pad0;
u8 pad1; u8 pad1;
} __packed;
} __packed;
} __packed __aligned(4); } __packed __aligned(4);
enum ath10k_ath10k_htc_msg_id { enum ath10k_ath10k_htc_msg_id {
...@@ -121,6 +127,10 @@ enum ath10k_htc_conn_svc_status { ...@@ -121,6 +127,10 @@ enum ath10k_htc_conn_svc_status {
ATH10K_HTC_CONN_SVC_STATUS_NO_MORE_EP = 4 ATH10K_HTC_CONN_SVC_STATUS_NO_MORE_EP = 4
}; };
#define ATH10K_MAX_MSG_PER_HTC_TX_BUNDLE 32
#define ATH10K_MIN_MSG_PER_HTC_TX_BUNDLE 2
#define ATH10K_MIN_CREDIT_PER_HTC_TX_BUNDLE 2
enum ath10k_htc_setup_complete_flags { enum ath10k_htc_setup_complete_flags {
ATH10K_HTC_SETUP_COMPLETE_FLAGS_RX_BNDL_EN = 1 ATH10K_HTC_SETUP_COMPLETE_FLAGS_RX_BNDL_EN = 1
}; };
...@@ -353,7 +363,12 @@ struct ath10k_htc_ep { ...@@ -353,7 +363,12 @@ struct ath10k_htc_ep {
u8 seq_no; /* for debugging */ u8 seq_no; /* for debugging */
int tx_credits; int tx_credits;
int tx_credit_size;
bool tx_credit_flow_enabled; bool tx_credit_flow_enabled;
bool bundle_tx;
struct sk_buff_head tx_req_head;
struct sk_buff_head tx_complete_head;
}; };
struct ath10k_htc_svc_tx_credits { struct ath10k_htc_svc_tx_credits {
...@@ -382,6 +397,7 @@ struct ath10k_htc { ...@@ -382,6 +397,7 @@ struct ath10k_htc {
int ath10k_htc_init(struct ath10k *ar); int ath10k_htc_init(struct ath10k *ar);
int ath10k_htc_wait_target(struct ath10k_htc *htc); int ath10k_htc_wait_target(struct ath10k_htc *htc);
void ath10k_htc_setup_tx_req(struct ath10k_htc_ep *ep);
int ath10k_htc_start(struct ath10k_htc *htc); int ath10k_htc_start(struct ath10k_htc *htc);
int ath10k_htc_connect_service(struct ath10k_htc *htc, int ath10k_htc_connect_service(struct ath10k_htc *htc,
struct ath10k_htc_svc_conn_req *conn_req, struct ath10k_htc_svc_conn_req *conn_req,
...@@ -391,6 +407,10 @@ void ath10k_htc_change_tx_credit_flow(struct ath10k_htc *htc, ...@@ -391,6 +407,10 @@ void ath10k_htc_change_tx_credit_flow(struct ath10k_htc *htc,
bool enable); bool enable);
int ath10k_htc_send(struct ath10k_htc *htc, enum ath10k_htc_ep_id eid, int ath10k_htc_send(struct ath10k_htc *htc, enum ath10k_htc_ep_id eid,
struct sk_buff *packet); struct sk_buff *packet);
void ath10k_htc_stop_hl(struct ath10k *ar);
int ath10k_htc_send_hl(struct ath10k_htc *htc, enum ath10k_htc_ep_id eid,
struct sk_buff *packet);
struct sk_buff *ath10k_htc_alloc_skb(struct ath10k *ar, int size); struct sk_buff *ath10k_htc_alloc_skb(struct ath10k *ar, int size);
void ath10k_htc_tx_completion_handler(struct ath10k *ar, struct sk_buff *skb); void ath10k_htc_tx_completion_handler(struct ath10k *ar, struct sk_buff *skb);
void ath10k_htc_rx_completion_handler(struct ath10k *ar, struct sk_buff *skb); void ath10k_htc_rx_completion_handler(struct ath10k *ar, struct sk_buff *skb);
......
...@@ -135,6 +135,8 @@ int ath10k_htt_connect(struct ath10k_htt *htt) ...@@ -135,6 +135,8 @@ int ath10k_htt_connect(struct ath10k_htt *htt)
{ {
struct ath10k_htc_svc_conn_req conn_req; struct ath10k_htc_svc_conn_req conn_req;
struct ath10k_htc_svc_conn_resp conn_resp; struct ath10k_htc_svc_conn_resp conn_resp;
struct ath10k *ar = htt->ar;
struct ath10k_htc_ep *ep;
int status; int status;
memset(&conn_req, 0, sizeof(conn_req)); memset(&conn_req, 0, sizeof(conn_req));
...@@ -142,6 +144,7 @@ int ath10k_htt_connect(struct ath10k_htt *htt) ...@@ -142,6 +144,7 @@ int ath10k_htt_connect(struct ath10k_htt *htt)
conn_req.ep_ops.ep_tx_complete = ath10k_htt_htc_tx_complete; conn_req.ep_ops.ep_tx_complete = ath10k_htt_htc_tx_complete;
conn_req.ep_ops.ep_rx_complete = ath10k_htt_htc_t2h_msg_handler; conn_req.ep_ops.ep_rx_complete = ath10k_htt_htc_t2h_msg_handler;
conn_req.ep_ops.ep_tx_credits = ath10k_htt_op_ep_tx_credits;
/* connect to control service */ /* connect to control service */
conn_req.service_id = ATH10K_HTC_SVC_ID_HTT_DATA_MSG; conn_req.service_id = ATH10K_HTC_SVC_ID_HTT_DATA_MSG;
...@@ -154,6 +157,11 @@ int ath10k_htt_connect(struct ath10k_htt *htt) ...@@ -154,6 +157,11 @@ int ath10k_htt_connect(struct ath10k_htt *htt)
htt->eid = conn_resp.eid; htt->eid = conn_resp.eid;
if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) {
ep = &ar->htc.endpoint[htt->eid];
ath10k_htc_setup_tx_req(ep);
}
htt->disable_tx_comp = ath10k_hif_get_htt_tx_complete(htt->ar); htt->disable_tx_comp = ath10k_hif_get_htt_tx_complete(htt->ar);
if (htt->disable_tx_comp) if (htt->disable_tx_comp)
ath10k_htc_change_tx_credit_flow(&htt->ar->htc, htt->eid, true); ath10k_htc_change_tx_credit_flow(&htt->ar->htc, htt->eid, true);
......
...@@ -2032,6 +2032,9 @@ struct ath10k_htt { ...@@ -2032,6 +2032,9 @@ struct ath10k_htt {
const struct ath10k_htt_tx_ops *tx_ops; const struct ath10k_htt_tx_ops *tx_ops;
const struct ath10k_htt_rx_ops *rx_ops; const struct ath10k_htt_rx_ops *rx_ops;
bool disable_tx_comp; bool disable_tx_comp;
bool bundle_tx;
struct sk_buff_head tx_req_head;
struct sk_buff_head tx_complete_head;
}; };
struct ath10k_htt_tx_ops { struct ath10k_htt_tx_ops {
...@@ -2278,6 +2281,7 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar, ...@@ -2278,6 +2281,7 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
__le16 fetch_seq_num, __le16 fetch_seq_num,
struct htt_tx_fetch_record *records, struct htt_tx_fetch_record *records,
size_t num_records); size_t num_records);
void ath10k_htt_op_ep_tx_credits(struct ath10k *ar);
void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw, void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw,
struct ieee80211_txq *txq); struct ieee80211_txq *txq);
......
...@@ -3919,6 +3919,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) ...@@ -3919,6 +3919,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
ath10k_dbg(ar, ATH10K_DBG_HTT, ath10k_dbg(ar, ATH10K_DBG_HTT,
"htt credit total %d\n", "htt credit total %d\n",
ep->tx_credits); ep->tx_credits);
ep->ep_ops.ep_tx_credits(htc->ar);
} }
break; break;
} }
......
...@@ -531,6 +531,7 @@ void ath10k_htt_tx_destroy(struct ath10k_htt *htt) ...@@ -531,6 +531,7 @@ void ath10k_htt_tx_destroy(struct ath10k_htt *htt)
void ath10k_htt_tx_stop(struct ath10k_htt *htt) void ath10k_htt_tx_stop(struct ath10k_htt *htt)
{ {
ath10k_htc_stop_hl(htt->ar);
idr_for_each(&htt->pending_tx, ath10k_htt_tx_clean_up_pending, htt->ar); idr_for_each(&htt->pending_tx, ath10k_htt_tx_clean_up_pending, htt->ar);
idr_destroy(&htt->pending_tx); idr_destroy(&htt->pending_tx);
} }
...@@ -541,6 +542,11 @@ void ath10k_htt_tx_free(struct ath10k_htt *htt) ...@@ -541,6 +542,11 @@ void ath10k_htt_tx_free(struct ath10k_htt *htt)
ath10k_htt_tx_destroy(htt); ath10k_htt_tx_destroy(htt);
} }
/* ep_tx_credits callback for the HTT endpoint (installed through
 * conn_req.ep_ops.ep_tx_credits in ath10k_htt_connect()).
 *
 * Queues ar->bundle_tx_work on ar->workqueue; the work handler itself is
 * not visible in this hunk.
 */
void ath10k_htt_op_ep_tx_credits(struct ath10k *ar)
{
queue_work(ar->workqueue, &ar->bundle_tx_work);
}
void ath10k_htt_htc_tx_complete(struct ath10k *ar, struct sk_buff *skb) void ath10k_htt_htc_tx_complete(struct ath10k *ar, struct sk_buff *skb)
{ {
struct ath10k_htt *htt = &ar->htt; struct ath10k_htt *htt = &ar->htt;
...@@ -1379,7 +1385,7 @@ static int ath10k_htt_tx_hl(struct ath10k_htt *htt, enum ath10k_hw_txrx_mode txm ...@@ -1379,7 +1385,7 @@ static int ath10k_htt_tx_hl(struct ath10k_htt *htt, enum ath10k_hw_txrx_mode txm
*/ */
tx_desc->peerid = __cpu_to_le32(HTT_INVALID_PEERID); tx_desc->peerid = __cpu_to_le32(HTT_INVALID_PEERID);
res = ath10k_htc_send(&htt->ar->htc, htt->eid, msdu); res = ath10k_htc_send_hl(&htt->ar->htc, htt->eid, msdu);
out: out:
return res; return res;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment