Commit 50f08edf authored by Toke Høiland-Jørgensen's avatar Toke Høiland-Jørgensen Committed by Kalle Valo

ath9k: Switch to using mac80211 intermediate software queues.

This switches ath9k over to using the mac80211 intermediate software
queueing mechanism for data packets. It removes the queueing inside the
driver, except for the retry queue, and instead pulls from mac80211 when
a packet is needed. The retry queue is used to store a packet that was
pulled but can't be sent immediately.

The old code path in ath_tx_start that would queue packets has been
removed completely, as has the qlen limit tunables (since there's no
longer a queue in the driver to limit).

The mac80211 intermediate software queues offer significant latency
reductions, and this patch allows ath9k to realise them. The exact gains
from this varies with the test scenario, but in an access point scenario
we have seen latency reductions ranging from 1/3 to as much as an order
of magnitude. We also achieve slightly better aggregation.

Median latency (ping) figures with this patch applied at the access point,
with two high-rate stations and one low-rate station (HT20 5Ghz), running
a Flent rtt_fair_var_up test with one TCP flow and one ping flow going to
each station:

                                 Fast station        Slow station
Default pfifo_fast qdisc:            430.4 ms            638.7 ms
fq_codel qdisc on iface:              35.5 ms            211.8 ms
This patch set:                       22.4 ms             38.2 ms

Median aggregation sizes over the same test:

Default pfifo_fast qdisc:            9.5 pkts            1.9 pkts
fq_codel qdisc on iface:            11.2 pkts            1.9 pkts
This patch set:                     13.9 pkts            1.9 pkts

This patch is based on Tim's original patch set, but reworked quite
thoroughly.

Cc: Tim Shepard <shep@alum.mit.edu>
Cc: Felix Fietkau <nbd@nbd.name>
Signed-off-by: default avatarToke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: default avatarKalle Valo <kvalo@qca.qualcomm.com>
parent 14acebc3
......@@ -91,7 +91,6 @@ int ath_descdma_setup(struct ath_softc *sc, struct ath_descdma *dd,
#define ATH_RXBUF 512
#define ATH_TXBUF 512
#define ATH_TXBUF_RESERVE 5
#define ATH_MAX_QDEPTH (ATH_TXBUF / 4 - ATH_TXBUF_RESERVE)
#define ATH_TXMAXTRY 13
#define ATH_MAX_SW_RETRIES 30
......@@ -145,7 +144,7 @@ int ath_descdma_setup(struct ath_softc *sc, struct ath_descdma *dd,
#define BAW_WITHIN(_start, _bawsz, _seqno) \
((((_seqno) - (_start)) & 4095) < (_bawsz))
#define ATH_AN_2_TID(_an, _tidno) (&(_an)->tid[(_tidno)])
#define ATH_AN_2_TID(_an, _tidno) ath_node_to_tid(_an, _tidno)
#define IS_HT_RATE(rate) (rate & 0x80)
#define IS_CCK_RATE(rate) ((rate >= 0x18) && (rate <= 0x1e))
......@@ -164,7 +163,6 @@ struct ath_txq {
spinlock_t axq_lock;
u32 axq_depth;
u32 axq_ampdu_depth;
bool stopped;
bool axq_tx_inprogress;
struct list_head txq_fifo[ATH_TXFIFO_DEPTH];
u8 txq_headidx;
......@@ -232,7 +230,6 @@ struct ath_buf {
struct ath_atx_tid {
struct list_head list;
struct sk_buff_head buf_q;
struct sk_buff_head retry_q;
struct ath_node *an;
struct ath_txq *txq;
......@@ -247,13 +244,13 @@ struct ath_atx_tid {
s8 bar_index;
bool active;
bool clear_ps_filter;
bool has_queued;
};
struct ath_node {
struct ath_softc *sc;
struct ieee80211_sta *sta; /* station struct we're part of */
struct ieee80211_vif *vif; /* interface with which we're associated */
struct ath_atx_tid tid[IEEE80211_NUM_TIDS];
u16 maxampdu;
u8 mpdudensity;
......@@ -276,7 +273,6 @@ struct ath_tx_control {
struct ath_node *an;
struct ieee80211_sta *sta;
u8 paprd;
bool force_channel;
};
......@@ -293,7 +289,6 @@ struct ath_tx {
struct ath_descdma txdma;
struct ath_txq *txq_map[IEEE80211_NUM_ACS];
struct ath_txq *uapsdq;
u32 txq_max_pending[IEEE80211_NUM_ACS];
u16 max_aggr_framelen[IEEE80211_NUM_ACS][4][32];
};
......@@ -421,6 +416,22 @@ struct ath_offchannel {
int duration;
};
static inline struct ath_atx_tid *
ath_node_to_tid(struct ath_node *an, u8 tidno)
{
struct ieee80211_sta *sta = an->sta;
struct ieee80211_vif *vif = an->vif;
struct ieee80211_txq *txq;
BUG_ON(!vif);
if (sta)
txq = sta->txq[tidno % ARRAY_SIZE(sta->txq)];
else
txq = vif->txq;
return (struct ath_atx_tid *) txq->drv_priv;
}
#define case_rtn_string(val) case val: return #val
#define ath_for_each_chanctx(_sc, _ctx) \
......@@ -575,7 +586,6 @@ void ath_tx_edma_tasklet(struct ath_softc *sc);
int ath_tx_aggr_start(struct ath_softc *sc, struct ieee80211_sta *sta,
u16 tid, u16 *ssn);
void ath_tx_aggr_stop(struct ath_softc *sc, struct ieee80211_sta *sta, u16 tid);
void ath_tx_aggr_resume(struct ath_softc *sc, struct ieee80211_sta *sta, u16 tid);
void ath_tx_aggr_wakeup(struct ath_softc *sc, struct ath_node *an);
void ath_tx_aggr_sleep(struct ieee80211_sta *sta, struct ath_softc *sc,
......@@ -585,6 +595,7 @@ void ath9k_release_buffered_frames(struct ieee80211_hw *hw,
u16 tids, int nframes,
enum ieee80211_frame_release_type reason,
bool more_data);
void ath9k_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *queue);
/********/
/* VIFs */
......
......@@ -1010,7 +1010,6 @@ static void ath_scan_send_probe(struct ath_softc *sc,
goto error;
txctl.txq = sc->tx.txq_map[IEEE80211_AC_VO];
txctl.force_channel = true;
if (ath_tx_start(sc->hw, skb, &txctl))
goto error;
......@@ -1133,7 +1132,6 @@ ath_chanctx_send_vif_ps_frame(struct ath_softc *sc, struct ath_vif *avp,
memset(&txctl, 0, sizeof(txctl));
txctl.txq = sc->tx.txq_map[IEEE80211_AC_VO];
txctl.sta = sta;
txctl.force_channel = true;
if (ath_tx_start(sc->hw, skb, &txctl)) {
ieee80211_free_txskb(sc->hw, skb);
return false;
......
......@@ -600,7 +600,6 @@ static int read_file_xmit(struct seq_file *file, void *data)
PR("MPDUs XRetried: ", xretries);
PR("Aggregates: ", a_aggr);
PR("AMPDUs Queued HW:", a_queued_hw);
PR("AMPDUs Queued SW:", a_queued_sw);
PR("AMPDUs Completed:", a_completed);
PR("AMPDUs Retried: ", a_retries);
PR("AMPDUs XRetried: ", a_xretries);
......@@ -629,8 +628,7 @@ static void print_queue(struct ath_softc *sc, struct ath_txq *txq,
seq_printf(file, "%s: %d ", "qnum", txq->axq_qnum);
seq_printf(file, "%s: %2d ", "qdepth", txq->axq_depth);
seq_printf(file, "%s: %2d ", "ampdu-depth", txq->axq_ampdu_depth);
seq_printf(file, "%s: %3d ", "pending", txq->pending_frames);
seq_printf(file, "%s: %d\n", "stopped", txq->stopped);
seq_printf(file, "%s: %3d\n", "pending", txq->pending_frames);
ath_txq_unlock(sc, txq);
}
......@@ -1208,7 +1206,6 @@ static const char ath9k_gstrings_stats[][ETH_GSTRING_LEN] = {
AMKSTR(d_tx_mpdu_xretries),
AMKSTR(d_tx_aggregates),
AMKSTR(d_tx_ampdus_queued_hw),
AMKSTR(d_tx_ampdus_queued_sw),
AMKSTR(d_tx_ampdus_completed),
AMKSTR(d_tx_ampdu_retries),
AMKSTR(d_tx_ampdu_xretries),
......@@ -1288,7 +1285,6 @@ void ath9k_get_et_stats(struct ieee80211_hw *hw,
AWDATA(xretries);
AWDATA(a_aggr);
AWDATA(a_queued_hw);
AWDATA(a_queued_sw);
AWDATA(a_completed);
AWDATA(a_retries);
AWDATA(a_xretries);
......@@ -1346,14 +1342,6 @@ int ath9k_init_debug(struct ath_hw *ah)
read_file_xmit);
debugfs_create_devm_seqfile(sc->dev, "queues", sc->debug.debugfs_phy,
read_file_queues);
debugfs_create_u32("qlen_bk", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy,
&sc->tx.txq_max_pending[IEEE80211_AC_BK]);
debugfs_create_u32("qlen_be", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy,
&sc->tx.txq_max_pending[IEEE80211_AC_BE]);
debugfs_create_u32("qlen_vi", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy,
&sc->tx.txq_max_pending[IEEE80211_AC_VI]);
debugfs_create_u32("qlen_vo", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy,
&sc->tx.txq_max_pending[IEEE80211_AC_VO]);
debugfs_create_devm_seqfile(sc->dev, "misc", sc->debug.debugfs_phy,
read_file_misc);
debugfs_create_devm_seqfile(sc->dev, "reset", sc->debug.debugfs_phy,
......
......@@ -147,7 +147,6 @@ struct ath_interrupt_stats {
* @completed: Total MPDUs (non-aggr) completed
* @a_aggr: Total no. of aggregates queued
* @a_queued_hw: Total AMPDUs queued to hardware
* @a_queued_sw: Total AMPDUs queued to software queues
* @a_completed: Total AMPDUs completed
* @a_retries: No. of AMPDUs retried (SW)
* @a_xretries: No. of AMPDUs dropped due to xretries
......@@ -174,7 +173,6 @@ struct ath_tx_stats {
u32 xretries;
u32 a_aggr;
u32 a_queued_hw;
u32 a_queued_sw;
u32 a_completed;
u32 a_retries;
u32 a_xretries;
......
......@@ -52,8 +52,8 @@ static ssize_t read_file_node_aggr(struct file *file, char __user *user_buf,
"TID", "SEQ_START", "SEQ_NEXT", "BAW_SIZE",
"BAW_HEAD", "BAW_TAIL", "BAR_IDX", "SCHED", "PAUSED");
for (tidno = 0, tid = &an->tid[tidno];
tidno < IEEE80211_NUM_TIDS; tidno++, tid++) {
for (tidno = 0; tidno < IEEE80211_NUM_TIDS; tidno++) {
tid = ath_node_to_tid(an, tidno);
txq = tid->txq;
ath_txq_lock(sc, txq);
if (tid->active) {
......
......@@ -360,7 +360,6 @@ static int ath9k_init_queues(struct ath_softc *sc)
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
sc->tx.txq_map[i] = ath_txq_setup(sc, ATH9K_TX_QUEUE_DATA, i);
sc->tx.txq_map[i]->mac80211_qnum = i;
sc->tx.txq_max_pending[i] = ATH_MAX_QDEPTH;
}
return 0;
}
......@@ -919,6 +918,7 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw)
hw->max_rate_tries = 10;
hw->sta_data_size = sizeof(struct ath_node);
hw->vif_data_size = sizeof(struct ath_vif);
hw->txq_data_size = sizeof(struct ath_atx_tid);
hw->extra_tx_headroom = 4;
hw->wiphy->available_antennas_rx = BIT(ah->caps.max_rxchains) - 1;
......
......@@ -1902,9 +1902,11 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw,
bool flush = false;
int ret = 0;
struct ieee80211_sta *sta = params->sta;
struct ath_node *an = (struct ath_node *)sta->drv_priv;
enum ieee80211_ampdu_mlme_action action = params->action;
u16 tid = params->tid;
u16 *ssn = &params->ssn;
struct ath_atx_tid *atid;
mutex_lock(&sc->mutex);
......@@ -1937,9 +1939,9 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw,
ath9k_ps_restore(sc);
break;
case IEEE80211_AMPDU_TX_OPERATIONAL:
ath9k_ps_wakeup(sc);
ath_tx_aggr_resume(sc, sta, tid);
ath9k_ps_restore(sc);
atid = ath_node_to_tid(an, tid);
atid->baw_size = IEEE80211_MIN_AMPDU_BUF <<
sta->ht_cap.ampdu_factor;
break;
default:
ath_err(ath9k_hw_common(sc->sc_ah), "Unknown AMPDU action\n");
......@@ -2701,4 +2703,5 @@ struct ieee80211_ops ath9k_ops = {
.sw_scan_start = ath9k_sw_scan_start,
.sw_scan_complete = ath9k_sw_scan_complete,
.get_txpower = ath9k_get_txpower,
.wake_tx_queue = ath9k_wake_tx_queue,
};
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment