ath9k: defer ath_tx_setup_buffer until the first tx attempt

With sequence number assignment and buffer allocation deferred until they
are first needed, it becomes much easier to start dropping packets from
the tid queue when necessary, e.g. when latency suddenly increases. This
paves the way for future improvements in buffer management that reduce
latency.
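
To illustrate the pattern, here is a minimal standalone sketch (not the
driver code itself: the types and helpers below are simplified stand-ins
for ath_frame_info, struct ath_buf and ath_tx_setup_buffer):

/*
 * Toy model of the deferral: the descriptor is only allocated (and the
 * sequence number only assigned) when a frame is first pulled off the
 * software queue for transmission, so frames still sitting in the queue
 * can be dropped without touching buffers or the sequence space.
 */
#include <stdio.h>
#include <stdlib.h>

struct buf   { int seqno; };         /* stand-in for struct ath_buf */
struct frame { struct buf *bf; };    /* stand-in for ath_frame_info */

static int next_seqno;

/* analogous to ath_tx_setup_buffer(): runs on the first tx attempt */
static struct buf *setup_buffer(struct frame *f)
{
	struct buf *bf = malloc(sizeof(*bf));
	if (!bf)
		return NULL;             /* caller drops the frame */
	bf->seqno = next_seqno++;        /* seqno handed out lazily too */
	f->bf = bf;
	return bf;
}

static void tx_frame(struct frame *f)
{
	struct buf *bf = f->bf;

	if (!bf)
		bf = setup_buffer(f);    /* first tx attempt: set up now */
	if (!bf)
		return;

	printf("tx seqno %d\n", bf->seqno);
}

int main(void)
{
	struct frame q[3] = { { NULL }, { NULL }, { NULL } };

	/*
	 * q[1] is "dropped" before it is ever sent: since no buffer or
	 * sequence number was assigned, the drop is free and leaves no
	 * hole in the sequence space (prints seqno 0 and 1).
	 */
	tx_frame(&q[0]);
	tx_frame(&q[2]);
	free(q[0].bf);
	free(q[2].bf);
	return 0;
}

Because sequence numbers are only consumed on the first transmission
attempt, a dropped frame never occupies a slot in the block-ack window.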

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index bd52361..68066c5 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -48,8 +48,9 @@
 #define IS_HT_RATE(_rate)     ((_rate) & 0x80)
 
 static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq,
-			       struct ath_atx_tid *tid,
-			       struct list_head *bf_head);
+			       struct ath_atx_tid *tid, struct sk_buff *skb);
+static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
+			    int tx_flags, struct ath_txq *txq);
 static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf,
 				struct ath_txq *txq, struct list_head *bf_q,
 				struct ath_tx_status *ts, int txok, int sendbar);
@@ -61,6 +62,10 @@
 			     int txok, bool update_rc);
 static void ath_tx_update_baw(struct ath_softc *sc, struct ath_atx_tid *tid,
 			      int seqno);
+static struct ath_buf *ath_tx_setup_buffer(struct ath_softc *sc,
+					   struct ath_txq *txq,
+					   struct ath_atx_tid *tid,
+					   struct sk_buff *skb);
 
 enum {
 	MCS_HT20,
@@ -164,14 +169,13 @@
 		fi = get_frame_info(skb);
 		bf = fi->bf;
 
-		list_add_tail(&bf->list, &bf_head);
-
 		spin_unlock_bh(&txq->axq_lock);
-		if (fi->retries) {
+		if (bf && fi->retries) {
+			list_add_tail(&bf->list, &bf_head);
 			ath_tx_update_baw(sc, tid, bf->bf_state.seqno);
 			ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0, 1);
 		} else {
-			ath_tx_send_normal(sc, txq, NULL, &bf_head);
+			ath_tx_send_normal(sc, txq, NULL, skb);
 		}
 		spin_lock_bh(&txq->axq_lock);
 	}
@@ -234,6 +238,13 @@
 		fi = get_frame_info(skb);
 		bf = fi->bf;
 
+		if (!bf) {
+			spin_unlock(&txq->axq_lock);
+			ath_tx_complete(sc, skb, ATH_TX_ERROR, txq);
+			spin_lock(&txq->axq_lock);
+			continue;
+		}
+
 		list_add_tail(&bf->list, &bf_head);
 
 		if (fi->retries)
@@ -760,8 +771,14 @@
 		skb = skb_peek(&tid->buf_q);
 		fi = get_frame_info(skb);
 		bf = fi->bf;
-		seqno = bf->bf_state.seqno;
+		if (!fi->bf)
+			bf = ath_tx_setup_buffer(sc, txq, tid, skb);
 
+		if (!bf)
+			continue;
+
+		bf->bf_state.bf_type |= BUF_AMPDU;
+		seqno = bf->bf_state.seqno;
 		if (!bf_first)
 			bf_first = bf;
 
@@ -1434,13 +1451,11 @@
 }
 
 static void ath_tx_send_ampdu(struct ath_softc *sc, struct ath_atx_tid *tid,
-			      struct ath_buf *bf, struct ath_tx_control *txctl)
+			      struct sk_buff *skb, struct ath_tx_control *txctl)
 {
-	struct ath_frame_info *fi = get_frame_info(bf->bf_mpdu);
+	struct ath_frame_info *fi = get_frame_info(skb);
 	struct list_head bf_head;
-	u16 seqno = bf->bf_state.seqno;
-
-	bf->bf_state.bf_type |= BUF_AMPDU;
+	struct ath_buf *bf;
 
 	/*
 	 * Do not queue to h/w when any of the following conditions is true:
@@ -1450,25 +1465,29 @@
 	 * - h/w queue depth exceeds low water mark
 	 */
 	if (!skb_queue_empty(&tid->buf_q) || tid->paused ||
-	    !BAW_WITHIN(tid->seq_start, tid->baw_size, seqno) ||
+	    !BAW_WITHIN(tid->seq_start, tid->baw_size, tid->seq_next) ||
 	    txctl->txq->axq_ampdu_depth >= ATH_AGGR_MIN_QDEPTH) {
 		/*
 		 * Add this frame to software queue for scheduling later
 		 * for aggregation.
 		 */
 		TX_STAT_INC(txctl->txq->axq_qnum, a_queued_sw);
-		__skb_queue_tail(&tid->buf_q, bf->bf_mpdu);
+		__skb_queue_tail(&tid->buf_q, skb);
 		if (!txctl->an || !txctl->an->sleeping)
 			ath_tx_queue_tid(txctl->txq, tid);
 		return;
 	}
 
+	bf = ath_tx_setup_buffer(sc, txctl->txq, tid, skb);
+	if (!bf)
+		return;
+
+	bf->bf_state.bf_type |= BUF_AMPDU;
 	INIT_LIST_HEAD(&bf_head);
 	list_add(&bf->list, &bf_head);
 
 	/* Add sub-frame to BAW */
-	if (!fi->retries)
-		ath_tx_addto_baw(sc, tid, seqno);
+	ath_tx_addto_baw(sc, tid, bf->bf_state.seqno);
 
 	/* Queue to h/w without aggregation */
 	TX_STAT_INC(txctl->txq->axq_qnum, a_queued_hw);
@@ -1478,13 +1497,21 @@
 }
 
 static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq,
-			       struct ath_atx_tid *tid,
-			       struct list_head *bf_head)
+			       struct ath_atx_tid *tid, struct sk_buff *skb)
 {
-	struct ath_frame_info *fi;
+	struct ath_frame_info *fi = get_frame_info(skb);
+	struct list_head bf_head;
 	struct ath_buf *bf;
 
-	bf = list_first_entry(bf_head, struct ath_buf, list);
+	bf = fi->bf;
+	if (!bf)
+		bf = ath_tx_setup_buffer(sc, txq, tid, skb);
+
+	if (!bf)
+		return;
+
+	INIT_LIST_HEAD(&bf_head);
+	list_add_tail(&bf->list, &bf_head);
 	bf->bf_state.bf_type &= ~BUF_AMPDU;
 
 	/* update starting sequence number for subsequent ADDBA request */
@@ -1492,9 +1519,8 @@
 		INCR(tid->seq_start, IEEE80211_SEQ_MAX);
 
 	bf->bf_lastbf = bf;
-	fi = get_frame_info(bf->bf_mpdu);
 	ath_buf_set_rate(sc, bf, fi->framelen);
-	ath_tx_txqaddbuf(sc, txq, bf_head, false);
+	ath_tx_txqaddbuf(sc, txq, &bf_head, false);
 	TX_STAT_INC(txq->axq_qnum, queued);
 }
 
@@ -1717,6 +1743,10 @@
 
 }
 
+/*
+ * Assign a descriptor (and sequence number if necessary),
+ * and map the buffer for DMA. Frees the skb on error.
+ */
 static struct ath_buf *ath_tx_setup_buffer(struct ath_softc *sc,
 					   struct ath_txq *txq,
 					   struct ath_atx_tid *tid,
@@ -1734,7 +1764,7 @@
 	bf = ath_tx_get_buffer(sc);
 	if (!bf) {
 		ath_dbg(common, ATH_DBG_XMIT, "TX buffers are full\n");
-		return NULL;
+		goto error;
 	}
 
 	ATH_TXBUF_RESET(bf);
@@ -1757,7 +1787,7 @@
 		ath_err(ath9k_hw_common(sc->sc_ah),
 			"dma_mapping_error() on TX\n");
 		ath_tx_return_buffer(sc, bf);
-		return NULL;
+		goto error;
 	}
 
 	frm_type = get_hw_packet_type(skb);
@@ -1779,18 +1809,20 @@
 	fi->bf = bf;
 
 	return bf;
+
+error:
+	dev_kfree_skb_any(skb);
+	return NULL;
 }
 
 /* FIXME: tx power */
-static int ath_tx_start_dma(struct ath_softc *sc, struct sk_buff *skb,
+static void ath_tx_start_dma(struct ath_softc *sc, struct sk_buff *skb,
 			     struct ath_tx_control *txctl)
 {
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	struct list_head bf_head;
 	struct ath_atx_tid *tid = NULL;
 	struct ath_buf *bf;
-	int ret = 0;
 	u8 tidno;
 
 	spin_lock_bh(&txctl->txq->axq_lock);
@@ -1803,21 +1835,16 @@
 		WARN_ON(tid->ac->txq != txctl->txq);
 	}
 
-	bf = ath_tx_setup_buffer(sc, txctl->txq, tid, skb);
-	if (unlikely(!bf)) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	if ((tx_info->flags & IEEE80211_TX_CTL_AMPDU) && tid) {
 		/*
 		 * Try aggregation if it's a unicast data frame
 		 * and the destination is HT capable.
 		 */
-		ath_tx_send_ampdu(sc, tid, bf, txctl);
+		ath_tx_send_ampdu(sc, tid, skb, txctl);
 	} else {
-		INIT_LIST_HEAD(&bf_head);
-		list_add_tail(&bf->list, &bf_head);
+		bf = ath_tx_setup_buffer(sc, txctl->txq, tid, skb);
+		if (!bf)
+			goto out;
 
 		bf->bf_state.bfs_paprd = txctl->paprd;
 
@@ -1831,12 +1858,11 @@
 		if (tx_info->flags & IEEE80211_TX_CTL_CLEAR_PS_FILT)
 			ath9k_hw_set_clrdmask(sc->sc_ah, bf->bf_desc, true);
 
-		ath_tx_send_normal(sc, txctl->txq, tid, &bf_head);
+		ath_tx_send_normal(sc, txctl->txq, tid, skb);
 	}
 
 out:
 	spin_unlock_bh(&txctl->txq->axq_lock);
-	return ret;
 }
 
 /* Upon failure caller should free skb */
@@ -1904,7 +1930,8 @@
 	}
 	spin_unlock_bh(&txq->axq_lock);
 
-	return ath_tx_start_dma(sc, skb, txctl);
+	ath_tx_start_dma(sc, skb, txctl);
+	return 0;
 }
 
 /*****************/