From 95e633efbd1b4ffbbfc2d8abba2b05291f6e9903 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 9 Feb 2024 20:47:39 +0100 Subject: [PATCH] mac80211: add AQL support for broadcast/multicast packets Should improve performance/reliability with lots of mcast packets Signed-off-by: Felix Fietkau --- ...dd-AQL-support-for-broadcast-packets.patch | 226 ++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 package/kernel/mac80211/patches/subsys/330-mac80211-add-AQL-support-for-broadcast-packets.patch diff --git a/package/kernel/mac80211/patches/subsys/330-mac80211-add-AQL-support-for-broadcast-packets.patch b/package/kernel/mac80211/patches/subsys/330-mac80211-add-AQL-support-for-broadcast-packets.patch new file mode 100644 index 0000000000..6f64467b66 --- /dev/null +++ b/package/kernel/mac80211/patches/subsys/330-mac80211-add-AQL-support-for-broadcast-packets.patch @@ -0,0 +1,226 @@ +From: Felix Fietkau +Date: Fri, 9 Feb 2024 19:43:40 +0100 +Subject: [PATCH] mac80211: add AQL support for broadcast packets + +Excessive broadcast traffic with little competing unicast traffic can easily +flood hardware queues, leading to throughput issues. Additionally, filling +the hardware queues with too many packets breaks FQ for broadcast data. +Fix this by enabling AQL for broadcast packets. + +Signed-off-by: Felix Fietkau +--- + +--- a/include/net/cfg80211.h ++++ b/include/net/cfg80211.h +@@ -3324,6 +3324,7 @@ enum wiphy_params_flags { + /* The per TXQ device queue limit in airtime */ + #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L 5000 + #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H 12000 ++#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC 50000 + + /* The per interface airtime threshold to switch to lower queue limit */ + #define IEEE80211_AQL_THRESHOLD 24000 +--- a/net/mac80211/debugfs.c ++++ b/net/mac80211/debugfs.c +@@ -215,11 +215,13 @@ static ssize_t aql_pending_read(struct f + "VI %u us\n" + "BE %u us\n" + "BK %u us\n" ++ "BC/MC %u us\n" + "total %u us\n", + atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VO]), + atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VI]), + atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BE]), + atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BK]), ++ atomic_read(&local->aql_bc_pending_airtime), + atomic_read(&local->aql_total_pending_airtime)); + return simple_read_from_buffer(user_buf, count, ppos, + buf, len); +@@ -245,7 +247,8 @@ static ssize_t aql_txq_limit_read(struct + "VO %u %u\n" + "VI %u %u\n" + "BE %u %u\n" +- "BK %u %u\n", ++ "BK %u %u\n" ++ "BC/MC %u\n", + local->aql_txq_limit_low[IEEE80211_AC_VO], + local->aql_txq_limit_high[IEEE80211_AC_VO], + local->aql_txq_limit_low[IEEE80211_AC_VI], +@@ -253,7 +256,8 @@ static ssize_t aql_txq_limit_read(struct + local->aql_txq_limit_low[IEEE80211_AC_BE], + local->aql_txq_limit_high[IEEE80211_AC_BE], + local->aql_txq_limit_low[IEEE80211_AC_BK], +- local->aql_txq_limit_high[IEEE80211_AC_BK]); ++ local->aql_txq_limit_high[IEEE80211_AC_BK], ++ local->aql_txq_limit_bc); + return simple_read_from_buffer(user_buf, count, ppos, + buf, len); + } +@@ -279,6 +283,11 @@ static ssize_t aql_txq_limit_write(struc + else + buf[count] = '\0'; + ++ if (sscanf(buf, "mcast %u", &q_limit_low) == 1) { ++ local->aql_txq_limit_bc = q_limit_low; ++ return count; ++ } ++ + if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3) + return -EINVAL; + +--- a/net/mac80211/ieee80211_i.h ++++ b/net/mac80211/ieee80211_i.h +@@ -1328,10 +1328,12 @@ struct ieee80211_local { + spinlock_t handle_wake_tx_queue_lock; + + u16 airtime_flags; ++ u32 aql_txq_limit_bc; + u32 aql_txq_limit_low[IEEE80211_NUM_ACS]; + u32 aql_txq_limit_high[IEEE80211_NUM_ACS]; + u32 aql_threshold; + atomic_t aql_total_pending_airtime; ++ atomic_t aql_bc_pending_airtime; + atomic_t aql_ac_pending_airtime[IEEE80211_NUM_ACS]; + + const struct ieee80211_ops *ops; +--- a/net/mac80211/main.c ++++ b/net/mac80211/main.c +@@ -788,6 +788,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_ + spin_lock_init(&local->rx_path_lock); + spin_lock_init(&local->queue_stop_reason_lock); + ++ local->aql_txq_limit_bc = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC; + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + INIT_LIST_HEAD(&local->active_txqs[i]); + spin_lock_init(&local->active_txq_lock[i]); +--- a/net/mac80211/sta_info.c ++++ b/net/mac80211/sta_info.c +@@ -2341,28 +2341,27 @@ void ieee80211_sta_update_pending_airtim + struct sta_info *sta, u8 ac, + u16 tx_airtime, bool tx_completed) + { ++ atomic_t *counter; + int tx_pending; + + if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) + return; + +- if (!tx_completed) { +- if (sta) +- atomic_add(tx_airtime, +- &sta->airtime[ac].aql_tx_pending); ++ if (sta) ++ counter = &sta->airtime[ac].aql_tx_pending; ++ else ++ counter = &local->aql_bc_pending_airtime; + ++ if (!tx_completed) { ++ atomic_add(tx_airtime, counter); + atomic_add(tx_airtime, &local->aql_total_pending_airtime); + atomic_add(tx_airtime, &local->aql_ac_pending_airtime[ac]); + return; + } + +- if (sta) { +- tx_pending = atomic_sub_return(tx_airtime, +- &sta->airtime[ac].aql_tx_pending); +- if (tx_pending < 0) +- atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending, +- tx_pending, 0); +- } ++ tx_pending = atomic_sub_return(tx_airtime, counter); ++ if (tx_pending < 0) ++ atomic_cmpxchg(counter, tx_pending, 0); + + atomic_sub(tx_airtime, &local->aql_total_pending_airtime); + tx_pending = atomic_sub_return(tx_airtime, +--- a/net/mac80211/tx.c ++++ b/net/mac80211/tx.c +@@ -3958,9 +3958,8 @@ begin: + encap_out: + IEEE80211_SKB_CB(skb)->control.vif = vif; + +- if (tx.sta && +- wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { +- bool ampdu = txq->ac != IEEE80211_AC_VO; ++ if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { ++ bool ampdu = txq->sta && txq->ac != IEEE80211_AC_VO; + u32 airtime; + + airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta, +@@ -4026,6 +4025,7 @@ struct ieee80211_txq *ieee80211_next_txq + struct ieee80211_txq *ret = NULL; + struct txq_info *txqi = NULL, *head = NULL; + bool found_eligible_txq = false; ++ bool aql_check; + + spin_lock_bh(&local->active_txq_lock[ac]); + +@@ -4049,26 +4049,26 @@ struct ieee80211_txq *ieee80211_next_txq + if (!head) + head = txqi; + ++ aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq); ++ if (aql_check) ++ found_eligible_txq = true; ++ + if (txqi->txq.sta) { + struct sta_info *sta = container_of(txqi->txq.sta, + struct sta_info, sta); +- bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq); +- s32 deficit = ieee80211_sta_deficit(sta, txqi->txq.ac); +- +- if (aql_check) +- found_eligible_txq = true; +- +- if (deficit < 0) ++ if (ieee80211_sta_deficit(sta, txqi->txq.ac) < 0) { + sta->airtime[txqi->txq.ac].deficit += + sta->airtime_weight << AIRTIME_QUANTUM_SHIFT; +- +- if (deficit < 0 || !aql_check) { +- list_move_tail(&txqi->schedule_order, +- &local->active_txqs[txqi->txq.ac]); +- goto begin; ++ aql_check = false; + } + } + ++ if (!aql_check) { ++ list_move_tail(&txqi->schedule_order, ++ &local->active_txqs[txqi->txq.ac]); ++ goto begin; ++ } ++ + if (txqi->schedule_round == local->schedule_round[ac]) + goto out; + +@@ -4133,7 +4133,8 @@ bool ieee80211_txq_airtime_check(struct + return true; + + if (!txq->sta) +- return true; ++ return atomic_read(&local->aql_bc_pending_airtime) < ++ local->aql_txq_limit_bc; + + if (unlikely(txq->tid == IEEE80211_NUM_TIDS)) + return true; +@@ -4182,15 +4183,15 @@ bool ieee80211_txq_may_transmit(struct i + + spin_lock_bh(&local->active_txq_lock[ac]); + +- if (!txqi->txq.sta) +- goto out; +- + if (list_empty(&txqi->schedule_order)) + goto out; + + if (!ieee80211_txq_schedule_airtime_check(local, ac)) + goto out; + ++ if (!txqi->txq.sta) ++ goto out; ++ + list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac], + schedule_order) { + if (iter == txqi)