288 lines
9.0 KiB
Diff
288 lines
9.0 KiB
Diff
From e20b7386fccc18c791796eb1dc1a91eee3ccf801 Mon Sep 17 00:00:00 2001
|
|
From: Yu Zhao <yuzhao@google.com>
|
|
Date: Wed, 21 Dec 2022 21:19:02 -0700
|
|
Subject: [PATCH 24/29] mm: multi-gen LRU: remove aging fairness safeguard
|
|
|
|
Recall that the aging produces the youngest generation: first it scans
|
|
for accessed pages and updates their gen counters; then it increments
|
|
lrugen->max_seq.
|
|
|
|
The current aging fairness safeguard for kswapd uses two passes to
|
|
ensure fairness among multiple eligible memcgs. On the first pass,
|
|
which is shared with the eviction, it checks whether all eligible
|
|
memcgs are low on cold pages. If so, it requires a second pass, on
|
|
which it ages all those memcgs at the same time.
|
|
|
|
With memcg LRU, the aging, while ensuring eventual fairness, will run
|
|
when necessary. Therefore the current aging fairness safeguard for
|
|
kswapd is no longer needed.
|
|
|
|
Note that memcg LRU only applies to global reclaim. For memcg reclaim,
|
|
the aging can be unfair to different memcgs, i.e., their
|
|
lrugen->max_seq can be incremented at different paces.
|
|
|
|
Link: https://lkml.kernel.org/r/20221222041905.2431096-5-yuzhao@google.com
|
|
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
|
Cc: Johannes Weiner <hannes@cmpxchg.org>
|
|
Cc: Jonathan Corbet <corbet@lwn.net>
|
|
Cc: Michael Larabel <Michael@MichaelLarabel.com>
|
|
Cc: Michal Hocko <mhocko@kernel.org>
|
|
Cc: Mike Rapoport <rppt@kernel.org>
|
|
Cc: Roman Gushchin <roman.gushchin@linux.dev>
|
|
Cc: Suren Baghdasaryan <surenb@google.com>
|
|
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
|
---
|
|
mm/vmscan.c | 126 ++++++++++++++++++++++++----------------------------
|
|
1 file changed, 59 insertions(+), 67 deletions(-)
|
|
|
|
--- a/mm/vmscan.c
|
|
+++ b/mm/vmscan.c
|
|
@@ -131,7 +131,6 @@ struct scan_control {
|
|
|
|
#ifdef CONFIG_LRU_GEN
|
|
/* help kswapd make better choices among multiple memcgs */
|
|
- unsigned int memcgs_need_aging:1;
|
|
unsigned long last_reclaimed;
|
|
#endif
|
|
|
|
@@ -4184,7 +4183,7 @@ done:
|
|
return true;
|
|
}
|
|
|
|
-static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq,
|
|
+static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
|
|
struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
|
|
{
|
|
int gen, type, zone;
|
|
@@ -4193,6 +4192,13 @@ static bool should_run_aging(struct lruv
|
|
unsigned long total = 0;
|
|
struct lru_gen_page *lrugen = &lruvec->lrugen;
|
|
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
|
+ DEFINE_MIN_SEQ(lruvec);
|
|
+
|
|
+ /* whether this lruvec is completely out of cold pages */
|
|
+ if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
|
|
+ *nr_to_scan = 0;
|
|
+ return true;
|
|
+ }
|
|
|
|
for (type = !can_swap; type < ANON_AND_FILE; type++) {
|
|
unsigned long seq;
|
|
@@ -4221,8 +4227,6 @@ static bool should_run_aging(struct lruv
|
|
* stalls when the number of generations reaches MIN_NR_GENS. Hence, the
|
|
* ideal number of generations is MIN_NR_GENS+1.
|
|
*/
|
|
- if (min_seq[!can_swap] + MIN_NR_GENS > max_seq)
|
|
- return true;
|
|
if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
|
|
return false;
|
|
|
|
@@ -4241,40 +4245,54 @@ static bool should_run_aging(struct lruv
|
|
return false;
|
|
}
|
|
|
|
-static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl)
|
|
+static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
|
|
{
|
|
- bool need_aging;
|
|
- unsigned long nr_to_scan;
|
|
- int swappiness = get_swappiness(lruvec, sc);
|
|
+ int gen, type, zone;
|
|
+ unsigned long total = 0;
|
|
+ bool can_swap = get_swappiness(lruvec, sc);
|
|
+ struct lru_gen_page *lrugen = &lruvec->lrugen;
|
|
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
|
DEFINE_MAX_SEQ(lruvec);
|
|
DEFINE_MIN_SEQ(lruvec);
|
|
|
|
- VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
|
|
+ for (type = !can_swap; type < ANON_AND_FILE; type++) {
|
|
+ unsigned long seq;
|
|
|
|
- mem_cgroup_calculate_protection(NULL, memcg);
|
|
+ for (seq = min_seq[type]; seq <= max_seq; seq++) {
|
|
+ gen = lru_gen_from_seq(seq);
|
|
|
|
- if (mem_cgroup_below_min(memcg))
|
|
- return false;
|
|
+ for (zone = 0; zone < MAX_NR_ZONES; zone++)
|
|
+ total += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
|
|
+ }
|
|
+ }
|
|
|
|
- need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
|
|
+ /* whether the size is big enough to be helpful */
|
|
+ return mem_cgroup_online(memcg) ? (total >> sc->priority) : total;
|
|
+}
|
|
|
|
- if (min_ttl) {
|
|
- int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
|
|
- unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
|
|
+static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc,
|
|
+ unsigned long min_ttl)
|
|
+{
|
|
+ int gen;
|
|
+ unsigned long birth;
|
|
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
|
+ DEFINE_MIN_SEQ(lruvec);
|
|
|
|
- if (time_is_after_jiffies(birth + min_ttl))
|
|
- return false;
|
|
+ VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
|
|
|
|
- /* the size is likely too small to be helpful */
|
|
- if (!nr_to_scan && sc->priority != DEF_PRIORITY)
|
|
- return false;
|
|
- }
|
|
+ /* see the comment on lru_gen_page */
|
|
+ gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
|
|
+ birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
|
|
|
|
- if (need_aging)
|
|
- try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
|
|
+ if (time_is_after_jiffies(birth + min_ttl))
|
|
+ return false;
|
|
|
|
- return true;
|
|
+ if (!lruvec_is_sizable(lruvec, sc))
|
|
+ return false;
|
|
+
|
|
+ mem_cgroup_calculate_protection(NULL, memcg);
|
|
+
|
|
+ return !mem_cgroup_below_min(memcg);
|
|
}
|
|
|
|
/* to protect the working set of the last N jiffies */
|
|
@@ -4283,46 +4301,32 @@ static unsigned long lru_gen_min_ttl __r
|
|
static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
|
|
{
|
|
struct mem_cgroup *memcg;
|
|
- bool success = false;
|
|
unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
|
|
|
|
VM_WARN_ON_ONCE(!current_is_kswapd());
|
|
|
|
sc->last_reclaimed = sc->nr_reclaimed;
|
|
|
|
- /*
|
|
- * To reduce the chance of going into the aging path, which can be
|
|
- * costly, optimistically skip it if the flag below was cleared in the
|
|
- * eviction path. This improves the overall performance when multiple
|
|
- * memcgs are available.
|
|
- */
|
|
- if (!sc->memcgs_need_aging) {
|
|
- sc->memcgs_need_aging = true;
|
|
+ /* check the order to exclude compaction-induced reclaim */
|
|
+ if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
|
|
return;
|
|
- }
|
|
-
|
|
- set_mm_walk(pgdat);
|
|
|
|
memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
|
do {
|
|
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
|
|
|
- if (age_lruvec(lruvec, sc, min_ttl))
|
|
- success = true;
|
|
+ if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) {
|
|
+ mem_cgroup_iter_break(NULL, memcg);
|
|
+ return;
|
|
+ }
|
|
|
|
cond_resched();
|
|
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
|
|
|
- clear_mm_walk();
|
|
-
|
|
- /* check the order to exclude compaction-induced reclaim */
|
|
- if (success || !min_ttl || sc->order)
|
|
- return;
|
|
-
|
|
/*
|
|
* The main goal is to OOM kill if every generation from all memcgs is
|
|
* younger than min_ttl. However, another possibility is all memcgs are
|
|
- * either below min or empty.
|
|
+ * either too small or below min.
|
|
*/
|
|
if (mutex_trylock(&oom_lock)) {
|
|
struct oom_control oc = {
|
|
@@ -4830,33 +4834,27 @@ retry:
|
|
* reclaim.
|
|
*/
|
|
static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
|
|
- bool can_swap, bool *need_aging)
|
|
+ bool can_swap)
|
|
{
|
|
unsigned long nr_to_scan;
|
|
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
|
DEFINE_MAX_SEQ(lruvec);
|
|
- DEFINE_MIN_SEQ(lruvec);
|
|
|
|
if (mem_cgroup_below_min(memcg) ||
|
|
(mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
|
|
return 0;
|
|
|
|
- *need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
|
|
- if (!*need_aging)
|
|
+ if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
|
|
return nr_to_scan;
|
|
|
|
/* skip the aging path at the default priority */
|
|
if (sc->priority == DEF_PRIORITY)
|
|
- goto done;
|
|
+ return nr_to_scan;
|
|
|
|
- /* leave the work to lru_gen_age_node() */
|
|
- if (current_is_kswapd())
|
|
- return 0;
|
|
+ try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false);
|
|
|
|
- if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
|
|
- return nr_to_scan;
|
|
-done:
|
|
- return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
|
|
+ /* skip this lruvec as it's low on cold pages */
|
|
+ return 0;
|
|
}
|
|
|
|
static unsigned long get_nr_to_reclaim(struct scan_control *sc)
|
|
@@ -4875,9 +4873,7 @@ static unsigned long get_nr_to_reclaim(s
|
|
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
|
{
|
|
struct blk_plug plug;
|
|
- bool need_aging = false;
|
|
unsigned long scanned = 0;
|
|
- unsigned long reclaimed = sc->nr_reclaimed;
|
|
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
|
|
|
|
lru_add_drain();
|
|
@@ -4898,13 +4894,13 @@ static void lru_gen_shrink_lruvec(struct
|
|
else
|
|
swappiness = 0;
|
|
|
|
- nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging);
|
|
+ nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
|
|
if (!nr_to_scan)
|
|
- goto done;
|
|
+ break;
|
|
|
|
delta = evict_pages(lruvec, sc, swappiness);
|
|
if (!delta)
|
|
- goto done;
|
|
+ break;
|
|
|
|
scanned += delta;
|
|
if (scanned >= nr_to_scan)
|
|
@@ -4916,10 +4912,6 @@ static void lru_gen_shrink_lruvec(struct
|
|
cond_resched();
|
|
}
|
|
|
|
- /* see the comment in lru_gen_age_node() */
|
|
- if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging)
|
|
- sc->memcgs_need_aging = false;
|
|
-done:
|
|
clear_mm_walk();
|
|
|
|
blk_finish_plug(&plug);
|