mm: vmscan: decide whether to compact the pgdat based on reclaim progress
In the past, kswapd made the decision on whether to compact memory after
the pgdat was considered balanced. This more or less worked, but that is
late to make such a decision, and it does not fit well now that kswapd
decides whether to exit the zone scanning loop depending on reclaim
progress.
This patch will compact a pgdat if at least the requested number of
pages were reclaimed from unbalanced zones for a given priority. If any
zone is currently balanced, kswapd will not call compaction as it is
expected the necessary pages are already available.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Tested-by: Zlatko Calusic <zcalusic@bitsync.net>
Cc: dormando <dormando@rydia.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1c10ee5..cd09803 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2661,7 +2661,8 @@
*/
static bool kswapd_shrink_zone(struct zone *zone,
struct scan_control *sc,
- unsigned long lru_pages)
+ unsigned long lru_pages,
+ unsigned long *nr_attempted)
{
unsigned long nr_slab;
struct reclaim_state *reclaim_state = current->reclaim_state;
@@ -2677,6 +2678,9 @@
nr_slab = shrink_slab(&shrink, sc->nr_scanned, lru_pages);
sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+ /* Account for the number of pages attempted to reclaim */
+ *nr_attempted += sc->nr_to_reclaim;
+
if (nr_slab == 0 && !zone_reclaimable(zone))
zone->all_unreclaimable = 1;
@@ -2724,7 +2728,9 @@
do {
unsigned long lru_pages = 0;
+ unsigned long nr_attempted = 0;
bool raise_priority = true;
+ bool pgdat_needs_compaction = (order > 0);
sc.nr_reclaimed = 0;
@@ -2774,7 +2780,21 @@
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
+ if (!populated_zone(zone))
+ continue;
+
lru_pages += zone_reclaimable_pages(zone);
+
+ /*
+ * If any zone is currently balanced then kswapd will
+ * not call compaction as it is expected that the
+ * necessary pages are already available.
+ */
+ if (pgdat_needs_compaction &&
+ zone_watermark_ok(zone, order,
+ low_wmark_pages(zone),
+ *classzone_idx, 0))
+ pgdat_needs_compaction = false;
}
/*
@@ -2843,7 +2863,8 @@
* already being scanned that high
* watermark would be met at 100% efficiency.
*/
- if (kswapd_shrink_zone(zone, &sc, lru_pages))
+ if (kswapd_shrink_zone(zone, &sc, lru_pages,
+ &nr_attempted))
raise_priority = false;
}
@@ -2896,6 +2917,13 @@
break;
/*
+ * Compact if necessary and kswapd is reclaiming at least the
+ * high watermark number of pages as requested
+ */
+ if (pgdat_needs_compaction && sc.nr_reclaimed > nr_attempted)
+ compact_pgdat(pgdat, order);
+
+ /*
* Raise priority if scanning rate is too low or there was no
* progress in reclaiming pages
*/
@@ -2904,33 +2932,6 @@
} while (sc.priority >= 0 &&
!pgdat_balanced(pgdat, order, *classzone_idx));
- /*
- * If kswapd was reclaiming at a higher order, it has the option of
- * sleeping without all zones being balanced. Before it does, it must
- * ensure that the watermarks for order-0 on *all* zones are met and
- * that the congestion flags are cleared. The congestion flag must
- * be cleared as kswapd is the only mechanism that clears the flag
- * and it is potentially going to sleep here.
- */
- if (order) {
- int zones_need_compaction = 1;
-
- for (i = 0; i <= end_zone; i++) {
- struct zone *zone = pgdat->node_zones + i;
-
- if (!populated_zone(zone))
- continue;
-
- /* Check if the memory needs to be defragmented. */
- if (zone_watermark_ok(zone, order,
- low_wmark_pages(zone), *classzone_idx, 0))
- zones_need_compaction = 0;
- }
-
- if (zones_need_compaction)
- compact_pgdat(pgdat, order);
- }
-
out:
/*
* Return the order we were reclaiming at so prepare_kswapd_sleep()