Commit 90491d87 authored by Matthew Wilcox (Oracle), committed by Andrew Morton

mm: add free_unref_folios()

Iterate over a folio_batch rather than a linked list.  This is easier for
the CPU to prefetch and has a batch count naturally built in so we don't
need to track it.  Again, this lowers the maximum lock hold time from
32 folios to 15, but I do not expect this to have a significant effect.

Link: https://lkml.kernel.org/r/20240227174254.710559-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 7c76d922
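
For readers not familiar with the folio_batch API, the sketch below shows the calling pattern the diff relies on: folios are accumulated with folio_batch_add(), which returns the number of free slots remaining, and the batch is handed to the new free_unref_folios() whenever it fills up. A folio_batch holds at most 15 folios, which is where the drop in maximum lock hold time from 32 folios to 15 comes from. This is an illustrative sketch only: release_folio_array() and its parameters are hypothetical and not part of the patch, while the folio_batch helpers and free_unref_folios() are the interfaces actually used in the diff below.

/*
 * Illustrative sketch, not part of this commit: a hypothetical
 * mm-internal caller that frees an array of 0-order folios through
 * the batch interface added here.
 */
#include <linux/pagevec.h>	/* struct folio_batch and helpers */
#include "internal.h"		/* free_unref_folios() is mm-internal */

static void release_folio_array(struct folio **folios, unsigned int nr)
{
	struct folio_batch fbatch;
	unsigned int i;

	folio_batch_init(&fbatch);
	for (i = 0; i < nr; i++) {
		/*
		 * folio_batch_add() returns the number of slots still
		 * free; once it returns 0 the batch is full, so flush
		 * it to the page allocator in a single call.
		 */
		if (folio_batch_add(&fbatch, folios[i]) > 0)
			continue;
		free_unref_folios(&fbatch);	/* reinitialises fbatch */
	}

	/* Flush any leftover partial batch. */
	if (folio_batch_count(&fbatch))
		free_unref_folios(&fbatch);
}

The same fill-then-flush pattern is what the rewritten free_unref_page_list() in the diff uses internally, so existing callers keep their list-based interface while the allocator sees batches.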
mm/internal.h
@@ -451,8 +451,9 @@ extern bool free_pages_prepare(struct page *page, unsigned int order);
 
 extern int user_min_free_kbytes;
 
-extern void free_unref_page(struct page *page, unsigned int order);
-extern void free_unref_page_list(struct list_head *list);
+void free_unref_page(struct page *page, unsigned int order);
+void free_unref_folios(struct folio_batch *fbatch);
+void free_unref_page_list(struct list_head *list);
 
 extern void zone_pcp_reset(struct zone *zone);
 extern void zone_pcp_disable(struct zone *zone);
mm/page_alloc.c
@@ -32,6 +32,7 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/pagevec.h>
 #include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmstat.h>
@@ -2515,57 +2516,51 @@ void free_unref_page(struct page *page, unsigned int order)
 }
 
 /*
- * Free a list of 0-order pages
+ * Free a batch of 0-order pages
  */
-void free_unref_page_list(struct list_head *list)
+void free_unref_folios(struct folio_batch *folios)
 {
 	unsigned long __maybe_unused UP_flags;
-	struct folio *folio, *next;
 	struct per_cpu_pages *pcp = NULL;
 	struct zone *locked_zone = NULL;
-	int batch_count = 0;
-	int migratetype;
+	int i, j, migratetype;
 
-	/* Prepare pages for freeing */
-	list_for_each_entry_safe(folio, next, list, lru) {
+	/* Prepare folios for freeing */
+	for (i = 0, j = 0; i < folios->nr; i++) {
+		struct folio *folio = folios->folios[i];
 		unsigned long pfn = folio_pfn(folio);
-		if (!free_unref_page_prepare(&folio->page, pfn, 0)) {
-			list_del(&folio->lru);
+		if (!free_unref_page_prepare(&folio->page, pfn, 0))
 			continue;
-		}
 
 		/*
-		 * Free isolated pages directly to the allocator, see
+		 * Free isolated folios directly to the allocator, see
 		 * comment in free_unref_page.
 		 */
 		migratetype = get_pcppage_migratetype(&folio->page);
 		if (unlikely(is_migrate_isolate(migratetype))) {
-			list_del(&folio->lru);
 			free_one_page(folio_zone(folio), &folio->page, pfn,
 					0, migratetype, FPI_NONE);
 			continue;
 		}
+		if (j != i)
+			folios->folios[j] = folio;
+		j++;
 	}
+	folios->nr = j;
 
-	list_for_each_entry_safe(folio, next, list, lru) {
+	for (i = 0; i < folios->nr; i++) {
+		struct folio *folio = folios->folios[i];
 		struct zone *zone = folio_zone(folio);
 
-		list_del(&folio->lru);
 		migratetype = get_pcppage_migratetype(&folio->page);
 
-		/*
-		 * Either different zone requiring a different pcp lock or
-		 * excessive lock hold times when freeing a large list of
-		 * folios.
-		 */
-		if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) {
+		/* Different zone requires a different pcp lock */
+		if (zone != locked_zone) {
 			if (pcp) {
 				pcp_spin_unlock(pcp);
 				pcp_trylock_finish(UP_flags);
 			}
 
-			batch_count = 0;
-
 			/*
 			 * trylock is necessary as folios may be getting freed
 			 * from IRQ or SoftIRQ context after an IO completion.
@@ -2592,13 +2587,31 @@ void free_unref_page_list(struct list_head *list)
 
 		trace_mm_page_free_batched(&folio->page);
 		free_unref_page_commit(zone, pcp, &folio->page, migratetype, 0);
-		batch_count++;
 	}
 
 	if (pcp) {
 		pcp_spin_unlock(pcp);
 		pcp_trylock_finish(UP_flags);
 	}
+	folio_batch_reinit(folios);
+}
+
+void free_unref_page_list(struct list_head *list)
+{
+	struct folio_batch fbatch;
+
+	folio_batch_init(&fbatch);
+	while (!list_empty(list)) {
+		struct folio *folio = list_first_entry(list, struct folio, lru);
+
+		list_del(&folio->lru);
+		if (folio_batch_add(&fbatch, folio) > 0)
+			continue;
+		free_unref_folios(&fbatch);
+	}
+
+	if (fbatch.nr)
+		free_unref_folios(&fbatch);
 }
 
 /*