Commit dad6a5eb authored by Hugh Dickins, committed by Andrew Morton

mm,hugetlb: use folio fields in second tail page

Patch series "mm,huge,rmap: unify and speed up compound mapcounts".


This patch (of 3):

We want to declare one more int in the first tail of a compound page: that
first tail page being valuable property, since every compound page has a
first tail, but perhaps no more than that.

No problem on 64-bit: there is already space for it.  No problem with
32-bit THPs: 5.18 commit 5232c63f ("mm: Make compound_pincount always
available") kindly cleared the space for it, apparently not realizing that
only 64-bit architectures enable CONFIG_THP_SWAP (whose use of tail
page->private might conflict) - but make sure of that in its Kconfig.

But hugetlb pages use tail page->private of the first tail page for a
subpool pointer, which will conflict; and they also use page->private of
the 2nd, 3rd and 4th tails.

Undo "mm: add private field of first tail to struct page and struct
folio"'s recent addition of private_1 to the folio tail: instead add
hugetlb_subpool, hugetlb_cgroup, hugetlb_cgroup_rsvd, hugetlb_hwpoison to
a second tail page of the folio: THP has long been using several fields of
that tail, so make better use of it for hugetlb too.  This is not how a
generic folio should be declared in future, but it is an effective
transitional way to make use of it.

Delete the SUBPAGE_INDEX stuff, but keep __NR_USED_SUBPAGE: now 3.

[hughd@google.com: prefix folio's page_1 and page_2 with double underscore,
  give folio's _flags_2 and _head_2 a line documentation each]
  Link: https://lkml.kernel.org/r/9e2cb6b-5b58-d3f2-b5ee-5f8a14e8f10@google.com
Link: https://lkml.kernel.org/r/5f52de70-975-e94f-f141-543765736181@google.com
Link: https://lkml.kernel.org/r/3818cc9a-9999-d064-d778-9c94c5911e6@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 634ba645
...@@ -33,22 +33,9 @@ typedef struct { unsigned long pd; } hugepd_t; ...@@ -33,22 +33,9 @@ typedef struct { unsigned long pd; } hugepd_t;
/* /*
* For HugeTLB page, there are more metadata to save in the struct page. But * For HugeTLB page, there are more metadata to save in the struct page. But
* the head struct page cannot meet our needs, so we have to abuse other tail * the head struct page cannot meet our needs, so we have to abuse other tail
* struct page to store the metadata. In order to avoid conflicts caused by * struct page to store the metadata.
* subsequent use of more tail struct pages, we gather these discrete indexes
* of tail struct page here.
*/ */
enum { #define __NR_USED_SUBPAGE 3
SUBPAGE_INDEX_SUBPOOL = 1, /* reuse page->private */
#ifdef CONFIG_CGROUP_HUGETLB
SUBPAGE_INDEX_CGROUP, /* reuse page->private */
SUBPAGE_INDEX_CGROUP_RSVD, /* reuse page->private */
__MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD,
#endif
#ifdef CONFIG_MEMORY_FAILURE
SUBPAGE_INDEX_HWPOISON,
#endif
__NR_USED_SUBPAGE,
};
struct hugepage_subpool { struct hugepage_subpool {
spinlock_t lock; spinlock_t lock;
...@@ -722,11 +709,11 @@ extern unsigned int default_hstate_idx; ...@@ -722,11 +709,11 @@ extern unsigned int default_hstate_idx;
static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{ {
return (void *)folio_get_private_1(folio); return folio->_hugetlb_subpool;
} }
/* /*
* hugetlb page subpool pointer located in hpage[1].private * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool
*/ */
static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
{ {
...@@ -736,7 +723,7 @@ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) ...@@ -736,7 +723,7 @@ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
static inline void hugetlb_set_folio_subpool(struct folio *folio, static inline void hugetlb_set_folio_subpool(struct folio *folio,
struct hugepage_subpool *subpool) struct hugepage_subpool *subpool)
{ {
folio_set_private_1(folio, (unsigned long)subpool); folio->_hugetlb_subpool = subpool;
} }
static inline void hugetlb_set_page_subpool(struct page *hpage, static inline void hugetlb_set_page_subpool(struct page *hpage,
......
...@@ -24,12 +24,10 @@ struct file_region; ...@@ -24,12 +24,10 @@ struct file_region;
#ifdef CONFIG_CGROUP_HUGETLB #ifdef CONFIG_CGROUP_HUGETLB
/* /*
* Minimum page order trackable by hugetlb cgroup. * Minimum page order trackable by hugetlb cgroup.
* At least 4 pages are necessary for all the tracking information. * At least 3 pages are necessary for all the tracking information.
* The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault * The second tail page contains all of the hugetlb-specific fields.
* usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD])
* is the reservation usage cgroup.
*/ */
#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1) #define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE)
enum hugetlb_memory_event { enum hugetlb_memory_event {
HUGETLB_MAX, HUGETLB_MAX,
...@@ -69,21 +67,13 @@ struct hugetlb_cgroup { ...@@ -69,21 +67,13 @@ struct hugetlb_cgroup {
static inline struct hugetlb_cgroup * static inline struct hugetlb_cgroup *
__hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd) __hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd)
{ {
struct page *tail;
VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
return NULL; return NULL;
if (rsvd)
if (rsvd) { return folio->_hugetlb_cgroup_rsvd;
tail = folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD); else
return (void *)page_private(tail); return folio->_hugetlb_cgroup;
}
else {
tail = folio_page(folio, SUBPAGE_INDEX_CGROUP);
return (void *)page_private(tail);
}
} }
static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio) static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio)
...@@ -101,15 +91,12 @@ static inline void __set_hugetlb_cgroup(struct folio *folio, ...@@ -101,15 +91,12 @@ static inline void __set_hugetlb_cgroup(struct folio *folio,
struct hugetlb_cgroup *h_cg, bool rsvd) struct hugetlb_cgroup *h_cg, bool rsvd)
{ {
VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
return; return;
if (rsvd) if (rsvd)
set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD), folio->_hugetlb_cgroup_rsvd = h_cg;
(unsigned long)h_cg);
else else
set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP), folio->_hugetlb_cgroup = h_cg;
(unsigned long)h_cg);
} }
static inline void set_hugetlb_cgroup(struct folio *folio, static inline void set_hugetlb_cgroup(struct folio *folio,
......
...@@ -145,15 +145,22 @@ struct page { ...@@ -145,15 +145,22 @@ struct page {
atomic_t compound_pincount; atomic_t compound_pincount;
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
unsigned int compound_nr; /* 1 << compound_order */ unsigned int compound_nr; /* 1 << compound_order */
unsigned long _private_1;
#endif #endif
}; };
struct { /* Second tail page of compound page */ struct { /* Second tail page of transparent huge page */
unsigned long _compound_pad_1; /* compound_head */ unsigned long _compound_pad_1; /* compound_head */
unsigned long _compound_pad_2; unsigned long _compound_pad_2;
/* For both global and memcg */ /* For both global and memcg */
struct list_head deferred_list; struct list_head deferred_list;
}; };
struct { /* Second tail page of hugetlb page */
unsigned long _hugetlb_pad_1; /* compound_head */
void *hugetlb_subpool;
void *hugetlb_cgroup;
void *hugetlb_cgroup_rsvd;
void *hugetlb_hwpoison;
/* No more space on 32-bit: use third tail if more */
};
struct { /* Page table pages */ struct { /* Page table pages */
unsigned long _pt_pad_1; /* compound_head */ unsigned long _pt_pad_1; /* compound_head */
pgtable_t pmd_huge_pte; /* protected by page->ptl */ pgtable_t pmd_huge_pte; /* protected by page->ptl */
...@@ -260,13 +267,18 @@ struct page { ...@@ -260,13 +267,18 @@ struct page {
* to find how many references there are to this folio. * to find how many references there are to this folio.
* @memcg_data: Memory Control Group data. * @memcg_data: Memory Control Group data.
* @_flags_1: For large folios, additional page flags. * @_flags_1: For large folios, additional page flags.
* @__head: Points to the folio. Do not use. * @_head_1: Points to the folio. Do not use.
* @_folio_dtor: Which destructor to use for this folio. * @_folio_dtor: Which destructor to use for this folio.
* @_folio_order: Do not use directly, call folio_order(). * @_folio_order: Do not use directly, call folio_order().
* @_total_mapcount: Do not use directly, call folio_entire_mapcount(). * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
* @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
* @_folio_nr_pages: Do not use directly, call folio_nr_pages(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
* @_private_1: Do not use directly, call folio_get_private_1(). * @_flags_2: For alignment. Do not use.
* @_head_2: Points to the folio. Do not use.
* @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h.
* @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h.
* @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h.
* @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head().
* *
* A folio is a physically, virtually and logically contiguous set * A folio is a physically, virtually and logically contiguous set
* of bytes. It is a power-of-two in size, and it is aligned to that * of bytes. It is a power-of-two in size, and it is aligned to that
...@@ -305,16 +317,31 @@ struct folio { ...@@ -305,16 +317,31 @@ struct folio {
}; };
struct page page; struct page page;
}; };
unsigned long _flags_1; union {
unsigned long __head; struct {
unsigned char _folio_dtor; unsigned long _flags_1;
unsigned char _folio_order; unsigned long _head_1;
atomic_t _total_mapcount; unsigned char _folio_dtor;
atomic_t _pincount; unsigned char _folio_order;
atomic_t _total_mapcount;
atomic_t _pincount;
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
unsigned int _folio_nr_pages; unsigned int _folio_nr_pages;
#endif #endif
unsigned long _private_1; };
struct page __page_1;
};
union {
struct {
unsigned long _flags_2;
unsigned long _head_2;
void *_hugetlb_subpool;
void *_hugetlb_cgroup;
void *_hugetlb_cgroup_rsvd;
void *_hugetlb_hwpoison;
};
struct page __page_2;
};
}; };
#define FOLIO_MATCH(pg, fl) \ #define FOLIO_MATCH(pg, fl) \
...@@ -335,16 +362,25 @@ FOLIO_MATCH(memcg_data, memcg_data); ...@@ -335,16 +362,25 @@ FOLIO_MATCH(memcg_data, memcg_data);
static_assert(offsetof(struct folio, fl) == \ static_assert(offsetof(struct folio, fl) == \
offsetof(struct page, pg) + sizeof(struct page)) offsetof(struct page, pg) + sizeof(struct page))
FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(flags, _flags_1);
FOLIO_MATCH(compound_head, __head); FOLIO_MATCH(compound_head, _head_1);
FOLIO_MATCH(compound_dtor, _folio_dtor); FOLIO_MATCH(compound_dtor, _folio_dtor);
FOLIO_MATCH(compound_order, _folio_order); FOLIO_MATCH(compound_order, _folio_order);
FOLIO_MATCH(compound_mapcount, _total_mapcount); FOLIO_MATCH(compound_mapcount, _total_mapcount);
FOLIO_MATCH(compound_pincount, _pincount); FOLIO_MATCH(compound_pincount, _pincount);
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
FOLIO_MATCH(compound_nr, _folio_nr_pages); FOLIO_MATCH(compound_nr, _folio_nr_pages);
FOLIO_MATCH(_private_1, _private_1);
#endif #endif
#undef FOLIO_MATCH #undef FOLIO_MATCH
#define FOLIO_MATCH(pg, fl) \
static_assert(offsetof(struct folio, fl) == \
offsetof(struct page, pg) + 2 * sizeof(struct page))
FOLIO_MATCH(flags, _flags_2);
FOLIO_MATCH(compound_head, _head_2);
FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool);
FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup);
FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd);
FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison);
#undef FOLIO_MATCH
static inline atomic_t *folio_mapcount_ptr(struct folio *folio) static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
{ {
...@@ -388,16 +424,6 @@ static inline void *folio_get_private(struct folio *folio) ...@@ -388,16 +424,6 @@ static inline void *folio_get_private(struct folio *folio)
return folio->private; return folio->private;
} }
static inline void folio_set_private_1(struct folio *folio, unsigned long private)
{
folio->_private_1 = private;
}
static inline unsigned long folio_get_private_1(struct folio *folio)
{
return folio->_private_1;
}
struct page_frag_cache { struct page_frag_cache {
void * va; void * va;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
......
...@@ -775,7 +775,7 @@ endchoice ...@@ -775,7 +775,7 @@ endchoice
config THP_SWAP config THP_SWAP
def_bool y def_bool y
depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP && 64BIT
help help
Swap transparent huge pages in one piece, without splitting. Swap transparent huge pages in one piece, without splitting.
XXX: For now, swap cluster backing transparent huge page XXX: For now, swap cluster backing transparent huge page
......
...@@ -1687,8 +1687,7 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs); ...@@ -1687,8 +1687,7 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
#ifdef CONFIG_HUGETLB_PAGE #ifdef CONFIG_HUGETLB_PAGE
/* /*
* Struct raw_hwp_page represents information about "raw error page", * Struct raw_hwp_page represents information about "raw error page",
* constructing singly linked list originated from ->private field of * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
* SUBPAGE_INDEX_HWPOISON-th tail page.
*/ */
struct raw_hwp_page { struct raw_hwp_page {
struct llist_node node; struct llist_node node;
...@@ -1697,7 +1696,7 @@ struct raw_hwp_page { ...@@ -1697,7 +1696,7 @@ struct raw_hwp_page {
static inline struct llist_head *raw_hwp_list_head(struct page *hpage) static inline struct llist_head *raw_hwp_list_head(struct page *hpage)
{ {
return (struct llist_head *)&page_private(hpage + SUBPAGE_INDEX_HWPOISON); return (struct llist_head *)&page_folio(hpage)->_hugetlb_hwpoison;
} }
static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag) static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment