Commit 9d6c59c1 authored by Vlastimil Babka's avatar Vlastimil Babka

Merge branch 'for-5.17/struct-slab' into for-linus

Series "Separate struct slab from struct page" v4

This is originally an offshoot of the folio work by Matthew. One of the more
complex parts of the struct page definition are the parts used by the slab
allocators. It would be good for the MM in general if struct slab were its own
data type, and it also helps to prevent tail pages from slipping in anywhere.
As Matthew requested in his proof of concept series, I have taken over the
development of this series, so it's a mix of patches from him (often modified
by me) and my own.

One big difference is the use of coccinelle to perform the relatively trivial
parts of the conversions automatically and at once, instead of a larger number
of smaller incremental reviewable steps. Thanks to Julia Lawall and Luis
Chamberlain for all their help!

Another notable difference is (based also on review feedback) I don't represent
with a struct slab the large kmalloc allocations which are not really a slab,
but use page allocator directly. When going from an object address to a struct
slab, the code tests first folio slab flag, and only if it's set it converts to
struct slab. This makes the struct slab type stronger.

Finally, although Matthew's version didn't use any of the folio work, the
initial support has been merged meanwhile so my version builds on top of it
where appropriate. This eliminates some of the redundant compound_head()
being performed e.g. when testing the slab flag.

To sum up, after this series, struct page fields used by slab allocators are
moved from struct page to a new struct slab, that uses the same physical
storage. The availability of the fields is further distinguished by the
selected slab allocator implementation. The advantages include:

- Similar to folios, if the slab is of order > 0, struct slab always is
  guaranteed to be the head page. Additionally it's guaranteed to be an actual
  slab page, not a large kmalloc. This removes uncertainty and potential for
  bugs.
- It's not possible to accidentally use fields of the slab implementation that's
  not configured.
- Other subsystems cannot use slab's fields in struct page anymore (some
  existing non-slab usages had to be adjusted in this series), so slab
  implementations have more freedom in rearranging them in the struct slab.

Link: https://lore.kernel.org/all/20220104001046.12263-1-vbabka@suse.cz/
parents eb52c0fc b01af5c0
......@@ -981,7 +981,7 @@ static void __meminit free_pagetable(struct page *page, int order)
if (PageReserved(page)) {
__ClearPageReserved(page);
magic = (unsigned long)page->freelist;
magic = page->index;
if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
while (nr_pages--)
put_page_bootmem(page++);
......
......@@ -30,7 +30,7 @@ void put_page_bootmem(struct page *page);
*/
static inline void free_bootmem_page(struct page *page)
{
unsigned long magic = (unsigned long)page->freelist;
unsigned long magic = page->index;
/*
* The reserve_bootmem_region sets the reserved flag on bootmem
......
......@@ -9,6 +9,7 @@
struct kmem_cache;
struct page;
struct slab;
struct vm_struct;
struct task_struct;
......@@ -193,11 +194,11 @@ static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache)
return 0;
}
void __kasan_poison_slab(struct page *page);
static __always_inline void kasan_poison_slab(struct page *page)
void __kasan_poison_slab(struct slab *slab);
static __always_inline void kasan_poison_slab(struct slab *slab)
{
if (kasan_enabled())
__kasan_poison_slab(page);
__kasan_poison_slab(slab);
}
void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
......@@ -322,7 +323,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache,
slab_flags_t *flags) {}
static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_poison_slab(struct slab *slab) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
void *object) {}
static inline void kasan_poison_object_data(struct kmem_cache *cache,
......
......@@ -536,45 +536,6 @@ static inline bool folio_memcg_kmem(struct folio *folio)
return folio->memcg_data & MEMCG_DATA_KMEM;
}
/*
* page_objcgs - get the object cgroups vector associated with a page
* @page: a pointer to the page struct
*
* Returns a pointer to the object cgroups vector associated with the page,
* or NULL. This function assumes that the page is known to have an
* associated object cgroups vector. It's not safe to call this function
* against pages, which might have an associated memory cgroup: e.g.
* kernel stack pages.
*/
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
unsigned long memcg_data = READ_ONCE(page->memcg_data);
VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page);
VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
* page_objcgs_check - get the object cgroups vector associated with a page
* @page: a pointer to the page struct
*
* Returns a pointer to the object cgroups vector associated with the page,
* or NULL. This function is safe to use if the page can be directly associated
* with a memory cgroup.
*/
static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
unsigned long memcg_data = READ_ONCE(page->memcg_data);
if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS))
return NULL;
VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
#else
static inline bool folio_memcg_kmem(struct folio *folio)
......@@ -582,15 +543,6 @@ static inline bool folio_memcg_kmem(struct folio *folio)
return false;
}
static inline struct obj_cgroup **page_objcgs(struct page *page)
{
return NULL;
}
static inline struct obj_cgroup **page_objcgs_check(struct page *page)
{
return NULL;
}
#endif
static inline bool PageMemcgKmem(struct page *page)
......
......@@ -863,6 +863,13 @@ static inline struct page *virt_to_head_page(const void *x)
return compound_head(page);
}
static inline struct folio *virt_to_folio(const void *x)
{
struct page *page = virt_to_page(x);
return page_folio(page);
}
void __put_page(struct page *page);
void put_pages_list(struct list_head *pages);
......@@ -1753,6 +1760,11 @@ void page_address_init(void);
#define page_address_init() do { } while(0)
#endif
static inline void *folio_address(const struct folio *folio)
{
return page_address(&folio->page);
}
extern void *page_rmapping(struct page *page);
extern struct anon_vma *page_anon_vma(struct page *page);
extern pgoff_t __page_file_index(struct page *page);
......
......@@ -56,11 +56,11 @@ struct mem_cgroup;
* in each subpage, but you may need to restore some of their values
* afterwards.
*
* SLUB uses cmpxchg_double() to atomically update its freelist and
* counters. That requires that freelist & counters be adjacent and
* double-word aligned. We align all struct pages to double-word
* boundaries, and ensure that 'freelist' is aligned within the
* struct.
* SLUB uses cmpxchg_double() to atomically update its freelist and counters.
* That requires that freelist & counters in struct slab be adjacent and
* double-word aligned. Because struct slab currently just reinterprets the
* bits of struct page, we align all struct pages to double-word boundaries,
* and ensure that 'freelist' is aligned within struct slab.
*/
#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
#define _struct_page_alignment __aligned(2 * sizeof(unsigned long))
......
......@@ -189,14 +189,6 @@ bool kmem_valid_obj(void *object);
void kmem_dump_obj(void *object);
#endif
#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
bool to_user);
#else
static inline void __check_heap_object(const void *ptr, unsigned long n,
struct page *page, bool to_user) { }
#endif
/*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
......
......@@ -87,11 +87,11 @@ struct kmem_cache {
struct kmem_cache_node *node[MAX_NUMNODES];
};
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x)
{
void *object = x - (x - page->s_mem) % cache->size;
void *last_object = page->s_mem + (cache->num - 1) * cache->size;
void *object = x - (x - slab->s_mem) % cache->size;
void *last_object = slab->s_mem + (cache->num - 1) * cache->size;
if (unlikely(object > last_object))
return last_object;
......@@ -106,16 +106,16 @@ static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
* reciprocal_divide(offset, cache->reciprocal_buffer_size)
*/
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct page *page, void *obj)
const struct slab *slab, void *obj)
{
u32 offset = (obj - page->s_mem);
u32 offset = (obj - slab->s_mem);
return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
static inline int objs_per_slab_page(const struct kmem_cache *cache,
const struct page *page)
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
if (is_kfence_address(page_address(page)))
if (is_kfence_address(slab_address(slab)))
return 1;
return cache->num;
}
......
......@@ -48,9 +48,9 @@ enum stat_item {
struct kmem_cache_cpu {
void **freelist; /* Pointer to next available object */
unsigned long tid; /* Globally unique transaction id */
struct page *page; /* The slab from which we are allocating */
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct page *partial; /* Partially allocated frozen slabs */
struct slab *partial; /* Partially allocated frozen slabs */
#endif
local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
......@@ -99,8 +99,8 @@ struct kmem_cache {
#ifdef CONFIG_SLUB_CPU_PARTIAL
/* Number of per cpu partial objects to keep around */
unsigned int cpu_partial;
/* Number of per cpu partial pages to keep around */
unsigned int cpu_partial_pages;
/* Number of per cpu partial slabs to keep around */
unsigned int cpu_partial_slabs;
#endif
struct kmem_cache_order_objects oo;
......@@ -156,16 +156,13 @@ static inline void sysfs_slab_release(struct kmem_cache *s)
}
#endif
void object_err(struct kmem_cache *s, struct page *page,
u8 *object, char *reason);
void *fixup_red_left(struct kmem_cache *s, void *p);
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
void *x) {
void *object = x - (x - page_address(page)) % cache->size;
void *last_object = page_address(page) +
(page->objects - 1) * cache->size;
void *object = x - (x - slab_address(slab)) % cache->size;
void *last_object = slab_address(slab) +
(slab->objects - 1) * cache->size;
void *result = (unlikely(object > last_object)) ? last_object : object;
result = fixup_red_left(cache, result);
......@@ -181,16 +178,16 @@ static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
}
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct page *page, void *obj)
const struct slab *slab, void *obj)
{
if (is_kfence_address(obj))
return 0;
return __obj_to_index(cache, page_address(page), obj);
return __obj_to_index(cache, slab_address(slab), obj);
}
static inline int objs_per_slab_page(const struct kmem_cache *cache,
const struct page *page)
static inline int objs_per_slab(const struct kmem_cache *cache,
const struct slab *slab)
{
return page->objects;
return slab->objects;
}
#endif /* _LINUX_SLUB_DEF_H */
......@@ -15,7 +15,7 @@
void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
{
page->freelist = (void *)type;
page->index = type;
SetPagePrivate(page);
set_page_private(page, info);
page_ref_inc(page);
......@@ -23,14 +23,13 @@ void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
void put_page_bootmem(struct page *page)
{
unsigned long type;
unsigned long type = page->index;
type = (unsigned long) page->freelist;
BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
if (page_ref_dec_return(page) == 1) {
page->freelist = NULL;
page->index = 0;
ClearPagePrivate(page);
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
......
......@@ -247,8 +247,9 @@ struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache,
}
#endif
void __kasan_poison_slab(struct page *page)
void __kasan_poison_slab(struct slab *slab)
{
struct page *page = slab_page(slab);
unsigned long i;
for (i = 0; i < compound_nr(page); i++)
......@@ -298,7 +299,7 @@ static inline u8 assign_tag(struct kmem_cache *cache,
/* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
#ifdef CONFIG_SLAB
/* For SLAB assign tags based on the object index in the freelist. */
return (u8)obj_to_index(cache, virt_to_head_page(object), (void *)object);
return (u8)obj_to_index(cache, virt_to_slab(object), (void *)object);
#else
/*
* For SLUB assign a random tag during slab creation, otherwise reuse
......@@ -341,7 +342,7 @@ static inline bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
if (is_kfence_address(object))
return false;
if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) !=
if (unlikely(nearest_obj(cache, virt_to_slab(object), object) !=
object)) {
kasan_report_invalid_free(tagged_object, ip);
return true;
......@@ -401,9 +402,9 @@ void __kasan_kfree_large(void *ptr, unsigned long ip)
void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
{
struct page *page;
struct folio *folio;
page = virt_to_head_page(ptr);
folio = virt_to_folio(ptr);
/*
* Even though this function is only called for kmem_cache_alloc and
......@@ -411,12 +412,14 @@ void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
* !PageSlab() when the size provided to kmalloc is larger than
* KMALLOC_MAX_SIZE, and kmalloc falls back onto page_alloc.
*/
if (unlikely(!PageSlab(page))) {
if (unlikely(!folio_test_slab(folio))) {
if (____kasan_kfree_large(ptr, ip))
return;
kasan_poison(ptr, page_size(page), KASAN_FREE_PAGE, false);
kasan_poison(ptr, folio_size(folio), KASAN_FREE_PAGE, false);
} else {
____kasan_slab_free(page->slab_cache, ptr, ip, false, false);
struct slab *slab = folio_slab(folio);
____kasan_slab_free(slab->slab_cache, ptr, ip, false, false);
}
}
......@@ -560,7 +563,7 @@ void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size,
void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flags)
{
struct page *page;
struct slab *slab;
if (unlikely(object == ZERO_SIZE_PTR))
return (void *)object;
......@@ -572,13 +575,13 @@ void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flag
*/
kasan_unpoison(object, size, false);
page = virt_to_head_page(object);
slab = virt_to_slab(object);
/* Piggy-back on kmalloc() instrumentation to poison the redzone. */
if (unlikely(!PageSlab(page)))
if (unlikely(!slab))
return __kasan_kmalloc_large(object, size, flags);
else
return ____kasan_kmalloc(page->slab_cache, object, size, flags);
return ____kasan_kmalloc(slab->slab_cache, object, size, flags);
}
bool __kasan_check_byte(const void *address, unsigned long ip)
......
......@@ -330,16 +330,16 @@ DEFINE_ASAN_SET_SHADOW(f8);
static void __kasan_record_aux_stack(void *addr, bool can_alloc)
{
struct page *page = kasan_addr_to_page(addr);
struct slab *slab = kasan_addr_to_slab(addr);
struct kmem_cache *cache;
struct kasan_alloc_meta *alloc_meta;
void *object;
if (is_kfence_address(addr) || !(page && PageSlab(page)))
if (is_kfence_address(addr) || !slab)
return;
cache = page->slab_cache;
object = nearest_obj(cache, page, addr);
cache = slab->slab_cache;
object = nearest_obj(cache, slab, addr);
alloc_meta = kasan_get_alloc_meta(cache, object);
if (!alloc_meta)
return;
......
......@@ -265,6 +265,7 @@ bool kasan_report(unsigned long addr, size_t size,
void kasan_report_invalid_free(void *object, unsigned long ip);
struct page *kasan_addr_to_page(const void *addr);
struct slab *kasan_addr_to_slab(const void *addr);
depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc);
void kasan_set_track(struct kasan_track *track, gfp_t flags);
......
......@@ -117,7 +117,7 @@ static unsigned long quarantine_batch_size;
static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink)
{
return virt_to_head_page(qlink)->slab_cache;
return virt_to_slab(qlink)->slab_cache;
}
static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache)
......
......@@ -150,6 +150,14 @@ struct page *kasan_addr_to_page(const void *addr)
return NULL;
}
struct slab *kasan_addr_to_slab(const void *addr)
{
if ((addr >= (void *)PAGE_OFFSET) &&
(addr < high_memory))
return virt_to_slab(addr);
return NULL;
}
static void describe_object_addr(struct kmem_cache *cache, void *object,
const void *addr)
{
......@@ -248,8 +256,9 @@ static void print_address_description(void *addr, u8 tag)
pr_err("\n");
if (page && PageSlab(page)) {
struct kmem_cache *cache = page->slab_cache;
void *object = nearest_obj(cache, page, addr);
struct slab *slab = page_slab(page);
struct kmem_cache *cache = slab->slab_cache;
void *object = nearest_obj(cache, slab, addr);
describe_object(cache, object, addr, tag);
}
......
......@@ -12,7 +12,7 @@ const char *kasan_get_bug_type(struct kasan_access_info *info)
#ifdef CONFIG_KASAN_TAGS_IDENTIFY
struct kasan_alloc_meta *alloc_meta;
struct kmem_cache *cache;
struct page *page;
struct slab *slab;
const void *addr;
void *object;
u8 tag;
......@@ -20,10 +20,10 @@ const char *kasan_get_bug_type(struct kasan_access_info *info)
tag = get_tag(info->access_addr);
addr = kasan_reset_tag(info->access_addr);
page = kasan_addr_to_page(addr);
if (page && PageSlab(page)) {
cache = page->slab_cache;
object = nearest_obj(cache, page, (void *)addr);
slab = kasan_addr_to_slab(addr);
if (slab) {
cache = slab->slab_cache;
object = nearest_obj(cache, slab, (void *)addr);
alloc_meta = kasan_get_alloc_meta(cache, object);
if (alloc_meta) {
......
......@@ -360,7 +360,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
{
struct kfence_metadata *meta = NULL;
unsigned long flags;
struct page *page;
struct slab *slab;
void *addr;
/* Try to obtain a free object. */
......@@ -424,13 +424,14 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
alloc_covered_add(alloc_stack_hash, 1);
/* Set required struct page fields. */
page = virt_to_page(meta->addr);
page->slab_cache = cache;
if (IS_ENABLED(CONFIG_SLUB))
page->objects = 1;
if (IS_ENABLED(CONFIG_SLAB))
page->s_mem = addr;
/* Set required slab fields. */
slab = virt_to_slab((void *)meta->addr);
slab->slab_cache = cache;
#if defined(CONFIG_SLUB)
slab->objects = 1;
#elif defined(CONFIG_SLAB)
slab->s_mem = addr;
#endif
/* Memory initialization. */
for_each_canary(meta, set_canary_byte);
......
......@@ -282,7 +282,7 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
alloc = kmalloc(size, gfp);
if (is_kfence_address(alloc)) {
struct page *page = virt_to_head_page(alloc);
struct slab *slab = virt_to_slab(alloc);
struct kmem_cache *s = test_cache ?:
kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)];
......@@ -291,8 +291,8 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
* even for KFENCE objects; these are required so that
* memcg accounting works correctly.
*/
KUNIT_EXPECT_EQ(test, obj_to_index(s, page, alloc), 0U);
KUNIT_EXPECT_EQ(test, objs_per_slab_page(s, page), 1);
KUNIT_EXPECT_EQ(test, obj_to_index(s, slab, alloc), 0U);
KUNIT_EXPECT_EQ(test, objs_per_slab(s, slab), 1);
if (policy == ALLOCATE_ANY)
return alloc;
......
......@@ -2816,31 +2816,31 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
rcu_read_unlock();
}
int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
gfp_t gfp, bool new_page)
int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
gfp_t gfp, bool new_slab)
{
unsigned int objects = objs_per_slab_page(s, page);
unsigned int objects = objs_per_slab(s, slab);
unsigned long memcg_data;
void *vec;
gfp &= ~OBJCGS_CLEAR_MASK;
vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
page_to_nid(page));
slab_nid(slab));
if (!vec)
return -ENOMEM;
memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS;
if (new_page) {
if (new_slab) {
/*
* If the slab page is brand new and nobody can yet access
* it's memcg_data, no synchronization is required and
* memcg_data can be simply assigned.
* If the slab is brand new and nobody can yet access its
* memcg_data, no synchronization is required and memcg_data can
* be simply assigned.
*/
page->memcg_data = memcg_data;
} else if (cmpxchg(&page->memcg_data, 0, memcg_data)) {
slab->memcg_data = memcg_data;
} else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) {
/*
* If the slab page is already in use, somebody can allocate
* and assign obj_cgroups in parallel. In this case the existing
* If the slab is already in use, somebody can allocate and
* assign obj_cgroups in parallel. In this case the existing
* objcg vector should be reused.
*/
kfree(vec);
......@@ -2865,38 +2865,43 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
*/
struct mem_cgroup *mem_cgroup_from_obj(void *p)
{
struct page *page;
struct folio *folio;
if (mem_cgroup_disabled())
return NULL;
page = virt_to_head_page(p);
folio = virt_to_folio(p);
/*
* Slab objects are accounted individually, not per-page.
* Memcg membership data for each individual object is saved in
* the page->obj_cgroups.
* slab->memcg_data.
*/
if (page_objcgs_check(page)) {
struct obj_cgroup *objcg;
if (folio_test_slab(folio)) {
struct obj_cgroup **objcgs;
struct slab *slab;
unsigned int off;
off = obj_to_index(page->slab_cache, page, p);
objcg = page_objcgs(page)[off];
if (objcg)
return obj_cgroup_memcg(objcg);
slab = folio_slab(folio);
objcgs = slab_objcgs(slab);
if (!objcgs)
return NULL;
off = obj_to_index(slab->slab_cache, slab, p);
if (objcgs[off])
return obj_cgroup_memcg(objcgs[off]);
return NULL;
}
/*
* page_memcg_check() is used here, because page_has_obj_cgroups()
* check above could fail because the object cgroups vector wasn't set
* at that moment, but it can be set concurrently.
* page_memcg_check() is used here, because in theory we can encounter
* a folio where the slab flag has been cleared already, but
* slab->memcg_data has not been freed yet
* page_memcg_check(page) will guarantee that a proper memory
* cgroup pointer or NULL will be returned.
*/
return page_memcg_check(page);
return page_memcg_check(folio_page(folio, 0));
}
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
......
This diff is collapsed.
This diff is collapsed.
......@@ -550,13 +550,13 @@ bool slab_is_available(void)
*/
bool kmem_valid_obj(void *object)
{
struct page *page;
struct folio *folio;
/* Some arches consider ZERO_SIZE_PTR to be a valid address. */
if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
return false;
page = virt_to_head_page(object);
return PageSlab(page);
folio = virt_to_folio(object);
return folio_test_slab(folio);
}
EXPORT_SYMBOL_GPL(kmem_valid_obj);
......@@ -579,18 +579,18 @@ void kmem_dump_obj(void *object)
{
char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc";
int i;
struct page *page;
struct slab *slab;
unsigned long ptroffset;
struct kmem_obj_info kp = { };
if (WARN_ON_ONCE(!virt_addr_valid(object)))
return;
page = virt_to_head_page(object);
if (WARN_ON_ONCE(!PageSlab(page))) {
slab = virt_to_slab(object);
if (WARN_ON_ONCE(!slab)) {
pr_cont(" non-slab memory.\n");
return;
}
kmem_obj_info(&kp, object, page);
kmem_obj_info(&kp, object, slab);
if (kp.kp_slab_cache)
pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
else
......
......@@ -30,7 +30,7 @@
* If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
* alloc_pages() directly, allocating compound pages so the page order
* does not have to be separately tracked.
* These objects are detected in kfree() because PageSlab()
* These objects are detected in kfree() because folio_test_slab()
* is false for them.
*
* SLAB is emulated on top of SLOB by simply calling constructors and
......@@ -105,21 +105,21 @@ static LIST_HEAD(free_slob_large);
/*
* slob_page_free: true for pages on free_slob_pages list.
*/
static inline int slob_page_free(struct page *sp)
static inline int slob_page_free(struct slab *slab)
{
return PageSlobFree(sp);
return PageSlobFree(slab_page(slab));
}
static void set_slob_page_free(struct page *sp, struct list_head *list)
static void set_slob_page_free(struct slab *slab, struct list_head *list)
{
list_add(&sp->slab_list, list);
__SetPageSlobFree(sp);
list_add(&slab->slab_list, list);
__SetPageSlobFree(slab_page(slab));
}
static inline void clear_slob_page_free(struct page *sp)
static inline void clear_slob_page_free(struct slab *slab)
{
list_del(&sp->slab_list);
__ClearPageSlobFree(sp);
list_del(&slab->slab_list);
__ClearPageSlobFree(slab_page(slab));
}
#define SLOB_UNIT sizeof(slob_t)
......@@ -234,7 +234,7 @@ static void slob_free_pages(void *b, int order)
* freelist, in this case @page_removed_from_list will be set to
* true (set to false otherwise).
*/
static void *slob_page_alloc(struct page *sp, size_t size, int align,
static void *slob_page_alloc(struct slab *sp, size_t size, int align,
int align_offset, bool *page_removed_from_list)
{
slob_t *prev, *cur, *aligned = NULL;
......@@ -301,7 +301,8 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align,
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
int align_offset)
{
struct page *sp;
struct folio *folio;
struct slab *sp;
struct list_head *slob_list;
slob_t *b = NULL;
unsigned long flags;
......@@ -323,7 +324,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
* If there's a node specification, search for a partial
* page with a matching node id in the freelist.
*/
if (node != NUMA_NO_NODE && page_to_nid(sp) != node)
if (node != NUMA_NO_NODE && slab_nid(sp) != node)
continue;
#endif
/* Enough room on this page? */
......@@ -358,8 +359,9 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
if (!b)
return NULL;
sp = virt_to_page(b);
__SetPageSlab(sp);
folio = virt_to_folio(b);
__folio_set_slab(folio);
sp = folio_slab(folio);
spin_lock_irqsave(&slob_lock, flags);
sp->units = SLOB_UNITS(PAGE_SIZE);
......@@ -381,7 +383,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
*/
static void slob_free(void *block, int size)
{
struct page *sp;
struct slab *sp;
slob_t *prev, *next, *b = (slob_t *)block;
slobidx_t units;
unsigned long flags;
......@@ -391,7 +393,7 @@ static void slob_free(void *block, int size)
return;
BUG_ON(!size);
sp = virt_to_page(block);
sp = virt_to_slab(block);
units = SLOB_UNITS(size);
spin_lock_irqsave(&slob_lock, flags);
......@@ -401,8 +403,7 @@ static void slob_free(void *block, int size)
if (slob_page_free(sp))
clear_slob_page_free(sp);
spin_unlock_irqrestore(&slob_lock, flags);
__ClearPageSlab(sp);
page_mapcount_reset(sp);
__folio_clear_slab(slab_folio(sp));
slob_free_pages(b, 0);
return;
}
......@@ -462,10 +463,10 @@ static void slob_free(void *block, int size)
}
#ifdef CONFIG_PRINTK
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
kpp->kp_ptr = object;
kpp->kp_page = page;
kpp->kp_slab = slab;
}
#endif
......@@ -544,7 +545,7 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
void kfree(const void *block)
{
struct page *sp;
struct folio *sp;
trace_kfree(_RET_IP_, block);
......@@ -552,16 +553,17 @@ void kfree(const void *block)
return;
kmemleak_free(block);
sp = virt_to_page(block);
if (PageSlab(sp)) {
sp = virt_to_folio(block);
if (folio_test_slab(sp)) {
int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
unsigned int *m = (unsigned int *)(block - align);
slob_free(m, *m + align);
} else {
unsigned int order = compound_order(sp);
mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
unsigned int order = folio_order(sp);
mod_node_page_state(folio_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
__free_pages(sp, order);
__free_pages(folio_page(sp, 0), order);
}
}
......@@ -570,7 +572,7 @@ EXPORT_SYMBOL(kfree);
/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t __ksize(const void *block)
{
struct page *sp;
struct folio *folio;
int align;
unsigned int *m;
......@@ -578,9 +580,9 @@ size_t __ksize(const void *block)
if (unlikely(block == ZERO_SIZE_PTR))
return 0;
sp = virt_to_page(block);
if (unlikely(!PageSlab(sp)))
return page_size(sp);
folio = virt_to_folio(block);
if (unlikely(!folio_test_slab(folio)))
return folio_size(folio);
align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
m = (unsigned int *)(block - align);
......
This diff is collapsed.
......@@ -722,7 +722,7 @@ static void free_map_bootmem(struct page *memmap)
>> PAGE_SHIFT;
for (i = 0; i < nr_pages; i++, page++) {
magic = (unsigned long) page->freelist;
magic = page->index;
BUG_ON(magic == NODE_INFO);
......
......@@ -20,6 +20,7 @@
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include <asm/sections.h>
#include "slab.h"
/*
* Checks if a given pointer and length is contained by the current
......@@ -223,7 +224,7 @@ static inline void check_page_span(const void *ptr, unsigned long n,
static inline void check_heap_object(const void *ptr, unsigned long n,
bool to_user)
{
struct page *page;
struct folio *folio;
if (!virt_addr_valid(ptr))
return;
......@@ -231,16 +232,16 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
/*
* When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
* highmem page or fallback to virt_to_page(). The following
* is effectively a highmem-aware virt_to_head_page().
* is effectively a highmem-aware virt_to_slab().
*/
page = compound_head(kmap_to_page((void *)ptr));
folio = page_folio(kmap_to_page((void *)ptr));
if (PageSlab(page)) {
if (folio_test_slab(folio)) {
/* Check slab allocator for flags and size. */
__check_heap_object(ptr, n, page, to_user);
__check_heap_object(ptr, n, folio_slab(folio), to_user);
} else {
/* Verify object does not incorrectly span multiple pages. */
check_page_span(ptr, n, page, to_user);
check_page_span(ptr, n, folio_page(folio, 0), to_user);
}
}
......
......@@ -17,10 +17,10 @@
*
* Usage of struct page fields:
* page->private: points to zspage
* page->freelist(index): links together all component pages of a zspage
* page->index: links together all component pages of a zspage
* For the huge page, this is always 0, so we use this field
* to store handle.
* page->units: first object offset in a subpage of zspage
* page->page_type: first object offset in a subpage of zspage
*
* Usage of struct page flags:
* PG_private: identifies the first component page
......@@ -489,12 +489,12 @@ static inline struct page *get_first_page(struct zspage *zspage)
static inline int get_first_obj_offset(struct page *page)
{
return page->units;
return page->page_type;
}
static inline void set_first_obj_offset(struct page *page, int offset)
{
page->units = offset;
page->page_type = offset;
}
static inline unsigned int get_freeobj(struct zspage *zspage)
......@@ -827,7 +827,7 @@ static struct page *get_next_page(struct page *page)
if (unlikely(PageHugeObject(page)))
return NULL;
return page->freelist;
return (struct page *)page->index;
}
/**
......@@ -901,7 +901,7 @@ static void reset_page(struct page *page)
set_page_private(page, 0);
page_mapcount_reset(page);
ClearPageHugeObject(page);
page->freelist = NULL;
page->index = 0;
}
static int trylock_zspage(struct zspage *zspage)
......@@ -1027,7 +1027,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
/*
* Allocate individual pages and link them together as:
* 1. all pages are linked together using page->freelist
* 1. all pages are linked together using page->index
* 2. each sub-page point to zspage using page->private
*
* we set PG_private to identify the first page (i.e. no other sub-page
......@@ -1036,7 +1036,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
for (i = 0; i < nr_pages; i++) {
page = pages[i];
set_page_private(page, (unsigned long)zspage);
page->freelist = NULL;
page->index = 0;
if (i == 0) {
zspage->first_page = page;
SetPagePrivate(page);
......@@ -1044,7 +1044,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
class->pages_per_zspage == 1))
SetPageHugeObject(page);
} else {
prev_page->freelist = page;
prev_page->index = (unsigned long)page;
}
prev_page = page;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment