Commit c1a2f7f0 authored by Rik van Riel, committed by Ingo Molnar

mm: Allocate the mm_cpumask (mm->cpu_bitmap[]) dynamically based on nr_cpu_ids

The mm_struct always contains a cpumask bitmap, regardless of
CONFIG_CPUMASK_OFFSTACK. The first step, then, is to simplify
things by keeping a single bitmap at the end of the mm_struct
for the mm_cpumask.

This does necessitate moving everything else in mm_struct into
an anonymous sub-structure, which can be randomized when struct
randomization is enabled.

The second step is to determine the correct size for the
mm_struct slab object from the size of the mm_struct
(excluding the CPU bitmap) and the size of the cpumask.

For init_mm we can simply allocate the maximum size this
kernel is compiled for, since we only have one init_mm
in the system, anyway.

Pointer magic by Mike Galbraith keeps -Wstringop-overflow from
getting confused by the dynamically sized array.
Tested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Rik van Riel <riel@surriel.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kernel-team@fb.com
Cc: luto@kernel.org
Link: http://lkml.kernel.org/r/20180716190337.26133-2-riel@surriel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 37c45b23
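
As a standalone illustration of the layout the commit message describes (one
runtime-sized bitmap at the end of the structure, everything else wrapped in
an anonymous sub-structure, and the allocation sized from the fixed part plus
the bitmap), here is a minimal userspace sketch. The names (struct task_ctx,
possible_cpus, task_ctx_alloc) are hypothetical; this is not the kernel code:

#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_LONG    (8 * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct task_ctx {
        struct {                        /* fixed part; could be randomized */
                int id;
        };
        unsigned long cpu_bitmap[];     /* must stay last: sized at runtime */
};

/* One allocation: the fixed part plus just enough longs for the CPUs present. */
static struct task_ctx *task_ctx_alloc(unsigned int possible_cpus)
{
        size_t size = sizeof(struct task_ctx) +
                      BITS_TO_LONGS(possible_cpus) * sizeof(unsigned long);

        return calloc(1, size);         /* calloc() also clears the bitmap */
}

int main(void)
{
        unsigned int possible_cpus = 64;        /* stand-in for nr_cpu_ids */
        struct task_ctx *ctx = task_ctx_alloc(possible_cpus);

        if (!ctx)
                return 1;
        ctx->cpu_bitmap[0] |= 1UL << 3;         /* mark CPU 3, for example */
        printf("bitmap word 0: %#lx\n", ctx->cpu_bitmap[0]);
        free(ctx);
        return 0;
}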
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -82,6 +82,7 @@ struct mm_struct efi_mm = {
         .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
         .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
         .mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
+        .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
 };
 
 static bool disable_runtime;
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -335,6 +335,7 @@ struct core_state {
 struct kioctx_table;
 struct mm_struct {
+        struct {
         struct vm_area_struct *mmap;            /* list of VMAs */
         struct rb_root mm_rb;
         u32 vmacache_seqnum;                    /* per-thread vmacache */
@@ -357,11 +358,11 @@ struct mm_struct {
         /**
          * @mm_users: The number of users including userspace.
          *
-         * Use mmget()/mmget_not_zero()/mmput() to modify. When this drops
-         * to 0 (i.e. when the task exits and there are no other temporary
-         * reference holders), we also release a reference on @mm_count
-         * (which may then free the &struct mm_struct if @mm_count also
-         * drops to 0).
+         * Use mmget()/mmget_not_zero()/mmput() to modify. When this
+         * drops to 0 (i.e. when the task exits and there are no other
+         * temporary reference holders), we also release a reference on
+         * @mm_count (which may then free the &struct mm_struct if
+         * @mm_count also drops to 0).
          */
         atomic_t mm_users;
@@ -379,11 +380,14 @@ struct mm_struct {
 #endif
         int map_count;                          /* number of VMAs */
-        spinlock_t page_table_lock; /* Protects page tables and some counters */
+        spinlock_t page_table_lock; /* Protects page tables and some
+                                     * counters
+                                     */
         struct rw_semaphore mmap_sem;
-        struct list_head mmlist; /* List of maybe swapped mm's. These are globally strung
-                                  * together off init_mm.mmlist, and are protected
-                                  * by mmlist_lock
-                                  */
+        struct list_head mmlist; /* List of maybe swapped mm's. These
+                                  * are globally strung together off
+                                  * init_mm.mmlist, and are protected
+                                  * by mmlist_lock
+                                  */
@@ -414,12 +418,10 @@ struct mm_struct {
         struct linux_binfmt *binfmt;
-        cpumask_var_t cpu_vm_mask_var;
         /* Architecture-specific MM context */
         mm_context_t context;
-        unsigned long flags; /* Must use atomic bitops to access the bits */
+        unsigned long flags; /* Must use atomic bitops to access */
         struct core_state *core_state; /* coredumping support */
 #ifdef CONFIG_MEMBARRIER
@@ -452,14 +454,11 @@ struct mm_struct {
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
         pgtable_t pmd_huge_pte; /* protected by page_table_lock */
 #endif
-#ifdef CONFIG_CPUMASK_OFFSTACK
-        struct cpumask cpumask_allocation;
-#endif
 #ifdef CONFIG_NUMA_BALANCING
         /*
-         * numa_next_scan is the next time that the PTEs will be marked
-         * pte_numa. NUMA hinting faults will gather statistics and migrate
-         * pages to new nodes if necessary.
+         * numa_next_scan is the next time that the PTEs will be marked
+         * pte_numa. NUMA hinting faults will gather statistics and
+         * migrate pages to new nodes if necessary.
          */
         unsigned long numa_next_scan;
@@ -470,9 +469,9 @@ struct mm_struct {
         int numa_scan_seq;
 #endif
         /*
-         * An operation with batched TLB flushing is going on. Anything that
-         * can move process memory needs to flush the TLB when moving a
-         * PROT_NONE or PROT_NUMA mapped page.
+         * An operation with batched TLB flushing is going on. Anything
+         * that can move process memory needs to flush the TLB when
+         * moving a PROT_NONE or PROT_NUMA mapped page.
          */
         atomic_t tlb_flush_pending;
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
@@ -489,22 +488,30 @@ struct mm_struct {
         /* HMM needs to track a few things per mm */
         struct hmm *hmm;
 #endif
         } __randomize_layout;
+
+        /*
+         * The mm_cpumask needs to be at the end of mm_struct, because it
+         * is dynamically sized based on nr_cpu_ids.
+         */
+        unsigned long cpu_bitmap[];
+};
 
 extern struct mm_struct init_mm;
 
+/* Pointer magic because the dynamic array size confuses some compilers. */
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
-#ifdef CONFIG_CPUMASK_OFFSTACK
-        mm->cpu_vm_mask_var = &mm->cpumask_allocation;
-#endif
-        cpumask_clear(mm->cpu_vm_mask_var);
+        unsigned long cpu_bitmap = (unsigned long)mm;
+
+        cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap);
+        cpumask_clear((struct cpumask *)cpu_bitmap);
 }
 
 /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
 static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 {
-        return mm->cpu_vm_mask_var;
+        return (struct cpumask *)&mm->cpu_bitmap;
 }
 
 struct mmu_gather;
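
The new mm_init_cpumask() above detours through an unsigned long plus
offsetof() rather than writing straight through &mm->cpu_bitmap. As the
commit message and the in-line comment hint, some compilers track the
declared (zero) size of the flexible array, and -Wstringop-overflow can then
complain about the cpumask_size()-byte clear behind cpumask_clear(). Below is
a hedged userspace sketch of the same trick, with hypothetical names and
assuming a platform where a pointer fits in an unsigned long:

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

struct ctx {
        int id;                         /* stand-in for the fixed part */
        unsigned long cpu_bitmap[];     /* dynamically sized trailing array */
};

/*
 * Writing through &ctx->cpu_bitmap lets the compiler see a store into an
 * array it believes is zero-sized; going via an integer and offsetof()
 * hides that provenance, so only the runtime size passed in matters.
 */
static void ctx_clear_bitmap(struct ctx *ctx, size_t bitmap_bytes)
{
        unsigned long addr = (unsigned long)ctx;

        addr += offsetof(struct ctx, cpu_bitmap);
        memset((void *)addr, 0, bitmap_bytes);
}

int main(void)
{
        size_t bytes = 2 * sizeof(unsigned long);
        struct ctx *ctx = malloc(sizeof(*ctx) + bytes);

        if (!ctx)
                return 1;
        ctx_clear_bitmap(ctx, bytes);
        free(ctx);
        return 0;
}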
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2253,6 +2253,8 @@ static void sighand_ctor(void *data)
 void __init proc_caches_init(void)
 {
+        unsigned int mm_size;
+
         sighand_cachep = kmem_cache_create("sighand_cache",
                         sizeof(struct sighand_struct), 0,
                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -2269,15 +2271,16 @@ void __init proc_caches_init(void)
                         sizeof(struct fs_struct), 0,
                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
                         NULL);
         /*
-         * FIXME! The "sizeof(struct mm_struct)" currently includes the
-         * whole struct cpumask for the OFFSTACK case. We could change
-         * this to *only* allocate as much of it as required by the
-         * maximum number of CPU's we can ever have. The cpumask_allocation
-         * is at the end of the structure, exactly for that reason.
+         * The mm_cpumask is located at the end of mm_struct, and is
+         * dynamically sized based on the maximum CPU number this system
+         * can have, taking hotplug into account (nr_cpu_ids).
          */
+        mm_size = sizeof(struct mm_struct) + cpumask_size();
+
         mm_cachep = kmem_cache_create_usercopy("mm_struct",
-                        sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
+                        mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
                         offsetof(struct mm_struct, saved_auxv),
                         sizeof_field(struct mm_struct, saved_auxv),
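
To put rough, illustrative numbers on the sizing change (not figures from the
commit): assuming a distro-style build with CONFIG_NR_CPUS=8192 but only
nr_cpu_ids=64 possible CPUs detected at boot, the old CONFIG_CPUMASK_OFFSTACK
layout still embedded a full NR_CPUS-bit struct cpumask (8192 bits, i.e.
1024 bytes) in every mm_struct, while cpumask_size() for 64 possible CPUs is
a single unsigned long (8 bytes). Each mm_struct slab object therefore shrinks
by roughly 1 KiB on such a machine.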
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -15,6 +15,16 @@
 #define INIT_MM_CONTEXT(name)
 #endif
 
+/*
+ * For dynamically allocated mm_structs, there is a dynamically sized cpumask
+ * at the end of the structure, the size of which depends on the maximum CPU
+ * number the system can see. That way we allocate only as much memory for
+ * mm_cpumask() as needed for the hundreds, or thousands of processes that
+ * a system typically runs.
+ *
+ * Since there is only one init_mm in the entire system, keep it simple
+ * and size this cpu_bitmask to NR_CPUS.
+ */
 struct mm_struct init_mm = {
         .mm_rb = RB_ROOT,
         .pgd = swapper_pg_dir,
@@ -25,5 +35,6 @@ struct mm_struct init_mm = {
         .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
         .mmlist = LIST_HEAD_INIT(init_mm.mmlist),
         .user_ns = &init_user_ns,
+        .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
         INIT_MM_CONTEXT(init_mm)
 };