Commit 0ffedcda authored by Linus Torvalds

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - make the debugfs 'kernel_page_tables' file read-only, as it only
     has read ops.  (Borislav Petkov)

   - micro-optimize clflush_cache_range() (Chris Wilson)

   - swiotlb enhancements, which fix certain KVM emulated devices
     (Igor Mammedov)

   - fix an LDT related debug message (Jan Beulich)

   - modularize CONFIG_X86_PTDUMP (Kees Cook)

   - tone down an overly alarming warning (Laura Abbott)

   - Mark variable __initdata (Rasmus Villemoes)

   - PAT additions (Toshi Kani)"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Micro-optimise clflush_cache_range()
  x86/mm/pat: Change free_memtype() to support shrinking case
  x86/mm/pat: Add untrack_pfn_moved for mremap
  x86/mm: Drop WARN from multi-BAR check
  x86/LDT: Print the real LDT base address
  x86/mm/64: Enable SWIOTLB if system has SRAT memory regions above MAX_DMA32_PFN
  x86/mm: Introduce max_possible_pfn
  x86/mm/ptdump: Make (debugfs)/kernel_page_tables read-only
  x86/mm/mtrr: Mark the 'range_new' static variable in mtrr_calc_range_state() as __initdata
  x86/mm: Turn CONFIG_X86_PTDUMP into a module
parents 6896d9f7 1f1a89ac
...@@ -69,7 +69,7 @@ config X86_PTDUMP_CORE ...@@ -69,7 +69,7 @@ config X86_PTDUMP_CORE
def_bool n def_bool n
config X86_PTDUMP config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs" tristate "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL depends on DEBUG_KERNEL
select DEBUG_FS select DEBUG_FS
select X86_PTDUMP_CORE select X86_PTDUMP_CORE
......
...@@ -593,9 +593,16 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size, ...@@ -593,9 +593,16 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
unsigned long x_remove_base, unsigned long x_remove_base,
unsigned long x_remove_size, int i) unsigned long x_remove_size, int i)
{ {
static struct range range_new[RANGE_NUM]; /*
* range_new should really be an automatic variable, but
* putting 4096 bytes on the stack is frowned upon, to put it
* mildly. It is safe to make it a static __initdata variable,
* since mtrr_calc_range_state is only called during init and
* there's no way it will call itself recursively.
*/
static struct range range_new[RANGE_NUM] __initdata;
unsigned long range_sums_new; unsigned long range_sums_new;
static int nr_range_new; int nr_range_new;
int num_reg; int num_reg;
/* Convert ranges to var ranges state: */ /* Convert ranges to var ranges state: */
......
...@@ -88,7 +88,7 @@ int __init pci_swiotlb_detect_4gb(void) ...@@ -88,7 +88,7 @@ int __init pci_swiotlb_detect_4gb(void)
{ {
/* don't initialize swiotlb if iommu=off (no_iommu=1) */ /* don't initialize swiotlb if iommu=off (no_iommu=1) */
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
if (!no_iommu && max_pfn > MAX_DMA32_PFN) if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
swiotlb = 1; swiotlb = 1;
#endif #endif
return swiotlb; return swiotlb;
......
...@@ -125,7 +125,7 @@ void release_thread(struct task_struct *dead_task) ...@@ -125,7 +125,7 @@ void release_thread(struct task_struct *dead_task)
if (dead_task->mm->context.ldt) { if (dead_task->mm->context.ldt) {
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
dead_task->comm, dead_task->comm,
dead_task->mm->context.ldt, dead_task->mm->context.ldt->entries,
dead_task->mm->context.ldt->size); dead_task->mm->context.ldt->size);
BUG(); BUG();
} }
......
...@@ -1048,6 +1048,8 @@ void __init setup_arch(char **cmdline_p) ...@@ -1048,6 +1048,8 @@ void __init setup_arch(char **cmdline_p)
if (mtrr_trim_uncached_memory(max_pfn)) if (mtrr_trim_uncached_memory(max_pfn))
max_pfn = e820_end_of_ram_pfn(); max_pfn = e820_end_of_ram_pfn();
max_possible_pfn = max_pfn;
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/* max_low_pfn get updated here */ /* max_low_pfn get updated here */
find_low_pfn_range(); find_low_pfn_range();
......
...@@ -15,6 +15,7 @@ obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o ...@@ -15,6 +15,7 @@ obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o
obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o obj-$(CONFIG_HIGHMEM) += highmem_32.o
......
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <asm/pgtable.h>
/*
 * seq_file show callback for the "kernel_page_tables" debugfs file.
 *
 * @m: seq_file that receives the dump output.
 * @v: iterator cookie supplied by the seq_file core; unused here.
 *
 * Hands @m to ptdump_walk_pgd_level() with a NULL pgd argument and
 * unconditionally reports success.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	ptdump_walk_pgd_level(m, NULL);

	return 0;
}
/*
 * open() handler for the debugfs file.
 *
 * @inode: inode of the file being opened; unused here.
 * @filp:  file being opened.
 *
 * Binds @filp to ptdump_show() through single_open(), passing no private
 * data (NULL), and returns whatever single_open() returns.
 */
static int ptdump_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, ptdump_show, NULL);
}
/*
 * File operations backing the "kernel_page_tables" debugfs file.
 * Only open/read/llseek/release handlers are provided; there is no
 * write handler.
 */
static const struct file_operations ptdump_fops = {
	.owner		= THIS_MODULE,
	.open		= ptdump_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
/* Dentry of the debugfs file; kept so the module exit path can remove it. */
static struct dentry *pe;

/*
 * Module init: create the "kernel_page_tables" debugfs file.
 *
 * The file is created with mode S_IRUSR, a NULL parent dentry and no
 * private data, and is served by ptdump_fops.
 *
 * Returns 0 on success, or -ENOMEM when debugfs_create_file() returns NULL.
 */
static int __init pt_dump_debug_init(void)
{
	pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
				 &ptdump_fops);

	return pe ? 0 : -ENOMEM;
}
/*
 * Module exit: remove the debugfs entry that pt_dump_debug_init() stored
 * in 'pe'.
 */
static void __exit pt_dump_debug_exit(void)
{
	debugfs_remove_recursive(pe);
}
/* Register the module's load and unload entry points. */
module_init(pt_dump_debug_init);
module_exit(pt_dump_debug_exit);
/* Module metadata: license, author and a short description. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
...@@ -426,38 +426,15 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) ...@@ -426,38 +426,15 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{ {
ptdump_walk_pgd_level_core(m, pgd, false); ptdump_walk_pgd_level_core(m, pgd, false);
} }
EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
void ptdump_walk_pgd_level_checkwx(void) void ptdump_walk_pgd_level_checkwx(void)
{ {
ptdump_walk_pgd_level_core(NULL, NULL, true); ptdump_walk_pgd_level_core(NULL, NULL, true);
} }
#ifdef CONFIG_X86_PTDUMP static int __init pt_dump_init(void)
static int ptdump_show(struct seq_file *m, void *v)
{ {
ptdump_walk_pgd_level(m, NULL);
return 0;
}
static int ptdump_open(struct inode *inode, struct file *filp)
{
return single_open(filp, ptdump_show, NULL);
}
static const struct file_operations ptdump_fops = {
.open = ptdump_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
#endif
static int pt_dump_init(void)
{
#ifdef CONFIG_X86_PTDUMP
struct dentry *pe;
#endif
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/* Not a compile-time constant on x86-32 */ /* Not a compile-time constant on x86-32 */
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
...@@ -468,13 +445,6 @@ static int pt_dump_init(void) ...@@ -468,13 +445,6 @@ static int pt_dump_init(void)
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
#endif #endif
#ifdef CONFIG_X86_PTDUMP
pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
&ptdump_fops);
if (!pe)
return -ENOMEM;
#endif
return 0; return 0;
} }
......
...@@ -194,8 +194,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, ...@@ -194,8 +194,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
* Check if the request spans more than any BAR in the iomem resource * Check if the request spans more than any BAR in the iomem resource
* tree. * tree.
*/ */
WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size), if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
KERN_INFO "Info: mapping multiple BARs. Your kernel is fine."); pr_warn("caller %pS mapping multiple BARs\n", caller);
return ret_addr; return ret_addr;
err_free_area: err_free_area:
......
...@@ -129,14 +129,16 @@ within(unsigned long addr, unsigned long start, unsigned long end) ...@@ -129,14 +129,16 @@ within(unsigned long addr, unsigned long start, unsigned long end)
*/ */
void clflush_cache_range(void *vaddr, unsigned int size) void clflush_cache_range(void *vaddr, unsigned int size)
{ {
unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1; const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
void *vend = vaddr + size; void *vend = vaddr + size;
void *p;
if (p >= vend)
return;
mb(); mb();
for (p = (void *)((unsigned long)vaddr & ~clflush_mask); for (; p < vend; p += clflush_size)
p < vend; p += boot_cpu_data.x86_clflush_size)
clflushopt(p); clflushopt(p);
mb(); mb();
......
...@@ -586,7 +586,7 @@ int free_memtype(u64 start, u64 end) ...@@ -586,7 +586,7 @@ int free_memtype(u64 start, u64 end)
entry = rbt_memtype_erase(start, end); entry = rbt_memtype_erase(start, end);
spin_unlock(&memtype_lock); spin_unlock(&memtype_lock);
if (!entry) { if (IS_ERR(entry)) {
pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid, start, end - 1); current->comm, current->pid, start, end - 1);
return -EINVAL; return -EINVAL;
...@@ -992,6 +992,16 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, ...@@ -992,6 +992,16 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
vma->vm_flags &= ~VM_PAT; vma->vm_flags &= ~VM_PAT;
} }
/*
* untrack_pfn_moved is called, while mremapping a pfnmap for a new region,
* with the old vma after its pfnmap page table has been removed. The new
* vma has a new pfnmap to the same pfn & cache type with VM_PAT set.
*/
void untrack_pfn_moved(struct vm_area_struct *vma)
{
vma->vm_flags &= ~VM_PAT;
}
pgprot_t pgprot_writecombine(pgprot_t prot) pgprot_t pgprot_writecombine(pgprot_t prot)
{ {
return __pgprot(pgprot_val(prot) | return __pgprot(pgprot_val(prot) |
......
...@@ -98,8 +98,13 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root, ...@@ -98,8 +98,13 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
return last_lower; /* Returns NULL if there is no overlap */ return last_lower; /* Returns NULL if there is no overlap */
} }
static struct memtype *memtype_rb_exact_match(struct rb_root *root, enum {
u64 start, u64 end) MEMTYPE_EXACT_MATCH = 0,
MEMTYPE_END_MATCH = 1
};
static struct memtype *memtype_rb_match(struct rb_root *root,
u64 start, u64 end, int match_type)
{ {
struct memtype *match; struct memtype *match;
...@@ -107,7 +112,12 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root, ...@@ -107,7 +112,12 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root,
while (match != NULL && match->start < end) { while (match != NULL && match->start < end) {
struct rb_node *node; struct rb_node *node;
if (match->start == start && match->end == end) if ((match_type == MEMTYPE_EXACT_MATCH) &&
(match->start == start) && (match->end == end))
return match;
if ((match_type == MEMTYPE_END_MATCH) &&
(match->start < start) && (match->end == end))
return match; return match;
node = rb_next(&match->rb); node = rb_next(&match->rb);
...@@ -117,7 +127,7 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root, ...@@ -117,7 +127,7 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root,
match = NULL; match = NULL;
} }
return NULL; /* Returns NULL if there is no exact match */ return NULL; /* Returns NULL if there is no match */
} }
static int memtype_rb_check_conflict(struct rb_root *root, static int memtype_rb_check_conflict(struct rb_root *root,
...@@ -210,12 +220,36 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end) ...@@ -210,12 +220,36 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
{ {
struct memtype *data; struct memtype *data;
data = memtype_rb_exact_match(&memtype_rbroot, start, end); /*
if (!data) * Since the memtype_rbroot tree allows overlapping ranges,
goto out; * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free
* a whole node for the munmap case. If no such entry is found,
* it then checks with END_MATCH, i.e. shrink the size of a node
* from the end for the mremap case.
*/
data = memtype_rb_match(&memtype_rbroot, start, end,
MEMTYPE_EXACT_MATCH);
if (!data) {
data = memtype_rb_match(&memtype_rbroot, start, end,
MEMTYPE_END_MATCH);
if (!data)
return ERR_PTR(-EINVAL);
}
if (data->start == start) {
/* munmap: erase this node */
rb_erase_augmented(&data->rb, &memtype_rbroot,
&memtype_rb_augment_cb);
} else {
/* mremap: update the end value of this node */
rb_erase_augmented(&data->rb, &memtype_rbroot,
&memtype_rb_augment_cb);
data->end = start;
data->subtree_max_end = data->end;
memtype_rb_insert(&memtype_rbroot, data);
return NULL;
}
rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);
out:
return data; return data;
} }
......
...@@ -203,6 +203,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) ...@@ -203,6 +203,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n", pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
(unsigned long long)start, (unsigned long long)end - 1); (unsigned long long)start, (unsigned long long)end - 1);
max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
return 0; return 0;
out_err_bad_srat: out_err_bad_srat:
bad_srat(); bad_srat();
......
...@@ -569,7 +569,7 @@ static inline int track_pfn_copy(struct vm_area_struct *vma) ...@@ -569,7 +569,7 @@ static inline int track_pfn_copy(struct vm_area_struct *vma)
} }
/* /*
* untrack_pfn_vma is called while unmapping a pfnmap for a region. * untrack_pfn is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or * untrack can be called for a specific region indicated by pfn and size or
* can be for the entire vma (in which case pfn, size are zero). * can be for the entire vma (in which case pfn, size are zero).
*/ */
...@@ -577,6 +577,13 @@ static inline void untrack_pfn(struct vm_area_struct *vma, ...@@ -577,6 +577,13 @@ static inline void untrack_pfn(struct vm_area_struct *vma,
unsigned long pfn, unsigned long size) unsigned long pfn, unsigned long size)
{ {
} }
/*
* untrack_pfn_moved is called while mremapping a pfnmap for a new region.
*/
static inline void untrack_pfn_moved(struct vm_area_struct *vma)
{
}
#else #else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long addr, unsigned long pfn, unsigned long addr,
...@@ -586,6 +593,7 @@ extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, ...@@ -586,6 +593,7 @@ extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
extern int track_pfn_copy(struct vm_area_struct *vma); extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size); unsigned long size);
extern void untrack_pfn_moved(struct vm_area_struct *vma);
#endif #endif
#ifdef __HAVE_COLOR_ZERO_PAGE #ifdef __HAVE_COLOR_ZERO_PAGE
......
...@@ -19,6 +19,10 @@ extern unsigned long min_low_pfn; ...@@ -19,6 +19,10 @@ extern unsigned long min_low_pfn;
* highest page * highest page
*/ */
extern unsigned long max_pfn; extern unsigned long max_pfn;
/*
* highest possible page
*/
extern unsigned long long max_possible_pfn;
#ifndef CONFIG_NO_BOOTMEM #ifndef CONFIG_NO_BOOTMEM
/* /*
......
...@@ -33,6 +33,7 @@ EXPORT_SYMBOL(contig_page_data); ...@@ -33,6 +33,7 @@ EXPORT_SYMBOL(contig_page_data);
unsigned long max_low_pfn; unsigned long max_low_pfn;
unsigned long min_low_pfn; unsigned long min_low_pfn;
unsigned long max_pfn; unsigned long max_pfn;
unsigned long long max_possible_pfn;
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
......
...@@ -319,6 +319,10 @@ static unsigned long move_vma(struct vm_area_struct *vma, ...@@ -319,6 +319,10 @@ static unsigned long move_vma(struct vm_area_struct *vma,
hiwater_vm = mm->hiwater_vm; hiwater_vm = mm->hiwater_vm;
vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT); vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
untrack_pfn_moved(vma);
if (do_munmap(mm, old_addr, old_len) < 0) { if (do_munmap(mm, old_addr, old_len) < 0) {
/* OOM: unable to split vma, just get accounts right */ /* OOM: unable to split vma, just get accounts right */
vm_unacct_memory(excess >> PAGE_SHIFT); vm_unacct_memory(excess >> PAGE_SHIFT);
......
...@@ -31,6 +31,7 @@ EXPORT_SYMBOL(contig_page_data); ...@@ -31,6 +31,7 @@ EXPORT_SYMBOL(contig_page_data);
unsigned long max_low_pfn; unsigned long max_low_pfn;
unsigned long min_low_pfn; unsigned long min_low_pfn;
unsigned long max_pfn; unsigned long max_pfn;
unsigned long long max_possible_pfn;
static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
u64 goal, u64 limit) u64 goal, u64 limit)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment