Commit 99792e0c authored by Linus Torvalds

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:
 "Lots of changes in this cycle:

   - Lots of CPA (change page attribute) optimizations and related
     cleanups (Thomas Gleixner, Peter Zijlstra)

   - Make lazy TLB mode even lazier (Rik van Riel)

   - Fault handler cleanups and improvements (Dave Hansen)

   - kdump, vmcore: Enable kdumping encrypted memory with AMD SME
     enabled (Lianbo Jiang)

   - Clean up VM layout documentation (Baoquan He, Ingo Molnar)

   - ... plus misc other fixes and enhancements"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (51 commits)
  x86/stackprotector: Remove the call to boot_init_stack_canary() from cpu_startup_entry()
  x86/mm: Kill stray kernel fault handling comment
  x86/mm: Do not warn about PCI BIOS W+X mappings
  resource: Clean it up a bit
  resource: Fix find_next_iomem_res() iteration issue
  resource: Include resource end in walk_*() interfaces
  x86/kexec: Correct KEXEC_BACKUP_SRC_END off-by-one error
  x86/mm: Remove spurious fault pkey check
  x86/mm/vsyscall: Consider vsyscall page part of user address space
  x86/mm: Add vsyscall address helper
  x86/mm: Fix exception table comments
  x86/mm: Add clarifying comments for user addr space
  x86/mm: Break out user address space handling
  x86/mm: Break out kernel address space handling
  x86/mm: Clarify hardware vs. software "error_code"
  x86/mm/tlb: Make lazy TLB mode lazier
  x86/mm/tlb: Add freed_tables element to flush_tlb_info
  x86/mm/tlb: Add freed_tables argument to flush_tlb_mm_range
  smp,cpumask: introduce on_each_cpu_cond_mask
  smp: use __cpumask_set_cpu in on_each_cpu_cond
  ...
parents 382d72a9 977e4be5
@@ -1487,6 +1487,14 @@ config X86_DIRECT_GBPAGES
 	  supports them), so don't confuse the user by printing
 	  that we have them enabled.
 
+config X86_CPA_STATISTICS
+	bool "Enable statistics for Change Page Attribute"
+	depends on DEBUG_FS
+	---help---
+	  Expose statistics about the Change Page Attribute mechanism, which
+	  helps to determine the effectiveness of preserving large and huge
+	  page mappings when mapping protections are changed.
+
 config ARCH_HAS_MEM_ENCRYPT
 	def_bool y
...
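The new option only appears when debugfs is available. A configuration fragment for trying it out might look like this (illustrative only, assuming an x86 build with debugfs enabled):

    CONFIG_DEBUG_FS=y
    CONFIG_X86_CPA_STATISTICS=y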
@@ -187,11 +187,12 @@ extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size)
 #define ioremap_nocache ioremap_nocache
 extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
 #define ioremap_uc ioremap_uc
 extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
 #define ioremap_cache ioremap_cache
 extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val);
 #define ioremap_prot ioremap_prot
+extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size);
+#define ioremap_encrypted ioremap_encrypted
 
 /**
  * ioremap     -   map bus memory into CPU space
...
@@ -67,7 +67,7 @@ struct kimage;
 
 /* Memory to backup during crash kdump */
 #define KEXEC_BACKUP_SRC_START	(0UL)
-#define KEXEC_BACKUP_SRC_END	(640 * 1024UL)		/* 640K */
+#define KEXEC_BACKUP_SRC_END	(640 * 1024UL - 1)	/* 640K */
 
 /*
  * CPU does not save ss and sp on stack if execution is already
...
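This off-by-one fix ties in with the walk_*() interfaces below now treating a resource end as inclusive: the backup region is [start..end], so its size is end - start + 1. A minimal sketch of that arithmetic (the helper name is hypothetical, purely for illustration):

    #include <linux/build_bug.h>
    #include <asm/kexec.h>

    static inline void example_check_backup_region(void)
    {
    	/* Inclusive end: (640 * 1024UL - 1) - 0UL + 1 == 640 KiB exactly. */
    	BUILD_BUG_ON(KEXEC_BACKUP_SRC_END - KEXEC_BACKUP_SRC_START + 1 !=
    		     640 * 1024UL);
    }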
@@ -59,13 +59,16 @@
 #endif
 
 /*
- * Kernel image size is limited to 1GiB due to the fixmap living in the
- * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use
- * 512MiB by default, leaving 1.5GiB for modules once the page tables
- * are fully set up. If kernel ASLR is configured, it can extend the
- * kernel page table mapping, reducing the size of the modules area.
+ * Maximum kernel image size is limited to 1 GiB, due to the fixmap living
+ * in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S).
+ *
+ * On KASLR use 1 GiB by default, leaving 1 GiB for modules once the
+ * page tables are fully set up.
+ *
+ * If KASLR is disabled we can shrink it to 0.5 GiB and increase the size
+ * of the modules area to 1.5 GiB.
  */
-#if defined(CONFIG_RANDOMIZE_BASE)
+#ifdef CONFIG_RANDOMIZE_BASE
 #define KERNEL_IMAGE_SIZE	(1024 * 1024 * 1024)
 #else
 #define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024)
...
@@ -6,16 +6,23 @@
 #define tlb_end_vma(tlb, vma) do { } while (0)
 #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
 
-#define tlb_flush(tlb)							\
-{									\
-	if (!tlb->fullmm && !tlb->need_flush_all)			\
-		flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL);	\
-	else								\
-		flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL);	\
-}
+static inline void tlb_flush(struct mmu_gather *tlb);
 
 #include <asm-generic/tlb.h>
 
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+	unsigned long start = 0UL, end = TLB_FLUSH_ALL;
+	unsigned int stride_shift = tlb_get_unmap_shift(tlb);
+
+	if (!tlb->fullmm && !tlb->need_flush_all) {
+		start = tlb->start;
+		end = tlb->end;
+	}
+
+	flush_tlb_mm_range(tlb->mm, start, end, stride_shift, tlb->freed_tables);
+}
+
 /*
  * While x86 architecture in general requires an IPI to perform TLB
  * shootdown, enablement code for several hypervisors overrides
...
@@ -148,22 +148,6 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 #define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
 #endif
 
-static inline bool tlb_defer_switch_to_init_mm(void)
-{
-	/*
-	 * If we have PCID, then switching to init_mm is reasonably
-	 * fast.  If we don't have PCID, then switching to init_mm is
-	 * quite slow, so we try to defer it in the hopes that we can
-	 * avoid it entirely.  The latter approach runs the risk of
-	 * receiving otherwise unnecessary IPIs.
-	 *
-	 * This choice is just a heuristic.  The tlb code can handle this
-	 * function returning true or false regardless of whether we have
-	 * PCID.
-	 */
-	return !static_cpu_has(X86_FEATURE_PCID);
-}
-
 struct tlb_context {
 	u64 ctx_id;
 	u64 tlb_gen;
@@ -547,23 +531,30 @@ struct flush_tlb_info {
 	unsigned long		start;
 	unsigned long		end;
 	u64			new_tlb_gen;
+	unsigned int		stride_shift;
+	bool			freed_tables;
 };
 
 #define local_flush_tlb() __flush_tlb()
 
-#define flush_tlb_mm(mm)	flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
+#define flush_tlb_mm(mm)						\
+		flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true)
 
-#define flush_tlb_range(vma, start, end)				\
-		flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+#define flush_tlb_range(vma, start, end)				\
+	flush_tlb_mm_range((vma)->vm_mm, start, end,			\
+			   ((vma)->vm_flags & VM_HUGETLB)		\
+				? huge_page_shift(hstate_vma(vma))	\
+				: PAGE_SHIFT, false)
 
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-				unsigned long end, unsigned long vmflag);
+				unsigned long end, unsigned int stride_shift,
+				bool freed_tables);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 {
-	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
+	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
}
 
 void native_flush_tlb_others(const struct cpumask *cpumask,
...
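With the vm_flags argument replaced by an explicit stride_shift and a freed_tables flag, a caller now states directly how far apart the entries to invalidate are and whether page tables were freed. A minimal sketch of a caller under the new interface (the function and its arguments are hypothetical, for illustration only):

    #include <linux/mm.h>
    #include <asm/tlbflush.h>

    /* Flush the TLB entries covering one 2MB mapping of a hypothetical mm. */
    static void example_flush_one_pmd(struct mm_struct *mm, unsigned long addr)
    {
    	/*
    	 * stride_shift = PMD_SHIFT: entries are 2MB apart, so one
    	 * invalidation per huge page is enough.
    	 * freed_tables = false: no page tables were freed, so lazy TLB
    	 * CPUs may skip the IPI (see arch/x86/mm/tlb.c below).
    	 */
    	flush_tlb_mm_range(mm, addr, addr + PMD_SIZE, PMD_SHIFT, false);
    }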
@@ -11,40 +11,62 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 
-/**
- * copy_oldmem_page - copy one page from "oldmem"
- * @pfn: page frame number to be copied
- * @buf: target memory address for the copy; this can be in kernel address
- *	space or user address space (see @userbuf)
- * @csize: number of bytes to copy
- * @offset: offset in bytes into the page (based on pfn) to begin the copy
- * @userbuf: if set, @buf is in user address space, use copy_to_user(),
- *	otherwise @buf is in kernel address space, use memcpy().
- *
- * Copy a page from "oldmem". For this page, there is no pte mapped
- * in the current kernel. We stitch up a pte, similar to kmap_atomic.
- */
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
-		size_t csize, unsigned long offset, int userbuf)
+static ssize_t __copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+				  unsigned long offset, int userbuf,
+				  bool encrypted)
 {
 	void *vaddr;
 
 	if (!csize)
 		return 0;
 
-	vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
+	if (encrypted)
+		vaddr = (__force void *)ioremap_encrypted(pfn << PAGE_SHIFT, PAGE_SIZE);
+	else
+		vaddr = (__force void *)ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
+
 	if (!vaddr)
 		return -ENOMEM;
 
 	if (userbuf) {
-		if (copy_to_user(buf, vaddr + offset, csize)) {
-			iounmap(vaddr);
+		if (copy_to_user((void __user *)buf, vaddr + offset, csize)) {
+			iounmap((void __iomem *)vaddr);
 			return -EFAULT;
 		}
 	} else
 		memcpy(buf, vaddr + offset, csize);
 
 	set_iounmap_nonlazy();
-	iounmap(vaddr);
+	iounmap((void __iomem *)vaddr);
 	return csize;
 }
+
+/**
+ * copy_oldmem_page - copy one page of memory
+ * @pfn: page frame number to be copied
+ * @buf: target memory address for the copy; this can be in kernel address
+ *	space or user address space (see @userbuf)
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page (based on pfn) to begin the copy
+ * @userbuf: if set, @buf is in user address space, use copy_to_user(),
+ *	otherwise @buf is in kernel address space, use memcpy().
+ *
+ * Copy a page from the old kernel's memory. For this page, there is no pte
+ * mapped in the current kernel. We stitch up a pte, similar to kmap_atomic.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+			 unsigned long offset, int userbuf)
+{
+	return __copy_oldmem_page(pfn, buf, csize, offset, userbuf, false);
+}
+
+/**
+ * copy_oldmem_page_encrypted - same as copy_oldmem_page() above but ioremap the
+ * memory with the encryption mask set to accommodate kdump on SME-enabled
+ * machines.
+ */
+ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize,
+				   unsigned long offset, int userbuf)
+{
+	return __copy_oldmem_page(pfn, buf, csize, offset, userbuf, true);
+}
@@ -273,7 +273,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 	map_ldt_struct_to_user(mm);
 
 	va = (unsigned long)ldt_slot_va(slot);
-	flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
+	flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, PAGE_SHIFT, false);
 
 	ldt->slot = slot;
 	return 0;
...
@@ -199,7 +199,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
 	pte_unmap_unlock(pte, ptl);
 out:
 	up_write(&mm->mmap_sem);
-	flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, 0UL);
+	flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, PAGE_SHIFT, false);
 }
...
@@ -19,7 +19,9 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/highmem.h>
+#include <linux/pci.h>
 
+#include <asm/e820/types.h>
 #include <asm/pgtable.h>
 
 /*
@@ -241,6 +243,29 @@ static unsigned long normalize_addr(unsigned long u)
 	return (signed long)(u << shift) >> shift;
 }
 
+static void note_wx(struct pg_state *st)
+{
+	unsigned long npages;
+
+	npages = (st->current_address - st->start_address) / PAGE_SIZE;
+
+#ifdef CONFIG_PCI_BIOS
+	/*
+	 * If PCI BIOS is enabled, the PCI BIOS area is forced to WX.
+	 * Inform about it, but avoid the warning.
+	 */
+	if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN &&
+	    st->current_address <= PAGE_OFFSET + BIOS_END) {
+		pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages);
+		return;
+	}
+#endif
+	/* Account the WX pages */
+	st->wx_pages += npages;
+	WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %pS\n",
+		  (void *)st->start_address);
+}
+
 /*
  * This function gets called on a break in a continuous series
  * of PTE entries; the next one is different so we need to
@@ -276,14 +301,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 		unsigned long delta;
 		int width = sizeof(unsigned long) * 2;
 
-		if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) {
-			WARN_ONCE(1,
-				  "x86/mm: Found insecure W+X mapping at address %p/%pS\n",
-				  (void *)st->start_address,
-				  (void *)st->start_address);
-			st->wx_pages += (st->current_address -
-					 st->start_address) / PAGE_SIZE;
-		}
+		if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX))
+			note_wx(st);
 
 		/*
 		 * Now print the actual finished series
...
@@ -923,34 +923,19 @@ static void mark_nxdata_nx(void)
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_text);
-	unsigned long size = PFN_ALIGN(_etext) - start;
+	unsigned long size = (unsigned long)__end_rodata - start;
 
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
-	printk(KERN_INFO "Write protecting the kernel text: %luk\n",
+	pr_info("Write protecting kernel text and read-only data: %luk\n",
 		size >> 10);
 
 	kernel_set_to_readonly = 1;
 
 #ifdef CONFIG_CPA_DEBUG
-	printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
-		start, start+size);
-	set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
-
-	printk(KERN_INFO "Testing CPA: write protecting again\n");
-	set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
-#endif
-
-	start += size;
-	size = (unsigned long)__end_rodata - start;
-	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
-	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
-		size >> 10);
-
-#ifdef CONFIG_CPA_DEBUG
-	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size);
+	pr_info("Testing CPA: Reverting %lx-%lx\n", start, start + size);
 	set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
-	printk(KERN_INFO "Testing CPA: write protecting again\n");
+
+	pr_info("Testing CPA: write protecting again\n");
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 #endif
 
 	mark_nxdata_nx();
...
@@ -131,7 +131,8 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
  * caller shouldn't need to know that small detail.
  */
 static void __iomem *__ioremap_caller(resource_size_t phys_addr,
-		unsigned long size, enum page_cache_mode pcm, void *caller)
+		unsigned long size, enum page_cache_mode pcm,
+		void *caller, bool encrypted)
 {
 	unsigned long offset, vaddr;
 	resource_size_t last_addr;
@@ -199,7 +200,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	 * resulting mapping.
 	 */
 	prot = PAGE_KERNEL_IO;
-	if (sev_active() && mem_flags.desc_other)
+	if ((sev_active() && mem_flags.desc_other) || encrypted)
 		prot = pgprot_encrypted(prot);
 
 	switch (pcm) {
@@ -291,7 +292,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
 
 	return __ioremap_caller(phys_addr, size, pcm,
-				__builtin_return_address(0));
+				__builtin_return_address(0), false);
 }
 EXPORT_SYMBOL(ioremap_nocache);
 
@@ -324,7 +325,7 @@ void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
 
 	return __ioremap_caller(phys_addr, size, pcm,
-				__builtin_return_address(0));
+				__builtin_return_address(0), false);
 }
 EXPORT_SYMBOL_GPL(ioremap_uc);
 
@@ -341,7 +342,7 @@ EXPORT_SYMBOL_GPL(ioremap_uc);
 void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
 {
 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
-					__builtin_return_address(0));
+					__builtin_return_address(0), false);
 }
 EXPORT_SYMBOL(ioremap_wc);
 
@@ -358,14 +359,21 @@ EXPORT_SYMBOL(ioremap_wc);
 void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
 {
 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
-					__builtin_return_address(0));
+					__builtin_return_address(0), false);
 }
 EXPORT_SYMBOL(ioremap_wt);
 
+void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size)
+{
+	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
+				__builtin_return_address(0), true);
+}
+EXPORT_SYMBOL(ioremap_encrypted);
+
 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
 {
 	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
-				__builtin_return_address(0));
+				__builtin_return_address(0), false);
 }
 EXPORT_SYMBOL(ioremap_cache);
 
@@ -374,7 +382,7 @@ void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
 {
 	return __ioremap_caller(phys_addr, size,
 				pgprot2cachemode(__pgprot(prot_val)),
-				__builtin_return_address(0));
+				__builtin_return_address(0), false);
 }
 EXPORT_SYMBOL(ioremap_prot);
...
@@ -185,8 +185,11 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 {
 	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
 	unsigned cpu = smp_processor_id();
 	u64 next_tlb_gen;
+	bool need_flush;
+	u16 new_asid;
 
 	/*
 	 * NB: The scheduler will call us with prev == next when switching
@@ -240,20 +243,41 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			   next->context.ctx_id);
 
 		/*
-		 * We don't currently support having a real mm loaded without
-		 * our cpu set in mm_cpumask().  We have all the bookkeeping
-		 * in place to figure out whether we would need to flush
-		 * if our cpu were cleared in mm_cpumask(), but we don't
-		 * currently use it.
+		 * Even in lazy TLB mode, the CPU should stay set in the
+		 * mm_cpumask. The TLB shootdown code can figure out from
+		 * cpu_tlbstate.is_lazy whether or not to send an IPI.
 		 */
 		if (WARN_ON_ONCE(real_prev != &init_mm &&
 				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
 			cpumask_set_cpu(cpu, mm_cpumask(next));
 
-		return;
+		/*
+		 * If the CPU is not in lazy TLB mode, we are just switching
+		 * from one thread in a process to another thread in the same
+		 * process. No TLB flush required.
+		 */
+		if (!was_lazy)
+			return;
+
+		/*
+		 * Read the tlb_gen to check whether a flush is needed.
+		 * If the TLB is up to date, just use it.
+		 * The barrier synchronizes with the tlb_gen increment in
+		 * the TLB shootdown code.
+		 */
+		smp_mb();
+		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
+				next_tlb_gen)
+			return;
+
+		/*
+		 * TLB contents went out of date while we were in lazy
+		 * mode. Fall through to the TLB switching code below.
+		 */
+		new_asid = prev_asid;
+		need_flush = true;
 	} else {
-		u16 new_asid;
-		bool need_flush;
 		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
 
 		/*
@@ -308,46 +332,48 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		/* Let nmi_uaccess_okay() know that we're changing CR3. */
 		this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
 		barrier();
+	}
 
-		if (need_flush) {
-			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
-			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-			load_new_mm_cr3(next->pgd, new_asid, true);
-
-			/*
-			 * NB: This gets called via leave_mm() in the idle path
-			 * where RCU functions differently.  Tracing normally
-			 * uses RCU, so we need to use the _rcuidle variant.
-			 *
-			 * (There is no good reason for this.  The idle code should
-			 *  be rearranged to call this before rcu_idle_enter().)
-			 */
-			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-		} else {
-			/* The new ASID is already up to date. */
-			load_new_mm_cr3(next->pgd, new_asid, false);
-
-			/* See above wrt _rcuidle. */
-			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
-		}
+	if (need_flush) {
+		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
+		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
+		load_new_mm_cr3(next->pgd, new_asid, true);
 
 		/*
-		 * Record last user mm's context id, so we can avoid
-		 * flushing branch buffer with IBPB if we switch back
-		 * to the same user.
+		 * NB: This gets called via leave_mm() in the idle path
+		 * where RCU functions differently.  Tracing normally
+		 * uses RCU, so we need to use the _rcuidle variant.
+		 *
+		 * (There is no good reason for this.  The idle code should
+		 *  be rearranged to call this before rcu_idle_enter().)
 		 */
-		if (next != &init_mm)
-			this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+	} else {
+		/* The new ASID is already up to date. */
+		load_new_mm_cr3(next->pgd, new_asid, false);
 
-		/* Make sure we write CR3 before loaded_mm. */
-		barrier();
+		/* See above wrt _rcuidle. */
+		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
+	}
 
-		this_cpu_write(cpu_tlbstate.loaded_mm, next);
-		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
-	}
+	/*
+	 * Record last user mm's context id, so we can avoid
+	 * flushing branch buffer with IBPB if we switch back
+	 * to the same user.
+	 */
+	if (next != &init_mm)
+		this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+
+	/* Make sure we write CR3 before loaded_mm. */
+	barrier();
+
+	this_cpu_write(cpu_tlbstate.loaded_mm, next);
+	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
 
-	load_mm_cr4(next);
-	switch_ldt(real_prev, next);
+	if (next != real_prev) {
+		load_mm_cr4(next);
+		switch_ldt(real_prev, next);
+	}
 }
 
 /*
@@ -368,20 +394,7 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
 		return;
 
-	if (tlb_defer_switch_to_init_mm()) {
-		/*
-		 * There's a significant optimization that may be possible
-		 * here.  We have accurate enough TLB flush tracking that we
-		 * don't need to maintain coherence of TLB per se when we're
-		 * lazy.  We do, however, need to maintain coherence of
-		 * paging-structure caches.  We could, in principle, leave our
-		 * old mm loaded and only switch to init_mm when
-		 * tlb_remove_page() happens.
-		 */
-		this_cpu_write(cpu_tlbstate.is_lazy, true);
-	} else {
-		switch_mm(NULL, &init_mm, NULL);
-	}
+	this_cpu_write(cpu_tlbstate.is_lazy, true);
 }
 
 /*
@@ -468,6 +481,9 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 		 * paging-structure cache to avoid speculatively reading
 		 * garbage into our TLB.  Since switching to init_mm is barely
 		 * slower than a minimal flush, just switch to init_mm.
+		 *
+		 * This should be rare, with native_flush_tlb_others skipping
+		 * IPIs to lazy TLB mode CPUs.
 		 */
 		switch_mm_irqs_off(NULL, &init_mm, NULL);
 		return;
@@ -528,17 +544,16 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 	    f->new_tlb_gen == local_tlb_gen + 1 &&
 	    f->new_tlb_gen == mm_tlb_gen) {
 		/* Partial flush */
-		unsigned long addr;
-		unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
+		unsigned long nr_invalidate = (f->end - f->start) >> f->stride_shift;
+		unsigned long addr = f->start;
 
-		addr = f->start;
 		while (addr < f->end) {
 			__flush_tlb_one_user(addr);
-			addr += PAGE_SIZE;
+			addr += 1UL << f->stride_shift;
 		}
 		if (local)
-			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
-		trace_tlb_flush(reason, nr_pages);
+			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
+		trace_tlb_flush(reason, nr_invalidate);
 	} else {
 		/* Full flush. */
 		local_flush_tlb();
@@ -571,6 +586,11 @@ static void flush_tlb_func_remote(void *info)
 	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
 }
 
+static bool tlb_is_not_lazy(int cpu, void *data)
+{
+	return !per_cpu(cpu_tlbstate.is_lazy, cpu);
+}
+
 void native_flush_tlb_others(const struct cpumask *cpumask,
 			     const struct flush_tlb_info *info)
 {
@@ -606,8 +626,23 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 					       (void *)info, 1);
 		return;
 	}
-	smp_call_function_many(cpumask, flush_tlb_func_remote,
+
+	/*
+	 * If no page tables were freed, we can skip sending IPIs to
+	 * CPUs in lazy TLB mode. They will flush the CPU themselves
+	 * at the next context switch.
+	 *
+	 * However, if page tables are getting freed, we need to send the
+	 * IPI everywhere, to prevent CPUs in lazy TLB mode from tripping
+	 * up on the new contents of what used to be page tables, while
+	 * doing a speculative memory access.
+	 */
+	if (info->freed_tables)
+		smp_call_function_many(cpumask, flush_tlb_func_remote,
 			       (void *)info, 1);
+	else
+		on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
+				(void *)info, 1, GFP_ATOMIC, cpumask);
 }
 
 /*
@@ -623,12 +658,15 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
 
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-				unsigned long end, unsigned long vmflag)
+				unsigned long end, unsigned int stride_shift,
+				bool freed_tables)
 {
 	int cpu;
 
 	struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
 		.mm = mm,
+		.stride_shift = stride_shift,
+		.freed_tables = freed_tables,
 	};
 
 	cpu = get_cpu();
@@ -638,8 +676,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 
 	/* Should we flush just the requested range? */
 	if ((end != TLB_FLUSH_ALL) &&
-	    !(vmflag & VM_HUGETLB) &&
-	    ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
+	    ((end - start) >> stride_shift) <= tlb_single_page_flush_ceiling) {
 		info.start = start;
 		info.end = end;
 	} else {
...
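The range check above now scales with the stride: the number of individual invalidations is (end - start) >> stride_shift, and anything above the ceiling (33 by default) falls back to a full flush. A rough standalone illustration of that arithmetic (the helper name and the hard-coded ceiling are only for this sketch):

    /* A 2MB range at 4K stride needs 512 invalidations -> full flush;
     * the same 2MB range at 2MB stride needs just 1 -> partial flush. */
    static bool example_would_do_partial_flush(unsigned long start,
    					       unsigned long end,
    					       unsigned int stride_shift)
    {
    	return ((end - start) >> stride_shift) <= 33;
    }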
@@ -22,6 +22,7 @@
 #include <linux/tick.h>
 #include <linux/nmi.h>
 #include <linux/cpuhotplug.h>
+#include <linux/stackprotector.h>
 
 #include <asm/paravirt.h>
 #include <asm/desc.h>
@@ -88,6 +89,7 @@ static void cpu_bringup(void)
 asmlinkage __visible void cpu_bringup_and_idle(void)
 {
 	cpu_bringup();
+	boot_init_stack_canary();
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
...
@@ -902,12 +902,22 @@ static bool copy_device_table(void)
 		}
 	}
 
-	old_devtb_phys = entry & PAGE_MASK;
+	/*
+	 * When SME is enabled in the first kernel, the entry includes the
+	 * memory encryption mask (sme_me_mask), so we must remove the memory
+	 * encryption mask to obtain the true physical address in the kdump
+	 * kernel.
+	 */
+	old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
+
 	if (old_devtb_phys >= 0x100000000ULL) {
 		pr_err("The address of old device table is above 4G, not trustworthy!\n");
 		return false;
 	}
-	old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
+	old_devtb = (sme_active() && is_kdump_kernel())
+		    ? (__force void *)ioremap_encrypted(old_devtb_phys,
+							dev_table_size)
+		    : memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
+
 	if (!old_devtb)
 		return false;
...
@@ -24,6 +24,8 @@
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
 #include <linux/uaccess.h>
+#include <linux/mem_encrypt.h>
+#include <asm/pgtable.h>
 #include <asm/io.h>
 #include "internal.h"
 
@@ -98,7 +100,8 @@ static int pfn_is_ram(unsigned long pfn)
 
 /* Reads a page from the oldmem device from given offset. */
 static ssize_t read_from_oldmem(char *buf, size_t count,
-				u64 *ppos, int userbuf)
+				u64 *ppos, int userbuf,
+				bool encrypted)
 {
 	unsigned long pfn, offset;
 	size_t nr_bytes;
@@ -120,8 +123,15 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
 		if (pfn_is_ram(pfn) == 0)
 			memset(buf, 0, nr_bytes);
 		else {
-			tmp = copy_oldmem_page(pfn, buf, nr_bytes,
-						offset, userbuf);
+			if (encrypted)
+				tmp = copy_oldmem_page_encrypted(pfn, buf,
+								 nr_bytes,
+								 offset,
+								 userbuf);
+			else
+				tmp = copy_oldmem_page(pfn, buf, nr_bytes,
+						       offset, userbuf);
+
 			if (tmp < 0)
 				return tmp;
 		}
@@ -155,7 +165,7 @@ void __weak elfcorehdr_free(unsigned long long addr)
  */
 ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
 {
-	return read_from_oldmem(buf, count, ppos, 0);
+	return read_from_oldmem(buf, count, ppos, 0, false);
 }
 
 /*
@@ -163,7 +173,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
  */
 ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
 {
-	return read_from_oldmem(buf, count, ppos, 0);
+	return read_from_oldmem(buf, count, ppos, 0, sme_active());
 }
 
 /*
@@ -173,9 +183,20 @@ int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
 				  unsigned long from, unsigned long pfn,
 				  unsigned long size, pgprot_t prot)
 {
+	prot = pgprot_encrypted(prot);
 	return remap_pfn_range(vma, from, pfn, size, prot);
 }
 
+/*
+ * Architectures which support memory encryption override this.
+ */
+ssize_t __weak
+copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize,
+			   unsigned long offset, int userbuf)
+{
+	return copy_oldmem_page(pfn, buf, csize, offset, userbuf);
+}
+
 /*
  * Copy to either kernel or user space
  */
@@ -351,7 +372,8 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
 					m->offset + m->size - *fpos,
 					buflen);
 			start = m->paddr + *fpos - m->offset;
-			tmp = read_from_oldmem(buffer, tsz, &start, userbuf);
+			tmp = read_from_oldmem(buffer, tsz, &start,
+					       userbuf, sme_active());
 			if (tmp < 0)
 				return tmp;
 			buflen -= tsz;
...
@@ -26,6 +26,10 @@ extern int remap_oldmem_pfn_range(struct vm_area_struct *vma,
 
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
+extern ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf,
+					   size_t csize, unsigned long offset,
+					   int userbuf);
+
 void vmcore_cleanup(void);
 
 /* Architecture code defines this if there are other possible ELF
...
@@ -53,6 +53,10 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 		smp_call_func_t func, void *info, bool wait,
 		gfp_t gfp_flags);
 
+void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info),
+		smp_call_func_t func, void *info, bool wait,
+		gfp_t gfp_flags, const struct cpumask *mask);
+
 int smp_call_function_single_async(int cpu, call_single_data_t *csd);
 
 #ifdef CONFIG_SMP
...
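on_each_cpu_cond_mask() behaves like on_each_cpu_cond(), but evaluates the predicate only for CPUs in a caller-supplied mask, which is what lets the TLB code above skip lazy CPUs. A minimal usage sketch (all example_* names are hypothetical):

    #include <linux/smp.h>
    #include <linux/cpumask.h>
    #include <linux/gfp.h>

    static bool example_cpu_is_selected(int cpu, void *info)
    {
    	/* Purely illustrative predicate: pick even-numbered CPUs. */
    	return (cpu % 2) == 0;
    }

    static void example_ipi_work(void *info)
    {
    	/* Runs on each selected CPU, in interrupt context. */
    }

    static void example_kick_cpus(const struct cpumask *mask)
    {
    	/* wait == true: block until all callbacks finish;
    	 * GFP_ATOMIC is used for the temporary cpumask allocation. */
    	on_each_cpu_cond_mask(example_cpu_is_selected, example_ipi_work,
    			      NULL, true, GFP_ATOMIC, mask);
    }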
@@ -471,6 +471,10 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
 		}
 	}
 
+	/* Ensure that these pages are decrypted if SME is enabled. */
+	if (pages)
+		arch_kexec_post_alloc_pages(page_address(pages), 1 << order, 0);
+
 	return pages;
 }
 
@@ -867,6 +871,7 @@ static int kimage_load_crash_segment(struct kimage *image,
 			result  = -ENOMEM;
 			goto out;
 		}
+		arch_kexec_post_alloc_pages(page_address(page), 1, 0);
 		ptr = kmap(page);
 		ptr += maddr & ~PAGE_MASK;
 		mchunk = min_t(size_t, mbytes,
@@ -884,6 +889,7 @@ static int kimage_load_crash_segment(struct kimage *image,
 		result = copy_from_user(ptr, buf, uchunk);
 		kexec_flush_icache_page(page);
 		kunmap(page);
+		arch_kexec_pre_free_pages(page_address(page), 1);
 		if (result) {
 			result = -EFAULT;
 			goto out;
...
@@ -318,33 +318,34 @@ int release_resource(struct resource *old)
 
 EXPORT_SYMBOL(release_resource);
 
-/*
- * Finds the lowest iomem resource existing within [res->start.res->end).
- * The caller must specify res->start, res->end, res->flags, and optionally
- * desc.  If found, returns 0, res is overwritten, if not found, returns -1.
- * This function walks the whole tree and not just first level children until
- * and unless first_level_children_only is true.
+/**
+ * Finds the lowest iomem resource that covers part of [start..end].  The
+ * caller must specify start, end, flags, and desc (which may be
+ * IORES_DESC_NONE).
+ *
+ * If a resource is found, returns 0 and *res is overwritten with the part
+ * of the resource that's within [start..end]; if none is found, returns
+ * -1.
+ *
+ * This function walks the whole tree and not just first level children
+ * unless @first_lvl is true.
  */
-static int find_next_iomem_res(struct resource *res, unsigned long desc,
-			       bool first_level_children_only)
+static int find_next_iomem_res(resource_size_t start, resource_size_t end,
+			       unsigned long flags, unsigned long desc,
+			       bool first_lvl, struct resource *res)
 {
-	resource_size_t start, end;
 	struct resource *p;
-	bool sibling_only = false;
 
-	BUG_ON(!res);
+	if (!res)
+		return -EINVAL;
 
-	start = res->start;
-	end = res->end;
-	BUG_ON(start >= end);
-
-	if (first_level_children_only)
-		sibling_only = true;
+	if (start >= end)
+		return -EINVAL;
 
 	read_lock(&resource_lock);
 
-	for (p = iomem_resource.child; p; p = next_resource(p, sibling_only)) {
-		if ((p->flags & res->flags) != res->flags)
+	for (p = iomem_resource.child; p; p = next_resource(p, first_lvl)) {
+		if ((p->flags & flags) != flags)
 			continue;
 		if ((desc != IORES_DESC_NONE) && (desc != p->desc))
 			continue;
@@ -352,45 +353,43 @@ static int find_next_iomem_res(struct resource *res, unsigned long desc,
 			p = NULL;
 			break;
 		}
-		if ((p->end >= start) && (p->start < end))
+		if ((p->end >= start) && (p->start <= end))
 			break;
 	}
 
 	read_unlock(&resource_lock);
 	if (!p)
 		return -1;
 	/* copy data */
-	if (res->start < p->start)
-		res->start = p->start;
-	if (res->end > p->end)
-		res->end = p->end;
+	res->start = max(start, p->start);
+	res->end = min(end, p->end);
 	res->flags = p->flags;
 	res->desc = p->desc;
 	return 0;
 }
 
-static int __walk_iomem_res_desc(struct resource *res, unsigned long desc,
-				 bool first_level_children_only,
-				 void *arg,
+static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
+				 unsigned long flags, unsigned long desc,
+				 bool first_lvl, void *arg,
 				 int (*func)(struct resource *, void *))
 {
-	u64 orig_end = res->end;
+	struct resource res;
 	int ret = -1;
 
-	while ((res->start < res->end) &&
-	       !find_next_iomem_res(res, desc, first_level_children_only)) {
-		ret = (*func)(res, arg);
+	while (start < end &&
+	       !find_next_iomem_res(start, end, flags, desc, first_lvl, &res)) {
+		ret = (*func)(&res, arg);
 		if (ret)
 			break;
 
-		res->start = res->end + 1;
-		res->end = orig_end;
+		start = res.end + 1;
 	}
 
 	return ret;
 }
 
-/*
+/**
  * Walks through iomem resources and calls func() with matching resource
  * ranges. This walks through whole tree and not just first level children.
  * All the memory ranges which overlap start,end and also match flags and
@@ -407,13 +406,7 @@ static int __walk_iomem_res_desc(struct resource *res, unsigned long desc,
 int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
 		u64 end, void *arg, int (*func)(struct resource *, void *))
 {
-	struct resource res;
-
-	res.start = start;
-	res.end = end;
-	res.flags = flags;
-
-	return __walk_iomem_res_desc(&res, desc, false, arg, func);
+	return __walk_iomem_res_desc(start, end, flags, desc, false, arg, func);
 }
 EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
@@ -425,15 +418,11 @@ EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
  * ranges.
  */
 int walk_system_ram_res(u64 start, u64 end, void *arg,
			int (*func)(struct resource *, void *))
 {
-	struct resource res;
+	unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
-	res.start = start;
-	res.end = end;
-	res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
-
-	return __walk_iomem_res_desc(&res, IORES_DESC_NONE, true,
+	return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, true,
				     arg, func);
 }
@@ -444,13 +433,9 @@ int walk_system_ram_res(u64 start, u64 end, void *arg,
 int walk_mem_res(u64 start, u64 end, void *arg,
		 int (*func)(struct resource *, void *))
 {
-	struct resource res;
+	unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 
-	res.start = start;
-	res.end = end;
-	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-
-	return __walk_iomem_res_desc(&res, IORES_DESC_NONE, true,
+	return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, true,
				     arg, func);
 }
@@ -462,27 +447,27 @@ int walk_mem_res(u64 start, u64 end, void *arg,
  * It is to be used only for System RAM.
  */
 int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
			  void *arg, int (*func)(unsigned long, unsigned long, void *))
 {
+	resource_size_t start, end;
+	unsigned long flags;
 	struct resource res;
 	unsigned long pfn, end_pfn;
-	u64 orig_end;
 	int ret = -1;
 
-	res.start = (u64) start_pfn << PAGE_SHIFT;
-	res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
-	res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
-	orig_end = res.end;
-	while ((res.start < res.end) &&
-		(find_next_iomem_res(&res, IORES_DESC_NONE, true) >= 0)) {
+	start = (u64) start_pfn << PAGE_SHIFT;
+	end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
+	flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+	while (start < end &&
+	       !find_next_iomem_res(start, end, flags, IORES_DESC_NONE,
+				    true, &res)) {
		pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
		end_pfn = (res.end + 1) >> PAGE_SHIFT;
		if (end_pfn > pfn)
			ret = (*func)(pfn, end_pfn - pfn, arg);
		if (ret)
			break;
-		res.start = res.end + 1;
-		res.end = orig_end;
+		start = res.end + 1;
 	}
 	return ret;
 }
@@ -658,8 +643,8 @@ static int find_resource(struct resource *root, struct resource *new,
  * @constraint: the size and alignment constraints to be met.
  */
 static int reallocate_resource(struct resource *root, struct resource *old,
-			resource_size_t newsize,
-			struct resource_constraint *constraint)
+			       resource_size_t newsize,
+			       struct resource_constraint *constraint)
 {
 	int err=0;
 	struct resource new = *old;
@@ -972,7 +957,7 @@ static int __adjust_resource(struct resource *res, resource_size_t start,
  * Existing children of the resource are assumed to be immutable.
  */
 int adjust_resource(struct resource *res, resource_size_t start,
-			resource_size_t size)
+		    resource_size_t size)
 {
 	int result;
 
@@ -983,9 +968,9 @@ int adjust_resource(struct resource *res, resource_size_t start,
 }
 EXPORT_SYMBOL(adjust_resource);
 
-static void __init __reserve_region_with_split(struct resource *root,
-		resource_size_t start, resource_size_t end,
-		const char *name)
+static void __init
+__reserve_region_with_split(struct resource *root, resource_size_t start,
+			    resource_size_t end, const char *name)
 {
 	struct resource *parent = root;
 	struct resource *conflict;
@@ -1044,9 +1029,9 @@ static void __init __reserve_region_with_split(struct resource *root,
 }
 
-void __init reserve_region_with_split(struct resource *root,
-		resource_size_t start, resource_size_t end,
-		const char *name)
+void __init
+reserve_region_with_split(struct resource *root, resource_size_t start,
			  resource_size_t end, const char *name)
 {
 	int abort = 0;
 
@@ -1172,7 +1157,7 @@ EXPORT_SYMBOL(__request_region);
  * The described resource region must match a currently busy region.
  */
 void __release_region(struct resource *parent, resource_size_t start,
-			resource_size_t n)
+		      resource_size_t n)
 {
 	struct resource **p;
 	resource_size_t end;
@@ -1234,7 +1219,7 @@ EXPORT_SYMBOL(__release_region);
  * simplicity.  Enhance this logic when necessary.
  */
 int release_mem_region_adjustable(struct resource *parent,
-			resource_size_t start, resource_size_t size)
+				  resource_size_t start, resource_size_t size)
 {
 	struct resource **p;
 	struct resource *res;
@@ -1410,9 +1395,9 @@ static int devm_region_match(struct device *dev, void *res, void *match_data)
 		this->start == match->start && this->n == match->n;
 }
 
-struct resource * __devm_request_region(struct device *dev,
-				struct resource *parent, resource_size_t start,
-				resource_size_t n, const char *name)
+struct resource *
+__devm_request_region(struct device *dev, struct resource *parent,
		      resource_size_t start, resource_size_t n, const char *name)
 {
 	struct region_devres *dr = NULL;
 	struct resource *res;
...
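With the walk_*() interfaces now passing the resource end inclusively, a callback sees each matching chunk as a struct resource whose size is end - start + 1. A minimal sketch of a caller (the example_* names are hypothetical):

    #include <linux/ioport.h>

    /* Count System RAM bytes in [start..end] via walk_system_ram_res(). */
    static int example_count_ram(struct resource *res, void *arg)
    {
    	u64 *total = arg;

    	/* res->end is inclusive; resource_size() == end - start + 1. */
    	*total += resource_size(res);
    	return 0;	/* keep walking */
    }

    static u64 example_ram_in_range(u64 start, u64 end)
    {
    	u64 total = 0;

    	walk_system_ram_res(start, end, &total, example_count_ram);
    	return total;
    }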
@@ -347,21 +347,6 @@ EXPORT_SYMBOL_GPL(play_idle);
 
 void cpu_startup_entry(enum cpuhp_state state)
 {
-	/*
-	 * This #ifdef needs to die, but it's too late in the cycle to
-	 * make this generic (ARM and SH have never invoked the canary
-	 * init for the non boot CPUs!). Will be fixed in 3.11
-	 */
-#ifdef CONFIG_X86
-	/*
-	 * If we're the non-boot CPU, nothing set the stack canary up
-	 * for us. The boot CPU already has it initialized but no harm
-	 * in doing it again. This is a good place for updating it, as
-	 * we wont ever return from this function (so the invalid
-	 * canaries already on the stack wont ever trigger).
-	 */
-	boot_init_stack_canary();
-#endif
 	arch_cpu_idle_prepare();
 	cpuhp_online_idle(state);
 	while (1)
...
@@ -56,7 +56,6 @@
 #include <linux/profile.h>
 #include <linux/rcupdate_wait.h>
 #include <linux/security.h>
-#include <linux/stackprotector.h>
 #include <linux/stop_machine.h>
 #include <linux/suspend.h>
 #include <linux/swait.h>
...
@@ -669,9 +669,9 @@ EXPORT_SYMBOL(on_each_cpu_mask);
  * You must not call this function with disabled interrupts or
  * from a hardware interrupt handler or from a bottom half handler.
  */
-void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info),
			smp_call_func_t func, void *info, bool wait,
-			gfp_t gfp_flags)
+			gfp_t gfp_flags, const struct cpumask *mask)
 {
 	cpumask_var_t cpus;
 	int cpu, ret;
@@ -680,9 +680,9 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 
 	if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
 		preempt_disable();
-		for_each_online_cpu(cpu)
+		for_each_cpu(cpu, mask)
 			if (cond_func(cpu, info))
-				cpumask_set_cpu(cpu, cpus);
+				__cpumask_set_cpu(cpu, cpus);
 		on_each_cpu_mask(cpus, func, info, wait);
 		preempt_enable();
 		free_cpumask_var(cpus);
@@ -692,7 +692,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 		 * just have to IPI them one by one.
 		 */
 		preempt_disable();
-		for_each_online_cpu(cpu)
+		for_each_cpu(cpu, mask)
 			if (cond_func(cpu, info)) {
 				ret = smp_call_function_single(cpu, func,
							       info, wait);
@@ -701,6 +701,15 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 		preempt_enable();
 	}
 }
+EXPORT_SYMBOL(on_each_cpu_cond_mask);
+
+void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+			smp_call_func_t func, void *info, bool wait,
+			gfp_t gfp_flags)
+{
+	on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags,
+			      cpu_online_mask);
+}
 EXPORT_SYMBOL(on_each_cpu_cond);
 
 static void do_nothing(void *unused)
...
@@ -68,9 +68,9 @@ EXPORT_SYMBOL(on_each_cpu_mask);
  * Preemption is disabled here to make sure the cond_func is called under the
  * same conditions in UP and SMP.
  */
-void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info),
			smp_call_func_t func, void *info, bool wait,
-			gfp_t gfp_flags)
+			gfp_t gfp_flags, const struct cpumask *mask)
 {
 	unsigned long flags;
 
@@ -82,6 +82,14 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 	}
 	preempt_enable();
 }
+EXPORT_SYMBOL(on_each_cpu_cond_mask);
+
+void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+			smp_call_func_t func, void *info, bool wait,
+			gfp_t gfp_flags)
+{
+	on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags, NULL);
+}
 EXPORT_SYMBOL(on_each_cpu_cond);
 
 int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
...
@@ -8,6 +8,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/hugetlb.h>
 #include <asm/tlb.h>
 #include <asm-generic/pgtable.h>
...