Commit 1de4fa14 authored by Dave Hansen's avatar Dave Hansen Committed by Thomas Gleixner

x86, mpx: Cleanup unused bound tables

The previous patch allocates bounds tables on-demand.  As noted in
an earlier description, these can add up to *HUGE* amounts of
memory.  This has caused OOMs in practice when running tests.

This patch adds support for freeing bounds tables when they are no
longer in use.

There are two types of mappings in play when unmapping tables:
 1. The mapping with the actual data, which userspace is
    munmap()ing or brk()ing away, etc...
 2. The mapping for the bounds table *backing* the data
    (is tagged with VM_MPX, see the patch "add MPX specific
    mmap interface").

If userspace uses the prctl() introduced earlier in this patchset
to enable the management of bounds tables in kernel, when it
unmaps the first type of mapping with the actual data, the kernel
needs to free the mapping for the bounds table backing the data.
This patch hooks in at the very end of do_munmap() to do so.
We look at the addresses being unmapped and find the bounds
directory entries and tables which cover those addresses.  If
an entire table is unused, we clear associated directory entry
and free the table.

Once we unmap the bounds table, we would have a bounds directory
entry pointing at empty address space. That address space might
now be allocated for some other (random) use, and the MPX
hardware might now try to walk it as if it were a bounds table.
That would be bad.  So any unmapping of an entire bounds table
has to be accompanied by a corresponding write to the bounds
directory entry to invalidate it.  That write to the bounds
directory can fault, which causes the following problem:

Since we are doing the freeing from munmap() (and other paths
like it), we hold mmap_sem for write. If we fault, the page
fault handler will attempt to acquire mmap_sem for read and
we will deadlock.  To avoid the deadlock, we pagefault_disable()
when touching the bounds directory entry and use a
get_user_pages() to resolve the fault.

The unmapping of bounds tables happens under vm_munmap().  We
also (indirectly) call vm_munmap() to _do_ the unmapping of the
bounds tables.  We avoid unbounded recursion by disallowing
freeing of bounds tables *for* bounds tables.  This would not
occur normally, so should not have any practical impact.  Being
strict about it here helps ensure that we do not have an
exploitable stack overflow.
Based-on-patch-by: Qiaowei Ren <qiaowei.ren@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: linux-mm@kvack.org
Cc: linux-mips@linux-mips.org
Cc: Dave Hansen <dave@sr71.net>
Link: http://lkml.kernel.org/r/20141114151831.E4531C4A@viggo.jf.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent fe3d197f
...@@ -109,4 +109,10 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, ...@@ -109,4 +109,10 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
mpx_mm_init(mm); mpx_mm_init(mm);
} }
/*
 * x86 hook invoked from do_munmap() after unmap_region().  It forwards
 * the mm, the vma and the [start, end) range being unmapped, unchanged,
 * to mpx_notify_unmap().
 */
static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
mpx_notify_unmap(mm, vma, start, end);
}
#endif /* _ASM_X86_MMU_CONTEXT_H */ #endif /* _ASM_X86_MMU_CONTEXT_H */
...@@ -51,6 +51,13 @@ ...@@ -51,6 +51,13 @@
#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) #define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
#define MPX_BNDSTA_ERROR_CODE 0x3 #define MPX_BNDSTA_ERROR_CODE 0x3
/* Masks with the low MPX_BD_ENTRY_OFFSET / MPX_BT_ENTRY_OFFSET bits set. */
#define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1)
#define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1)
/*
 * Drop the low (MPX_BT_ENTRY_OFFSET + MPX_IGN_BITS) bits of addr, keep
 * the bits selected by MPX_BD_ENTRY_MASK and scale the result by
 * 1 << MPX_BD_ENTRY_SHIFT.
 * NOTE(review): presumably the byte offset of the bounds directory entry
 * covering addr - confirm against the MPX_* constant definitions.
 */
#define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \
MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT)
/*
 * Drop the low MPX_IGN_BITS bits of addr, keep the bits selected by
 * MPX_BT_ENTRY_MASK and scale the result by 1 << MPX_BT_ENTRY_SHIFT.
 * NOTE(review): presumably the byte offset of the entry for addr within
 * its bounds table - confirm against the MPX_* constant definitions.
 */
#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \
MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT)
#ifdef CONFIG_X86_INTEL_MPX #ifdef CONFIG_X86_INTEL_MPX
siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
struct xsave_struct *xsave_buf); struct xsave_struct *xsave_buf);
...@@ -67,6 +74,8 @@ static inline void mpx_mm_init(struct mm_struct *mm) ...@@ -67,6 +74,8 @@ static inline void mpx_mm_init(struct mm_struct *mm)
*/ */
mm->bd_addr = MPX_INVALID_BOUNDS_DIR; mm->bd_addr = MPX_INVALID_BOUNDS_DIR;
} }
/*
 * Unmap notification for MPX, declared only under CONFIG_X86_INTEL_MPX.
 * It receives the mm, the vma and the [start, end) range being unmapped;
 * the definition is not in this header.
 */
void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long start, unsigned long end);
#else #else
static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
struct xsave_struct *xsave_buf) struct xsave_struct *xsave_buf)
...@@ -84,6 +93,11 @@ static inline int kernel_managing_mpx_tables(struct mm_struct *mm) ...@@ -84,6 +93,11 @@ static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
static inline void mpx_mm_init(struct mm_struct *mm) static inline void mpx_mm_init(struct mm_struct *mm)
{ {
} }
/*
 * Stub used when CONFIG_X86_INTEL_MPX is not set: the unmap notification
 * does nothing.
 */
static inline void mpx_notify_unmap(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
}
#endif /* CONFIG_X86_INTEL_MPX */ #endif /* CONFIG_X86_INTEL_MPX */
#endif /* _ASM_X86_MPX_H */ #endif /* _ASM_X86_MPX_H */
This diff is collapsed.
...@@ -47,4 +47,10 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, ...@@ -47,4 +47,10 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
{ {
} }
/*
 * Generic arch_unmap() hook: an empty implementation with the same
 * signature as the x86 hook, so the arch_unmap() call in do_munmap()
 * is a no-op here.
 */
static inline void arch_unmap(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
}
#endif /* __ASM_GENERIC_MMU_CONTEXT_H */ #endif /* __ASM_GENERIC_MMU_CONTEXT_H */
...@@ -2597,6 +2597,8 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) ...@@ -2597,6 +2597,8 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
detach_vmas_to_be_unmapped(mm, vma, prev, end); detach_vmas_to_be_unmapped(mm, vma, prev, end);
unmap_region(mm, vma, prev, start, end); unmap_region(mm, vma, prev, start, end);
arch_unmap(mm, vma, start, end);
/* Fix up all other VM information */ /* Fix up all other VM information */
remove_vma_list(mm, vma); remove_vma_list(mm, vma);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment