Commit db6e3304 authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew Morton)

Merge more patches from Andrew Morton:
 "The rest of MM"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: remove free_area_cache
  zswap: add documentation
  zswap: add to mm/
  zbud: add to mm/
parents ae924949 98d1e64f
Overview:
Zswap is a lightweight compressed cache for swap pages. It takes pages that are
in the process of being swapped out and attempts to compress them into a
dynamically allocated RAM-based memory pool. zswap basically trades CPU cycles
for potentially reduced swap I/O.  This trade-off can also result in a
significant performance improvement if reads from the compressed cache are
faster than reads from a swap device.
NOTE: Zswap is a new feature as of v3.11 and interacts heavily with memory
reclaim. This interaction has not been fully explored on the large set of
potential configurations and workloads that exist. For this reason, zswap
is a work in progress and should be considered experimental.
Some potential benefits:
* Desktop/laptop users with limited RAM capacities can mitigate the
    performance impact of swapping.
* Overcommitted guests that share a common I/O resource can
    dramatically reduce their swap I/O pressure, avoiding heavy-handed I/O
    throttling by the hypervisor. This allows more work to get done with less
    impact on the guest workload and guests sharing the I/O subsystem.
* Users with SSDs as swap devices can extend the life of the device by
    drastically reducing life-shortening writes.
Zswap evicts pages from compressed cache on an LRU basis to the backing swap
device when the compressed pool reaches its size limit. This requirement had
been identified in prior community discussions.
To enable zswap, the "enabled" attribute must be set to 1 at boot time, e.g.
zswap.enabled=1
Design:
Zswap receives pages for compression through the Frontswap API and is able to
evict pages from its own compressed pool on an LRU basis and write them back to
the backing swap device in the case that the compressed pool is full.
Zswap makes use of zbud for managing the compressed memory pool. Each
allocation in zbud is not directly accessible by address. Rather, a handle is
returned by the allocation routine and that handle must be mapped before being
accessed. The compressed memory pool grows on demand and shrinks as compressed
pages are freed. The pool is not preallocated.
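
Because zbud hands back opaque handles rather than pointers, every access to a
compressed object is bracketed by a map/unmap pair. Below is a minimal sketch of
that pattern using the zbud API added by this commit; the function and variable
names are illustrative and not taken from zswap.c.

#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/zbud.h>

/* Illustrative only: stash 'len' bytes of already-compressed data in a
 * zbud pool and hand back the opaque handle for later lookup. */
static int example_stash(struct zbud_pool *pool, const void *src,
                         int len, unsigned long *handle)
{
        void *dst;
        int ret;

        /* zbud_alloc() yields a handle, not a usable address */
        ret = zbud_alloc(pool, len, GFP_KERNEL, handle);
        if (ret)
                return ret;

        /* the handle must be mapped before the memory is touched */
        dst = zbud_map(pool, *handle);
        memcpy(dst, src, len);
        zbud_unmap(pool, *handle);
        return 0;
}
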
When a swap page is passed from frontswap to zswap, zswap maintains a mapping
of the swap entry, a combination of the swap type and swap offset, to the zbud
handle that references that compressed swap page. This mapping is achieved
with a red-black tree per swap type. The swap offset is the search key for the
tree nodes.
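
In other words, each swap type owns one tree, each node pairs a swap offset with
the zbud handle holding that page's compressed data, and a lookup simply walks
the tree by offset. A rough sketch of such a lookup follows; the structure and
function names are illustrative rather than the exact ones used in zswap.c.

#include <linux/rbtree.h>
#include <linux/types.h>

/* Illustrative per-page bookkeeping: one node per compressed entry */
struct example_entry {
        struct rb_node rbnode;
        pgoff_t offset;          /* swap offset, the search key */
        unsigned long handle;    /* zbud handle for the compressed data */
};

/* Search one swap type's tree for the entry at a given swap offset */
static struct example_entry *example_rb_search(struct rb_root *root,
                                               pgoff_t offset)
{
        struct rb_node *node = root->rb_node;
        struct example_entry *entry;

        while (node) {
                entry = rb_entry(node, struct example_entry, rbnode);
                if (entry->offset > offset)
                        node = node->rb_left;
                else if (entry->offset < offset)
                        node = node->rb_right;
                else
                        return entry;
        }
        return NULL;
}
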
During a page fault on a PTE that is a swap entry, frontswap calls the zswap
load function to decompress the page into the page allocated by the page fault
handler.
Once there are no PTEs referencing a swap page stored in zswap (i.e. the count
in the swap_map goes to 0) the swap code calls the zswap invalidate function,
via frontswap, to free the compressed entry.
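
The store, load, and invalidate paths described above are exactly the hooks that
the frontswap API expects a backend to supply. The sketch below shows how such
callbacks are typically wired up; the example_* names are placeholders rather
than the functions actually defined in zswap.c.

#include <linux/frontswap.h>

/* Placeholder callbacks: each would compress, decompress, or free the
 * entry identified by (type, offset) as described above. */
static int example_store(unsigned type, pgoff_t offset, struct page *page);
static int example_load(unsigned type, pgoff_t offset, struct page *page);
static void example_invalidate_page(unsigned type, pgoff_t offset);
static void example_invalidate_area(unsigned type);
static void example_init(unsigned type);

static struct frontswap_ops example_frontswap_ops = {
        .store = example_store,                     /* page is being swapped out */
        .load = example_load,                       /* fault on a swap PTE */
        .invalidate_page = example_invalidate_page, /* swap_map count hit 0 */
        .invalidate_area = example_invalidate_area, /* swap area removed */
        .init = example_init,                       /* new swap area added */
};

/* registered once at init time via frontswap_register_ops() */
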
Zswap seeks to be simple in its policies. Sysfs attributes allow for one
user-controlled policy:
* max_pool_percent - The maximum percentage of memory that the compressed
pool can occupy.
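The limit can be set at boot time in the same way as the other attributes, e.g.
to cap the compressed pool at 20% of total memory:
zswap.max_pool_percent=20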
Zswap allows the compressor to be selected at kernel boot time by setting the
"compressor" attribute. The default compressor is lzo. e.g.
zswap.compressor=deflate
A debugfs interface is provided for various statistics about pool size, number
of pages stored, and counters for the reasons pages are rejected.
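
These statistics appear under debugfs when it is mounted. As a hedged
illustration of how such counters are typically exported, the sketch below uses
the generic debugfs API; the directory, file, and variable names are
placeholders, not necessarily the ones zswap creates.

#include <linux/debugfs.h>
#include <linux/init.h>

/* Illustrative counters exported under /sys/kernel/debug/example/ */
static u64 example_stored_pages;
static u64 example_pool_limit_hit;
static struct dentry *example_debugfs_root;

static int __init example_debugfs_init(void)
{
        if (!debugfs_initialized())
                return -ENODEV;

        example_debugfs_root = debugfs_create_dir("example", NULL);
        if (!example_debugfs_root)
                return -ENOMEM;

        debugfs_create_u64("stored_pages", S_IRUGO,
                           example_debugfs_root, &example_stored_pages);
        debugfs_create_u64("pool_limit_hit", S_IRUGO,
                           example_debugfs_root, &example_pool_limit_hit);
        return 0;
}
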
@@ -181,11 +181,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base(random_factor);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
mm->unmap_area = arch_unmap_area_topdown;
}
}
......
@@ -90,11 +90,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
mm->unmap_area = arch_unmap_area_topdown;
}
}
EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
......
@@ -158,11 +158,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base(random_factor);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
mm->unmap_area = arch_unmap_area_topdown;
}
}
......
@@ -92,10 +92,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
mm->unmap_area = arch_unmap_area_topdown;
}
}
@@ -91,11 +91,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
mm->unmap_area = arch_unmap_area_topdown;
}
}
@@ -176,11 +174,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = s390_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = s390_get_unmapped_area_topdown;
mm->unmap_area = arch_unmap_area_topdown;
}
}
......
@@ -290,7 +290,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
sysctl_legacy_va_layout) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
/* We know it's 32-bit */
unsigned long task_size = STACK_TOP32;
@@ -302,7 +301,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
mm->unmap_area = arch_unmap_area_topdown;
}
}
......
@@ -66,10 +66,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base(mm);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
mm->unmap_area = arch_unmap_area_topdown;
}
}
@@ -308,8 +308,6 @@ static int load_aout_binary(struct linux_binprm *bprm)
(current->mm->start_data = N_DATADDR(ex));
current->mm->brk = ex.a_bss +
(current->mm->start_brk = N_BSSADDR(ex));
current->mm->free_area_cache = TASK_UNMAPPED_BASE;
current->mm->cached_hole_size = 0;
retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
if (retval < 0) {
......
@@ -115,10 +115,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = mmap_legacy_base();
mm->get_unmapped_area = arch_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
mm->unmap_area = arch_unmap_area_topdown;
}
}
@@ -255,8 +255,6 @@ static int load_aout_binary(struct linux_binprm * bprm)
(current->mm->start_data = N_DATADDR(ex));
current->mm->brk = ex.a_bss +
(current->mm->start_brk = N_BSSADDR(ex));
current->mm->free_area_cache = current->mm->mmap_base;
current->mm->cached_hole_size = 0;
retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
if (retval < 0) {
......
@@ -738,8 +738,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
/* Do this so that we can load the interpreter, if need be. We will
change some of these later */
current->mm->free_area_cache = current->mm->mmap_base;
current->mm->cached_hole_size = 0;
retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
executable_stack);
if (retval < 0) {
......
@@ -330,12 +330,9 @@ struct mm_struct {
unsigned long (*get_unmapped_area) (struct file *filp,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags);
void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
#endif
unsigned long mmap_base; /* base of mmap area */
unsigned long task_size; /* size of task vm space */
unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */
unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */
unsigned long highest_vm_end; /* highest vma end address */
pgd_t * pgd;
atomic_t mm_users; /* How many users with user space? */
......
@@ -322,8 +322,6 @@ extern unsigned long
arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags);
extern void arch_unmap_area(struct mm_struct *, unsigned long);
extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
#else
static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
#endif
......
#ifndef _ZBUD_H_
#define _ZBUD_H_

#include <linux/types.h>

struct zbud_pool;

struct zbud_ops {
	/* called by zbud_reclaim_page() to write back and free @handle */
	int (*evict)(struct zbud_pool *pool, unsigned long handle);
};

/* pool creation and teardown */
struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
void zbud_destroy_pool(struct zbud_pool *pool);

/* allocation returns an opaque handle; it must be mapped before use */
int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
			unsigned long *handle);
void zbud_free(struct zbud_pool *pool, unsigned long handle);

/* evict the LRU zbud page, retrying up to @retries entries on failure */
int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);

/* temporary access to the memory behind a handle */
void *zbud_map(struct zbud_pool *pool, unsigned long handle);
void zbud_unmap(struct zbud_pool *pool, unsigned long handle);

/* current size of the pool, in pages */
u64 zbud_get_pool_size(struct zbud_pool *pool);

#endif /* _ZBUD_H_ */
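
For context, here is a hedged sketch of how a zbud user might drive reclaim
through this interface: zbud_reclaim_page() picks an LRU victim and invokes the
pool's evict callback, which is expected to write the data back and then release
the handle. example_writeback() below is a hypothetical helper, not a real zswap
function.

#include <linux/zbud.h>

static int example_writeback(struct zbud_pool *pool, unsigned long handle);

/* Called by zbud_reclaim_page() for an LRU victim: write the data back
 * to the swap device, then drop the zbud allocation. */
static int example_evict(struct zbud_pool *pool, unsigned long handle)
{
        int ret = example_writeback(pool, handle);   /* hypothetical helper */

        if (ret)
                return ret;
        zbud_free(pool, handle);
        return 0;
}

static struct zbud_ops example_ops = {
        .evict = example_evict,
};

/* Typical flow: create the pool with the ops attached, then, when the
 * pool outgrows its limit, try to reclaim a page with a few retries:
 *
 *      pool = zbud_create_pool(GFP_KERNEL, &example_ops);
 *      ...
 *      if (zbud_reclaim_page(pool, 8))
 *              nothing could be written back this time
 */
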
@@ -365,8 +365,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
mm->locked_vm = 0;
mm->mmap = NULL;
mm->mmap_cache = NULL;
mm->free_area_cache = oldmm->mmap_base;
mm->cached_hole_size = ~0UL;
mm->map_count = 0;
cpumask_clear(mm_cpumask(mm));
mm->mm_rb = RB_ROOT;
@@ -540,8 +538,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
mm->nr_ptes = 0;
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
mm->free_area_cache = TASK_UNMAPPED_BASE;
mm->cached_hole_size = ~0UL;
mm_init_aio(mm);
mm_init_owner(mm, p);
......
@@ -478,6 +478,36 @@ config FRONTSWAP
If unsure, say Y to enable frontswap.
config ZBUD
tristate
default n
help
A special purpose allocator for storing compressed pages.
It is designed to store up to two compressed pages per physical
page. While this design limits storage density, it has simple and
deterministic reclaim properties that make it preferable to a higher
density approach when reclaim will be used.
config ZSWAP
bool "Compressed cache for swap pages (EXPERIMENTAL)"
depends on FRONTSWAP && CRYPTO=y
select CRYPTO_LZO
select ZBUD
default n
help
A lightweight compressed cache for swap pages. It takes
pages that are in the process of being swapped out and attempts to
compress them into a dynamically allocated RAM-based memory pool.
This can result in a significant I/O reduction on the swap device and,
in the case where decompressing from RAM is faster than swap device
reads, can also improve workload performance.
This is marked experimental because it is a new feature (as of
v3.11) that interacts heavily with memory reclaim. While these
interactions don't cause any known issues on simple memory setups,
they have not been fully explored on the large set of potential
configurations and workloads that exist.
config MEM_SOFT_DIRTY
bool "Track memory changes"
depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY
......
@@ -32,6 +32,7 @@ obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
obj-$(CONFIG_FRONTSWAP) += frontswap.o
obj-$(CONFIG_ZSWAP) += zswap.o
obj-$(CONFIG_HAS_DMA) += dmapool.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
obj-$(CONFIG_NUMA) += mempolicy.o
@@ -58,3 +59,4 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
obj-$(CONFIG_CLEANCACHE) += cleancache.o
obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
obj-$(CONFIG_ZBUD) += zbud.o
@@ -1878,15 +1878,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
}
#endif
void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
{
/*
* Is this a new hole at the lowest possible address?
*/
if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache)
mm->free_area_cache = addr;
}
/*
* This mmap-allocator allocates new areas top-down from below the
* stack's low limit (the base):
@@ -1943,19 +1934,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
}
#endif
void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
{
/*
* Is this a new hole at the highest possible address?
*/
if (addr > mm->free_area_cache)
mm->free_area_cache = addr;
/* dont allow allocations above current base */
if (mm->free_area_cache > mm->mmap_base)
mm->free_area_cache = mm->mmap_base;
}
unsigned long
get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
@@ -2376,7 +2354,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
{
struct vm_area_struct **insertion_point;
struct vm_area_struct *tail_vma = NULL;
unsigned long addr;
insertion_point = (prev ? &prev->vm_next : &mm->mmap);
vma->vm_prev = NULL;
@@ -2393,11 +2370,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
} else
mm->highest_vm_end = prev ? prev->vm_end : 0;
tail_vma->vm_next = NULL;
if (mm->unmap_area == arch_unmap_area)
addr = prev ? prev->vm_end : mm->mmap_base;
else
addr = vma ? vma->vm_start : mm->mmap_base;
mm->unmap_area(mm, addr);
mm->mmap_cache = NULL; /* Kill the cache. */
}
......
@@ -1871,10 +1871,6 @@ unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
return -ENOMEM;
}
void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
{
}
void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen,
int even_cows)
......
@@ -295,7 +295,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
{
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
mm->unmap_area = arch_unmap_area;
}
#endif
......