Commit 20c759ca authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge small final update from Andrew Morton:

 - DAX feature work: add fsync/msync support

 - kfree cleanup, MAINTAINERS update

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  MAINTAINERS: return arch/sh to maintained state, with new maintainers
  tree wide: use kvfree() than conditional kfree()/vfree()
  dax: never rely on bh.b_dev being set by get_block()
  xfs: call dax_pfn_mkwrite() for DAX fsync/msync
  ext4: call dax_pfn_mkwrite() for DAX fsync/msync
  ext2: call dax_pfn_mkwrite() for DAX fsync/msync
  dax: add support for fsync/sync
  mm: add find_get_entries_tag()
  dax: support dirty DAX entries in radix tree
  pmem: add wb_cache_pmem() to the PMEM API
  dax: fix conversion of holes to PMDs
  dax: fix NULL pointer dereference in __dax_dbg()
parents b82dde02 114bf37e
...@@ -10453,9 +10453,11 @@ S: Maintained ...@@ -10453,9 +10453,11 @@ S: Maintained
F: drivers/net/ethernet/dlink/sundance.c F: drivers/net/ethernet/dlink/sundance.c
SUPERH SUPERH
M: Yoshinori Sato <ysato@users.sourceforge.jp>
M: Rich Felker <dalias@libc.org>
L: linux-sh@vger.kernel.org L: linux-sh@vger.kernel.org
Q: http://patchwork.kernel.org/project/linux-sh/list/ Q: http://patchwork.kernel.org/project/linux-sh/list/
S: Orphan S: Maintained
F: Documentation/sh/ F: Documentation/sh/
F: arch/sh/ F: arch/sh/
F: drivers/sh/ F: drivers/sh/
......
...@@ -1200,10 +1200,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, ...@@ -1200,10 +1200,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
while (i--) while (i--)
if (pages[i]) if (pages[i])
__free_pages(pages[i], 0); __free_pages(pages[i], 0);
if (array_size <= PAGE_SIZE) kvfree(pages);
kfree(pages);
else
vfree(pages);
return NULL; return NULL;
} }
...@@ -1211,7 +1208,6 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, ...@@ -1211,7 +1208,6 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
size_t size, struct dma_attrs *attrs) size_t size, struct dma_attrs *attrs)
{ {
int count = size >> PAGE_SHIFT; int count = size >> PAGE_SHIFT;
int array_size = count * sizeof(struct page *);
int i; int i;
if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) { if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
...@@ -1222,10 +1218,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages, ...@@ -1222,10 +1218,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
__free_pages(pages[i], 0); __free_pages(pages[i], 0);
} }
if (array_size <= PAGE_SIZE) kvfree(pages);
kfree(pages);
else
vfree(pages);
return 0; return 0;
} }
......
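Many of the hunks in this commit make the same mechanical change, of which the arch/arm/mm/dma-mapping.c hunk above is the first: callers used to remember whether a buffer came from the kmalloc or vmalloc family and pick kfree() or vfree() accordingly, while kvfree() (from <linux/mm.h>) inspects the pointer and does the right thing. A minimal sketch of the before/after shape, using a hypothetical page-array helper rather than code from this commit:

#include <linux/mm.h>      /* kvfree() */
#include <linux/slab.h>    /* kzalloc() */
#include <linux/vmalloc.h> /* vzalloc() */

/* Hypothetical helper: allocate an array whose size may exceed a page. */
static struct page **example_alloc_page_array(unsigned int count)
{
        size_t size = count * sizeof(struct page *);

        if (size <= PAGE_SIZE)
                return kzalloc(size, GFP_KERNEL);
        return vzalloc(size);
}

static void example_free_page_array(struct page **pages)
{
        /*
         * kvfree() checks is_vmalloc_addr() internally, so the caller no
         * longer needs to track which allocator produced the pointer.
         */
        kvfree(pages);
}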
...@@ -67,18 +67,19 @@ static inline void arch_wmb_pmem(void) ...@@ -67,18 +67,19 @@ static inline void arch_wmb_pmem(void)
} }
/** /**
* __arch_wb_cache_pmem - write back a cache range with CLWB * arch_wb_cache_pmem - write back a cache range with CLWB
* @vaddr: virtual start address * @vaddr: virtual start address
* @size: number of bytes to write back * @size: number of bytes to write back
* *
* Write back a cache range using the CLWB (cache line write back) * Write back a cache range using the CLWB (cache line write back)
* instruction. This function requires explicit ordering with an * instruction. This function requires explicit ordering with an
* arch_wmb_pmem() call. This API is internal to the x86 PMEM implementation. * arch_wmb_pmem() call.
*/ */
static inline void __arch_wb_cache_pmem(void *vaddr, size_t size) static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
{ {
u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
unsigned long clflush_mask = x86_clflush_size - 1; unsigned long clflush_mask = x86_clflush_size - 1;
void *vaddr = (void __force *)addr;
void *vend = vaddr + size; void *vend = vaddr + size;
void *p; void *p;
...@@ -115,7 +116,7 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, ...@@ -115,7 +116,7 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
len = copy_from_iter_nocache(vaddr, bytes, i); len = copy_from_iter_nocache(vaddr, bytes, i);
if (__iter_needs_pmem_wb(i)) if (__iter_needs_pmem_wb(i))
__arch_wb_cache_pmem(vaddr, bytes); arch_wb_cache_pmem(addr, bytes);
return len; return len;
} }
...@@ -133,7 +134,7 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) ...@@ -133,7 +134,7 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
void *vaddr = (void __force *)addr; void *vaddr = (void __force *)addr;
memset(vaddr, 0, size); memset(vaddr, 0, size);
__arch_wb_cache_pmem(vaddr, size); arch_wb_cache_pmem(addr, size);
} }
static inline bool __arch_has_wmb_pmem(void) static inline bool __arch_has_wmb_pmem(void)
......
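The body of arch_wb_cache_pmem() is not shown in full above. For orientation, here is a hedged sketch of the kind of cache-line write-back loop such a helper performs; it assumes the clwb() helper and the boot_cpu_data field used in the x86 code above, and is not a verbatim copy of the kernel's implementation:

/* Sketch: write back every cache line covering [vaddr, vaddr + size). */
static inline void example_wb_cache_range(void *vaddr, size_t size)
{
        u16 line_size = boot_cpu_data.x86_clflush_size;
        unsigned long mask = line_size - 1;
        void *vend = vaddr + size;
        void *p;

        /* Round down to a cache-line boundary, then flush line by line. */
        for (p = (void *)((unsigned long)vaddr & ~mask); p < vend;
             p += line_size)
                clwb(p);
}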
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <linux/pstore.h> #include <linux/pstore.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/mm.h> /* kvfree() */
#include <acpi/apei.h> #include <acpi/apei.h>
#include "apei-internal.h" #include "apei-internal.h"
...@@ -532,10 +533,7 @@ static int __erst_record_id_cache_add_one(void) ...@@ -532,10 +533,7 @@ static int __erst_record_id_cache_add_one(void)
return -ENOMEM; return -ENOMEM;
memcpy(new_entries, entries, memcpy(new_entries, entries,
erst_record_id_cache.len * sizeof(entries[0])); erst_record_id_cache.len * sizeof(entries[0]));
if (erst_record_id_cache.size < PAGE_SIZE) kvfree(entries);
kfree(entries);
else
vfree(entries);
erst_record_id_cache.entries = entries = new_entries; erst_record_id_cache.entries = entries = new_entries;
erst_record_id_cache.size = new_size; erst_record_id_cache.size = new_size;
} }
......
...@@ -364,12 +364,9 @@ static void bm_free_pages(struct page **pages, unsigned long number) ...@@ -364,12 +364,9 @@ static void bm_free_pages(struct page **pages, unsigned long number)
} }
} }
static void bm_vk_free(void *ptr, int v) static inline void bm_vk_free(void *ptr)
{ {
if (v) kvfree(ptr);
vfree(ptr);
else
kfree(ptr);
} }
/* /*
...@@ -379,7 +376,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) ...@@ -379,7 +376,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{ {
struct page **old_pages = b->bm_pages; struct page **old_pages = b->bm_pages;
struct page **new_pages, *page; struct page **new_pages, *page;
unsigned int i, bytes, vmalloced = 0; unsigned int i, bytes;
unsigned long have = b->bm_number_of_pages; unsigned long have = b->bm_number_of_pages;
BUG_ON(have == 0 && old_pages != NULL); BUG_ON(have == 0 && old_pages != NULL);
...@@ -401,7 +398,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) ...@@ -401,7 +398,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
PAGE_KERNEL); PAGE_KERNEL);
if (!new_pages) if (!new_pages)
return NULL; return NULL;
vmalloced = 1;
} }
if (want >= have) { if (want >= have) {
...@@ -411,7 +407,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) ...@@ -411,7 +407,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
if (!page) { if (!page) {
bm_free_pages(new_pages + have, i - have); bm_free_pages(new_pages + have, i - have);
bm_vk_free(new_pages, vmalloced); bm_vk_free(new_pages);
return NULL; return NULL;
} }
/* we want to know which page it is /* we want to know which page it is
...@@ -427,11 +423,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) ...@@ -427,11 +423,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
*/ */
} }
if (vmalloced)
b->bm_flags |= BM_P_VMALLOCED;
else
b->bm_flags &= ~BM_P_VMALLOCED;
return new_pages; return new_pages;
} }
...@@ -469,7 +460,7 @@ void drbd_bm_cleanup(struct drbd_device *device) ...@@ -469,7 +460,7 @@ void drbd_bm_cleanup(struct drbd_device *device)
if (!expect(device->bitmap)) if (!expect(device->bitmap))
return; return;
bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages); bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
bm_vk_free(device->bitmap->bm_pages, (BM_P_VMALLOCED & device->bitmap->bm_flags)); bm_vk_free(device->bitmap->bm_pages);
kfree(device->bitmap); kfree(device->bitmap);
device->bitmap = NULL; device->bitmap = NULL;
} }
...@@ -643,7 +634,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi ...@@ -643,7 +634,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
unsigned long want, have, onpages; /* number of pages */ unsigned long want, have, onpages; /* number of pages */
struct page **npages, **opages = NULL; struct page **npages, **opages = NULL;
int err = 0, growing; int err = 0, growing;
int opages_vmalloced;
if (!expect(b)) if (!expect(b))
return -ENOMEM; return -ENOMEM;
...@@ -656,8 +646,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi ...@@ -656,8 +646,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
if (capacity == b->bm_dev_capacity) if (capacity == b->bm_dev_capacity)
goto out; goto out;
opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);
if (capacity == 0) { if (capacity == 0) {
spin_lock_irq(&b->bm_lock); spin_lock_irq(&b->bm_lock);
opages = b->bm_pages; opages = b->bm_pages;
...@@ -671,7 +659,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi ...@@ -671,7 +659,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
b->bm_dev_capacity = 0; b->bm_dev_capacity = 0;
spin_unlock_irq(&b->bm_lock); spin_unlock_irq(&b->bm_lock);
bm_free_pages(opages, onpages); bm_free_pages(opages, onpages);
bm_vk_free(opages, opages_vmalloced); bm_vk_free(opages);
goto out; goto out;
} }
bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT)); bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
...@@ -744,7 +732,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi ...@@ -744,7 +732,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
spin_unlock_irq(&b->bm_lock); spin_unlock_irq(&b->bm_lock);
if (opages != npages) if (opages != npages)
bm_vk_free(opages, opages_vmalloced); bm_vk_free(opages);
if (!growing) if (!growing)
b->bm_set = bm_count_bits(b); b->bm_set = bm_count_bits(b);
drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want); drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
......
...@@ -536,9 +536,6 @@ struct drbd_bitmap; /* opaque for drbd_device */ ...@@ -536,9 +536,6 @@ struct drbd_bitmap; /* opaque for drbd_device */
/* definition of bits in bm_flags to be used in drbd_bm_lock /* definition of bits in bm_flags to be used in drbd_bm_lock
* and drbd_bitmap_io and friends. */ * and drbd_bitmap_io and friends. */
enum bm_flag { enum bm_flag {
/* do we need to kfree, or vfree bm_pages? */
BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
/* currently locked for bulk operation */ /* currently locked for bulk operation */
BM_LOCKED_MASK = 0xf, BM_LOCKED_MASK = 0xf,
......
...@@ -93,14 +93,11 @@ struct vma_data { ...@@ -93,14 +93,11 @@ struct vma_data {
spinlock_t lock; /* Serialize access to this structure. */ spinlock_t lock; /* Serialize access to this structure. */
int count; /* Number of pages allocated. */ int count; /* Number of pages allocated. */
enum mspec_page_type type; /* Type of pages allocated. */ enum mspec_page_type type; /* Type of pages allocated. */
int flags; /* See VMD_xxx below. */
unsigned long vm_start; /* Original (unsplit) base. */ unsigned long vm_start; /* Original (unsplit) base. */
unsigned long vm_end; /* Original (unsplit) end. */ unsigned long vm_end; /* Original (unsplit) end. */
unsigned long maddr[0]; /* Array of MSPEC addresses. */ unsigned long maddr[0]; /* Array of MSPEC addresses. */
}; };
#define VMD_VMALLOCED 0x1 /* vmalloc'd rather than kmalloc'd */
/* used on shub2 to clear FOP cache in the HUB */ /* used on shub2 to clear FOP cache in the HUB */
static unsigned long scratch_page[MAX_NUMNODES]; static unsigned long scratch_page[MAX_NUMNODES];
#define SH2_AMO_CACHE_ENTRIES 4 #define SH2_AMO_CACHE_ENTRIES 4
...@@ -185,10 +182,7 @@ mspec_close(struct vm_area_struct *vma) ...@@ -185,10 +182,7 @@ mspec_close(struct vm_area_struct *vma)
"failed to zero page %ld\n", my_page); "failed to zero page %ld\n", my_page);
} }
if (vdata->flags & VMD_VMALLOCED) kvfree(vdata);
vfree(vdata);
else
kfree(vdata);
} }
/* /*
...@@ -256,7 +250,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma, ...@@ -256,7 +250,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
enum mspec_page_type type) enum mspec_page_type type)
{ {
struct vma_data *vdata; struct vma_data *vdata;
int pages, vdata_size, flags = 0; int pages, vdata_size;
if (vma->vm_pgoff != 0) if (vma->vm_pgoff != 0)
return -EINVAL; return -EINVAL;
...@@ -271,16 +265,13 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma, ...@@ -271,16 +265,13 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
vdata_size = sizeof(struct vma_data) + pages * sizeof(long); vdata_size = sizeof(struct vma_data) + pages * sizeof(long);
if (vdata_size <= PAGE_SIZE) if (vdata_size <= PAGE_SIZE)
vdata = kzalloc(vdata_size, GFP_KERNEL); vdata = kzalloc(vdata_size, GFP_KERNEL);
else { else
vdata = vzalloc(vdata_size); vdata = vzalloc(vdata_size);
flags = VMD_VMALLOCED;
}
if (!vdata) if (!vdata)
return -ENOMEM; return -ENOMEM;
vdata->vm_start = vma->vm_start; vdata->vm_start = vma->vm_start;
vdata->vm_end = vma->vm_end; vdata->vm_end = vma->vm_end;
vdata->flags = flags;
vdata->type = type; vdata->type = type;
spin_lock_init(&vdata->lock); spin_lock_init(&vdata->lock);
atomic_set(&vdata->refcnt, 1); atomic_set(&vdata->refcnt, 1);
......
...@@ -198,10 +198,7 @@ EXPORT_SYMBOL(drm_ht_remove_item); ...@@ -198,10 +198,7 @@ EXPORT_SYMBOL(drm_ht_remove_item);
void drm_ht_remove(struct drm_open_hash *ht) void drm_ht_remove(struct drm_open_hash *ht)
{ {
if (ht->table) { if (ht->table) {
if ((PAGE_SIZE / sizeof(*ht->table)) >> ht->order) kvfree(ht->table);
kfree(ht->table);
else
vfree(ht->table);
ht->table = NULL; ht->table = NULL;
} }
} }
......
...@@ -151,16 +151,12 @@ do { \ ...@@ -151,16 +151,12 @@ do { \
#define LIBCFS_FREE(ptr, size) \ #define LIBCFS_FREE(ptr, size) \
do { \ do { \
int s = (size); \
if (unlikely((ptr) == NULL)) { \ if (unlikely((ptr) == NULL)) { \
CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \ CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \
"%s:%d\n", s, __FILE__, __LINE__); \ "%s:%d\n", (int)(size), __FILE__, __LINE__); \
break; \ break; \
} \ } \
if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \ kvfree(ptr); \
vfree(ptr); \
else \
kfree(ptr); \
} while (0) } while (0)
/******************************************************************************/ /******************************************************************************/
......
...@@ -75,7 +75,7 @@ void kill_bdev(struct block_device *bdev) ...@@ -75,7 +75,7 @@ void kill_bdev(struct block_device *bdev)
{ {
struct address_space *mapping = bdev->bd_inode->i_mapping; struct address_space *mapping = bdev->bd_inode->i_mapping;
if (mapping->nrpages == 0 && mapping->nrshadows == 0) if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
return; return;
invalidate_bh_lrus(); invalidate_bh_lrus();
......
...@@ -72,8 +72,7 @@ void coda_sysctl_clean(void); ...@@ -72,8 +72,7 @@ void coda_sysctl_clean(void);
} while (0) } while (0)
#define CODA_FREE(ptr,size) \ #define CODA_FREE(ptr, size) kvfree((ptr))
do { if (size < PAGE_SIZE) kfree((ptr)); else vfree((ptr)); } while (0)
/* inode to cnode access functions */ /* inode to cnode access functions */
......
...@@ -102,8 +102,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, ...@@ -102,8 +102,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
{ {
struct inode *inode = file_inode(vma->vm_file); struct inode *inode = file_inode(vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode); struct ext2_inode_info *ei = EXT2_I(inode);
int ret = VM_FAULT_NOPAGE;
loff_t size; loff_t size;
int ret;
sb_start_pagefault(inode->i_sb); sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file); file_update_time(vma->vm_file);
...@@ -113,6 +113,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, ...@@ -113,6 +113,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size) if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS;
else
ret = dax_pfn_mkwrite(vma, vmf);
up_read(&ei->dax_sem); up_read(&ei->dax_sem);
sb_end_pagefault(inode->i_sb); sb_end_pagefault(inode->i_sb);
......
...@@ -291,8 +291,8 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma, ...@@ -291,8 +291,8 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
{ {
struct inode *inode = file_inode(vma->vm_file); struct inode *inode = file_inode(vma->vm_file);
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
int ret = VM_FAULT_NOPAGE;
loff_t size; loff_t size;
int ret;
sb_start_pagefault(sb); sb_start_pagefault(sb);
file_update_time(vma->vm_file); file_update_time(vma->vm_file);
...@@ -300,6 +300,8 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma, ...@@ -300,6 +300,8 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size) if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS;
else
ret = dax_pfn_mkwrite(vma, vmf);
up_read(&EXT4_I(inode)->i_mmap_sem); up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb); sb_end_pagefault(sb);
......
...@@ -495,7 +495,7 @@ void clear_inode(struct inode *inode) ...@@ -495,7 +495,7 @@ void clear_inode(struct inode *inode)
*/ */
spin_lock_irq(&inode->i_data.tree_lock); spin_lock_irq(&inode->i_data.tree_lock);
BUG_ON(inode->i_data.nrpages); BUG_ON(inode->i_data.nrpages);
BUG_ON(inode->i_data.nrshadows); BUG_ON(inode->i_data.nrexceptional);
spin_unlock_irq(&inode->i_data.tree_lock); spin_unlock_irq(&inode->i_data.tree_lock);
BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!list_empty(&inode->i_data.private_list));
BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(!(inode->i_state & I_FREEING));
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/mtd/mtd.h> #include <linux/mtd/mtd.h>
#include <linux/mm.h> /* kvfree() */
#include "nodelist.h" #include "nodelist.h"
static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *, static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
...@@ -383,12 +384,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c) ...@@ -383,12 +384,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
return 0; return 0;
out_free: out_free:
#ifndef __ECOS kvfree(c->blocks);
if (jffs2_blocks_use_vmalloc(c))
vfree(c->blocks);
else
#endif
kfree(c->blocks);
return ret; return ret;
} }
...@@ -596,10 +596,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) ...@@ -596,10 +596,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
out_root: out_root:
jffs2_free_ino_caches(c); jffs2_free_ino_caches(c);
jffs2_free_raw_node_refs(c); jffs2_free_raw_node_refs(c);
if (jffs2_blocks_use_vmalloc(c)) kvfree(c->blocks);
vfree(c->blocks);
else
kfree(c->blocks);
out_inohash: out_inohash:
jffs2_clear_xattr_subsystem(c); jffs2_clear_xattr_subsystem(c);
kfree(c->inocache_list); kfree(c->inocache_list);
......
...@@ -331,10 +331,7 @@ static void jffs2_put_super (struct super_block *sb) ...@@ -331,10 +331,7 @@ static void jffs2_put_super (struct super_block *sb)
jffs2_free_ino_caches(c); jffs2_free_ino_caches(c);
jffs2_free_raw_node_refs(c); jffs2_free_raw_node_refs(c);
if (jffs2_blocks_use_vmalloc(c)) kvfree(c->blocks);
vfree(c->blocks);
else
kfree(c->blocks);
jffs2_flash_cleanup(c); jffs2_flash_cleanup(c);
kfree(c->inocache_list); kfree(c->inocache_list);
jffs2_clear_xattr_subsystem(c); jffs2_clear_xattr_subsystem(c);
......
...@@ -279,17 +279,12 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap) ...@@ -279,17 +279,12 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
{ {
int i; int i;
int nr_groups = bitmap->s_nr_groups; int nr_groups = bitmap->s_nr_groups;
int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
nr_groups);
for (i = 0; i < nr_groups; i++) for (i = 0; i < nr_groups; i++)
if (bitmap->s_block_bitmap[i]) if (bitmap->s_block_bitmap[i])
brelse(bitmap->s_block_bitmap[i]); brelse(bitmap->s_block_bitmap[i]);
if (size <= PAGE_SIZE) kvfree(bitmap);
kfree(bitmap);
else
vfree(bitmap);
} }
static void udf_free_partition(struct udf_part_map *map) static void udf_free_partition(struct udf_part_map *map)
......
...@@ -1610,9 +1610,8 @@ xfs_filemap_pmd_fault( ...@@ -1610,9 +1610,8 @@ xfs_filemap_pmd_fault(
/* /*
* pfn_mkwrite was originally inteneded to ensure we capture time stamp * pfn_mkwrite was originally inteneded to ensure we capture time stamp
* updates on write faults. In reality, it's need to serialise against * updates on write faults. In reality, it's need to serialise against
* truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite() * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
* here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault * to ensure we serialise the fault barrier in place.
* barrier in place.
*/ */
static int static int
xfs_filemap_pfn_mkwrite( xfs_filemap_pfn_mkwrite(
...@@ -1635,6 +1634,8 @@ xfs_filemap_pfn_mkwrite( ...@@ -1635,6 +1634,8 @@ xfs_filemap_pfn_mkwrite(
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size) if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS;
else if (IS_DAX(inode))
ret = dax_pfn_mkwrite(vma, vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb); sb_end_pagefault(inode->i_sb);
return ret; return ret;
......
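The ext2, ext4 and XFS hunks above all gain the same call: after taking the filesystem's fault-versus-truncate lock and rechecking i_size, the write fault is handed to dax_pfn_mkwrite() so the radix-tree entry is marked dirty for a later fsync/msync. A condensed, hedged sketch of that common shape follows; the rw_semaphore name is a placeholder, not any particular filesystem's field:

#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/rwsem.h>

/* Placeholder for the filesystem's fault-vs-truncate serialisation lock. */
static DECLARE_RWSEM(example_dax_sem);

static int example_dax_pfn_mkwrite(struct vm_area_struct *vma,
                                   struct vm_fault *vmf)
{
        struct inode *inode = file_inode(vma->vm_file);
        loff_t size;
        int ret;

        sb_start_pagefault(inode->i_sb);
        file_update_time(vma->vm_file);
        down_read(&example_dax_sem);

        /* Recheck i_size under the lock, as the real handlers above do. */
        size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
        if (vmf->pgoff >= size)
                ret = VM_FAULT_SIGBUS;
        else
                ret = dax_pfn_mkwrite(vma, vmf);

        up_read(&example_dax_sem);
        sb_end_pagefault(inode->i_sb);
        return ret;
}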
...@@ -36,4 +36,11 @@ static inline bool vma_is_dax(struct vm_area_struct *vma) ...@@ -36,4 +36,11 @@ static inline bool vma_is_dax(struct vm_area_struct *vma)
{ {
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
} }
static inline bool dax_mapping(struct address_space *mapping)
{
return mapping->host && IS_DAX(mapping->host);
}
int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
loff_t end);
#endif #endif
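The two additions to include/linux/dax.h are what the generic code further down keys off: dax_mapping() distinguishes DAX exceptional entries from workingset shadow entries, and dax_writeback_mapping_range() flushes the dirty ones. A hedged sketch of the intended call pattern, mirroring the filemap_write_and_wait_range() change below:

/* Sketch: flush DAX-dirtied cache lines for a range before syncing. */
static int example_sync_dax_range(struct address_space *mapping,
                                  loff_t start, loff_t end)
{
        /* Only DAX mappings record dirty pmem state as exceptional entries. */
        if (dax_mapping(mapping) && mapping->nrexceptional)
                return dax_writeback_mapping_range(mapping, start, end);
        return 0;
}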
...@@ -433,7 +433,8 @@ struct address_space { ...@@ -433,7 +433,8 @@ struct address_space {
struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */ /* Protected by tree_lock together with the radix tree */
unsigned long nrpages; /* number of total pages */ unsigned long nrpages; /* number of total pages */
unsigned long nrshadows; /* number of shadow entries */ /* number of shadow or DAX exceptional entries */
unsigned long nrexceptional;
pgoff_t writeback_index;/* writeback starts here */ pgoff_t writeback_index;/* writeback starts here */
const struct address_space_operations *a_ops; /* methods */ const struct address_space_operations *a_ops; /* methods */
unsigned long flags; /* error bits/gfp mask */ unsigned long flags; /* error bits/gfp mask */
......
...@@ -361,6 +361,9 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, ...@@ -361,6 +361,9 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
unsigned int nr_pages, struct page **pages); unsigned int nr_pages, struct page **pages);
unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
int tag, unsigned int nr_pages, struct page **pages); int tag, unsigned int nr_pages, struct page **pages);
unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
int tag, unsigned int nr_entries,
struct page **entries, pgoff_t *indices);
struct page *grab_cache_page_write_begin(struct address_space *mapping, struct page *grab_cache_page_write_begin(struct address_space *mapping,
pgoff_t index, unsigned flags); pgoff_t index, unsigned flags);
......
...@@ -53,12 +53,18 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) ...@@ -53,12 +53,18 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
{ {
BUG(); BUG();
} }
static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
{
BUG();
}
#endif #endif
/* /*
* Architectures that define ARCH_HAS_PMEM_API must provide * Architectures that define ARCH_HAS_PMEM_API must provide
* implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(), * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
* arch_copy_from_iter_pmem(), arch_clear_pmem() and arch_has_wmb_pmem(). * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem()
* and arch_has_wmb_pmem().
*/ */
static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
{ {
...@@ -178,4 +184,18 @@ static inline void clear_pmem(void __pmem *addr, size_t size) ...@@ -178,4 +184,18 @@ static inline void clear_pmem(void __pmem *addr, size_t size)
else else
default_clear_pmem(addr, size); default_clear_pmem(addr, size);
} }
/**
* wb_cache_pmem - write back processor cache for PMEM memory range
* @addr: virtual start address
* @size: number of bytes to write back
*
* Write back the processor cache range starting at 'addr' for 'size' bytes.
* This function requires explicit ordering with a wmb_pmem() call.
*/
static inline void wb_cache_pmem(void __pmem *addr, size_t size)
{
if (arch_has_pmem_api())
arch_wb_cache_pmem(addr, size);
}
#endif /* __PMEM_H__ */ #endif /* __PMEM_H__ */
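wb_cache_pmem() is the generic entry point the DAX fsync path needs: after writing persistent memory through a kernel mapping, the affected cache lines are written back, and a wmb_pmem() then makes the data durable. A hedged sketch of the call sequence (the buffer and length are illustrative):

/* Sketch: make a just-written pmem range durable. */
static void example_make_durable(void __pmem *addr, size_t len)
{
        /* Write back any dirty cache lines covering [addr, addr + len). */
        wb_cache_pmem(addr, len);

        /* Order and drain the write-back so the data is persistent. */
        wmb_pmem();
}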
...@@ -51,6 +51,15 @@ ...@@ -51,6 +51,15 @@
#define RADIX_TREE_EXCEPTIONAL_ENTRY 2 #define RADIX_TREE_EXCEPTIONAL_ENTRY 2
#define RADIX_TREE_EXCEPTIONAL_SHIFT 2 #define RADIX_TREE_EXCEPTIONAL_SHIFT 2
#define RADIX_DAX_MASK 0xf
#define RADIX_DAX_SHIFT 4
#define RADIX_DAX_PTE (0x4 | RADIX_TREE_EXCEPTIONAL_ENTRY)
#define RADIX_DAX_PMD (0x8 | RADIX_TREE_EXCEPTIONAL_ENTRY)
#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_MASK)
#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT))
#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \
RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE)))
static inline int radix_tree_is_indirect_ptr(void *ptr) static inline int radix_tree_is_indirect_ptr(void *ptr)
{ {
return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR); return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR);
......
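The new RADIX_DAX_* macros pack a device sector and an entry size (PTE- or PMD-backed) into a radix-tree exceptional entry, which is how the DAX fsync code records what needs flushing. A small illustration of the round trip (the sector value is arbitrary):

/* Illustrative round trip through the RADIX_DAX_* helpers. */
static void example_radix_dax_roundtrip(void)
{
        sector_t sector = 2048;                         /* arbitrary */
        void *entry = RADIX_DAX_ENTRY(sector, false);   /* PTE-sized */

        /* The low bits carry the type, the upper bits carry the sector. */
        WARN_ON(RADIX_DAX_TYPE(entry) != RADIX_DAX_PTE);
        WARN_ON(RADIX_DAX_SECTOR(entry) != sector);
        WARN_ON(!radix_tree_exceptional_entry(entry));
}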
...@@ -1493,7 +1493,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, ...@@ -1493,7 +1493,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
wake_up_sem_queue_do(&tasks); wake_up_sem_queue_do(&tasks);
out_free: out_free:
if (sem_io != fast_sem_io) if (sem_io != fast_sem_io)
ipc_free(sem_io, sizeof(ushort)*nsems); ipc_free(sem_io);
return err; return err;
} }
......
...@@ -414,17 +414,12 @@ void *ipc_alloc(int size) ...@@ -414,17 +414,12 @@ void *ipc_alloc(int size)
/** /**
* ipc_free - free ipc space * ipc_free - free ipc space
* @ptr: pointer returned by ipc_alloc * @ptr: pointer returned by ipc_alloc
* @size: size of block
* *
* Free a block created with ipc_alloc(). The caller must know the size * Free a block created with ipc_alloc().
* used in the allocation call.
*/ */
void ipc_free(void *ptr, int size) void ipc_free(void *ptr)
{ {
if (size > PAGE_SIZE) kvfree(ptr);
vfree(ptr);
else
kfree(ptr);
} }
/** /**
......
...@@ -118,7 +118,7 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg); ...@@ -118,7 +118,7 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
* both function can sleep * both function can sleep
*/ */
void *ipc_alloc(int size); void *ipc_alloc(int size);
void ipc_free(void *ptr, int size); void ipc_free(void *ptr);
/* /*
* For allocation that need to be freed by RCU. * For allocation that need to be freed by RCU.
......
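With the size argument gone, ipc callers simply free whatever ipc_alloc() handed back and let kvfree() sort out kmalloc versus vmalloc, as the sem.c hunk above shows. A trivial sketch of the pairing (hypothetical caller, error handling trimmed):

/* Hypothetical caller of the simplified ipc_alloc()/ipc_free() pair. */
static int example_ipc_scratch(int nsems)
{
        ushort *sem_io;

        sem_io = ipc_alloc(sizeof(ushort) * nsems);
        if (!sem_io)
                return -ENOMEM;

        /* ... fill and use sem_io ... */

        ipc_free(sem_io);               /* no size argument any more */
        return 0;
}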
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
*/ */
#include <linux/export.h> #include <linux/export.h>
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/dax.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/capability.h> #include <linux/capability.h>
...@@ -123,9 +124,9 @@ static void page_cache_tree_delete(struct address_space *mapping, ...@@ -123,9 +124,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
__radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot); __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
if (shadow) { if (shadow) {
mapping->nrshadows++; mapping->nrexceptional++;
/* /*
* Make sure the nrshadows update is committed before * Make sure the nrexceptional update is committed before
* the nrpages update so that final truncate racing * the nrpages update so that final truncate racing
* with reclaim does not see both counters 0 at the * with reclaim does not see both counters 0 at the
* same time and miss a shadow entry. * same time and miss a shadow entry.
...@@ -481,6 +482,12 @@ int filemap_write_and_wait_range(struct address_space *mapping, ...@@ -481,6 +482,12 @@ int filemap_write_and_wait_range(struct address_space *mapping,
{ {
int err = 0; int err = 0;
if (dax_mapping(mapping) && mapping->nrexceptional) {
err = dax_writeback_mapping_range(mapping, lstart, lend);
if (err)
return err;
}
if (mapping->nrpages) { if (mapping->nrpages) {
err = __filemap_fdatawrite_range(mapping, lstart, lend, err = __filemap_fdatawrite_range(mapping, lstart, lend,
WB_SYNC_ALL); WB_SYNC_ALL);
...@@ -579,9 +586,13 @@ static int page_cache_tree_insert(struct address_space *mapping, ...@@ -579,9 +586,13 @@ static int page_cache_tree_insert(struct address_space *mapping,
p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
if (!radix_tree_exceptional_entry(p)) if (!radix_tree_exceptional_entry(p))
return -EEXIST; return -EEXIST;
if (WARN_ON(dax_mapping(mapping)))
return -EINVAL;
if (shadowp) if (shadowp)
*shadowp = p; *shadowp = p;
mapping->nrshadows--; mapping->nrexceptional--;
if (node) if (node)
workingset_node_shadows_dec(node); workingset_node_shadows_dec(node);
} }
...@@ -1245,9 +1256,9 @@ unsigned find_get_entries(struct address_space *mapping, ...@@ -1245,9 +1256,9 @@ unsigned find_get_entries(struct address_space *mapping,
if (radix_tree_deref_retry(page)) if (radix_tree_deref_retry(page))
goto restart; goto restart;
/* /*
* A shadow entry of a recently evicted page, * A shadow entry of a recently evicted page, a swap
* or a swap entry from shmem/tmpfs. Return * entry from shmem/tmpfs or a DAX entry. Return it
* it without attempting to raise page count. * without attempting to raise page count.
*/ */
goto export; goto export;
} }
...@@ -1494,6 +1505,74 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, ...@@ -1494,6 +1505,74 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
} }
EXPORT_SYMBOL(find_get_pages_tag); EXPORT_SYMBOL(find_get_pages_tag);
/**
* find_get_entries_tag - find and return entries that match @tag
* @mapping: the address_space to search
* @start: the starting page cache index
* @tag: the tag index
* @nr_entries: the maximum number of entries
* @entries: where the resulting entries are placed
* @indices: the cache indices corresponding to the entries in @entries
*
* Like find_get_entries, except we only return entries which are tagged with
* @tag.
*/
unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
int tag, unsigned int nr_entries,
struct page **entries, pgoff_t *indices)
{
void **slot;
unsigned int ret = 0;
struct radix_tree_iter iter;
if (!nr_entries)
return 0;
rcu_read_lock();
restart:
radix_tree_for_each_tagged(slot, &mapping->page_tree,
&iter, start, tag) {
struct page *page;
repeat:
page = radix_tree_deref_slot(slot);
if (unlikely(!page))
continue;
if (radix_tree_exception(page)) {
if (radix_tree_deref_retry(page)) {
/*
* Transient condition which can only trigger
* when entry at index 0 moves out of or back
* to root: none yet gotten, safe to restart.
*/
goto restart;
}
/*
* A shadow entry of a recently evicted page, a swap
* entry from shmem/tmpfs or a DAX entry. Return it
* without attempting to raise page count.
*/
goto export;
}
if (!page_cache_get_speculative(page))
goto repeat;
/* Has the page moved? */
if (unlikely(page != *slot)) {
page_cache_release(page);
goto repeat;
}
export:
indices[ret] = iter.index;
entries[ret] = page;
if (++ret == nr_entries)
break;
}
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL(find_get_entries_tag);
/* /*
* CD/DVDs are error prone. When a medium error occurs, the driver may fail * CD/DVDs are error prone. When a medium error occurs, the driver may fail
* a _large_ part of the i/o request. Imagine the worst scenario: * a _large_ part of the i/o request. Imagine the worst scenario:
......
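find_get_entries_tag() gives the new DAX writeback code a tagged lookup that, unlike find_get_pages_tag(), also returns exceptional entries. A hedged sketch of the batched loop a caller might build around it; the per-entry flush is a placeholder, and a real caller must also drop the reference on any regular page it is handed:

#define EXAMPLE_BATCH   16

/* Placeholder: the real code would flush the pmem backing this entry. */
static void example_flush_entry(struct address_space *mapping,
                                pgoff_t index, void *entry)
{
}

static void example_walk_towrite(struct address_space *mapping)
{
        struct page *entries[EXAMPLE_BATCH];
        pgoff_t indices[EXAMPLE_BATCH];
        pgoff_t index = 0;
        unsigned int i, nr;

        do {
                nr = find_get_entries_tag(mapping, index,
                                PAGECACHE_TAG_TOWRITE, EXAMPLE_BATCH,
                                entries, indices);
                for (i = 0; i < nr; i++) {
                        index = indices[i] + 1;
                        example_flush_entry(mapping, indices[i], entries[i]);
                }
        } while (nr == EXAMPLE_BATCH);
}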
...@@ -305,16 +305,12 @@ static void *pcpu_mem_zalloc(size_t size) ...@@ -305,16 +305,12 @@ static void *pcpu_mem_zalloc(size_t size)
/** /**
* pcpu_mem_free - free memory * pcpu_mem_free - free memory
* @ptr: memory to free * @ptr: memory to free
* @size: size of the area
* *
* Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc(). * Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc().
*/ */
static void pcpu_mem_free(void *ptr, size_t size) static void pcpu_mem_free(void *ptr)
{ {
if (size <= PAGE_SIZE) kvfree(ptr);
kfree(ptr);
else
vfree(ptr);
} }
/** /**
...@@ -463,8 +459,8 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) ...@@ -463,8 +459,8 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
* pcpu_mem_free() might end up calling vfree() which uses * pcpu_mem_free() might end up calling vfree() which uses
* IRQ-unsafe lock and thus can't be called under pcpu_lock. * IRQ-unsafe lock and thus can't be called under pcpu_lock.
*/ */
pcpu_mem_free(old, old_size); pcpu_mem_free(old);
pcpu_mem_free(new, new_size); pcpu_mem_free(new);
return 0; return 0;
} }
...@@ -732,7 +728,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) ...@@ -732,7 +728,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC * chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
sizeof(chunk->map[0])); sizeof(chunk->map[0]));
if (!chunk->map) { if (!chunk->map) {
pcpu_mem_free(chunk, pcpu_chunk_struct_size); pcpu_mem_free(chunk);
return NULL; return NULL;
} }
...@@ -753,8 +749,8 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) ...@@ -753,8 +749,8 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
{ {
if (!chunk) if (!chunk)
return; return;
pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0])); pcpu_mem_free(chunk->map);
pcpu_mem_free(chunk, pcpu_chunk_struct_size); pcpu_mem_free(chunk);
} }
/** /**
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/dax.h>
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/swap.h> #include <linux/swap.h>
...@@ -34,31 +35,39 @@ static void clear_exceptional_entry(struct address_space *mapping, ...@@ -34,31 +35,39 @@ static void clear_exceptional_entry(struct address_space *mapping,
return; return;
spin_lock_irq(&mapping->tree_lock); spin_lock_irq(&mapping->tree_lock);
/*
* Regular page slots are stabilized by the page lock even if (dax_mapping(mapping)) {
* without the tree itself locked. These unlocked entries if (radix_tree_delete_item(&mapping->page_tree, index, entry))
* need verification under the tree lock. mapping->nrexceptional--;
*/ } else {
if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot)) /*
goto unlock; * Regular page slots are stabilized by the page lock even
if (*slot != entry) * without the tree itself locked. These unlocked entries
goto unlock; * need verification under the tree lock.
radix_tree_replace_slot(slot, NULL); */
mapping->nrshadows--; if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
if (!node) &slot))
goto unlock; goto unlock;
workingset_node_shadows_dec(node); if (*slot != entry)
/* goto unlock;
* Don't track node without shadow entries. radix_tree_replace_slot(slot, NULL);
* mapping->nrexceptional--;
* Avoid acquiring the list_lru lock if already untracked. if (!node)
* The list_empty() test is safe as node->private_list is goto unlock;
* protected by mapping->tree_lock. workingset_node_shadows_dec(node);
*/ /*
if (!workingset_node_shadows(node) && * Don't track node without shadow entries.
!list_empty(&node->private_list)) *
list_lru_del(&workingset_shadow_nodes, &node->private_list); * Avoid acquiring the list_lru lock if already untracked.
__radix_tree_delete_node(&mapping->page_tree, node); * The list_empty() test is safe as node->private_list is
* protected by mapping->tree_lock.
*/
if (!workingset_node_shadows(node) &&
!list_empty(&node->private_list))
list_lru_del(&workingset_shadow_nodes,
&node->private_list);
__radix_tree_delete_node(&mapping->page_tree, node);
}
unlock: unlock:
spin_unlock_irq(&mapping->tree_lock); spin_unlock_irq(&mapping->tree_lock);
} }
...@@ -228,7 +237,7 @@ void truncate_inode_pages_range(struct address_space *mapping, ...@@ -228,7 +237,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
int i; int i;
cleancache_invalidate_inode(mapping); cleancache_invalidate_inode(mapping);
if (mapping->nrpages == 0 && mapping->nrshadows == 0) if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
return; return;
/* Offsets within partial pages */ /* Offsets within partial pages */
...@@ -402,7 +411,7 @@ EXPORT_SYMBOL(truncate_inode_pages); ...@@ -402,7 +411,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
*/ */
void truncate_inode_pages_final(struct address_space *mapping) void truncate_inode_pages_final(struct address_space *mapping)
{ {
unsigned long nrshadows; unsigned long nrexceptional;
unsigned long nrpages; unsigned long nrpages;
/* /*
...@@ -416,14 +425,14 @@ void truncate_inode_pages_final(struct address_space *mapping) ...@@ -416,14 +425,14 @@ void truncate_inode_pages_final(struct address_space *mapping)
/* /*
* When reclaim installs eviction entries, it increases * When reclaim installs eviction entries, it increases
* nrshadows first, then decreases nrpages. Make sure we see * nrexceptional first, then decreases nrpages. Make sure we see
* this in the right order or we might miss an entry. * this in the right order or we might miss an entry.
*/ */
nrpages = mapping->nrpages; nrpages = mapping->nrpages;
smp_rmb(); smp_rmb();
nrshadows = mapping->nrshadows; nrexceptional = mapping->nrexceptional;
if (nrpages || nrshadows) { if (nrpages || nrexceptional) {
/* /*
* As truncation uses a lockless tree lookup, cycle * As truncation uses a lockless tree lookup, cycle
* the tree lock to make sure any ongoing tree * the tree lock to make sure any ongoing tree
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include <linux/oom.h> #include <linux/oom.h>
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <linux/printk.h> #include <linux/printk.h>
#include <linux/dax.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/div64.h> #include <asm/div64.h>
...@@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, ...@@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
* inode reclaim needs to empty out the radix tree or * inode reclaim needs to empty out the radix tree or
* the nodes are lost. Don't plant shadows behind its * the nodes are lost. Don't plant shadows behind its
* back. * back.
*
* We also don't store shadows for DAX mappings because the
* only page cache pages found in these are zero pages
* covering holes, and because we don't want to mix DAX
* exceptional entries and shadow exceptional entries in the
* same page_tree.
*/ */
if (reclaimed && page_is_file_cache(page) && if (reclaimed && page_is_file_cache(page) &&
!mapping_exiting(mapping)) !mapping_exiting(mapping) && !dax_mapping(mapping))
shadow = workingset_eviction(mapping, page); shadow = workingset_eviction(mapping, page);
__delete_from_page_cache(page, shadow, memcg); __delete_from_page_cache(page, shadow, memcg);
spin_unlock_irqrestore(&mapping->tree_lock, flags); spin_unlock_irqrestore(&mapping->tree_lock, flags);
......
...@@ -351,8 +351,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, ...@@ -351,8 +351,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
node->slots[i] = NULL; node->slots[i] = NULL;
BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT)); BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
node->count -= 1U << RADIX_TREE_COUNT_SHIFT; node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
BUG_ON(!mapping->nrshadows); BUG_ON(!mapping->nrexceptional);
mapping->nrshadows--; mapping->nrexceptional--;
} }
} }
BUG_ON(node->count); BUG_ON(node->count);
......
...@@ -289,10 +289,8 @@ static void __node_free_rcu(struct rcu_head *head) ...@@ -289,10 +289,8 @@ static void __node_free_rcu(struct rcu_head *head)
if (!n->tn_bits) if (!n->tn_bits)
kmem_cache_free(trie_leaf_kmem, n); kmem_cache_free(trie_leaf_kmem, n);
else if (n->tn_bits <= TNODE_KMALLOC_MAX)
kfree(n);
else else
vfree(n); kvfree(n);
} }
#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
......