Commit 1aac1400 authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds

tmpfs: quit when fallocate fills memory

As it stands, a large fallocate() on tmpfs is liable to fill memory with
pages, freed on failure except when they run into swap, at which point
they become fixed into the file despite the failure.  That feels quite
wrong, to be consuming resources precisely when they're in short supply.

Go the other way instead: shmem_fallocate() indicate the range it has
fallocated to shmem_writepage(), keeping count of pages it's allocating;
shmem_writepage() reactivate instead of swapping out pages fallocated by
this syscall (but happily swap out those from earlier occasions), keeping
count; shmem_fallocate() compare counts and give up once the reactivated
pages have started to coming back to writepage (approximately: some zones
would in fact recycle faster than others).

This is a little unusual, but works well: although we could consider the
failure to swap as a bug, and fix it later with SWAP_MAP_FALLOC handling
added in swapfile.c and memcontrol.c, I doubt that we shall ever want to.

(If there's no swap, an over-large fallocate() on tmpfs is limited in the
same way as writing: stopped by rlimit, or by tmpfs mount size if that was
set sensibly, or by __vm_enough_memory() heuristics if OVERCOMMIT_GUESS or
OVERCOMMIT_NEVER.  If OVERCOMMIT_ALWAYS, then it is liable to OOM-kill
others as writing would, but stops and frees if interrupted.)

Now that everything is freed on failure, we can then skip updating ctime.
Signed-off-by: default avatarHugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Cong Wang <amwang@redhat.com>
Cc: Kay Sievers <kay@vrfy.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 1635f6a7
...@@ -84,6 +84,18 @@ struct shmem_xattr { ...@@ -84,6 +84,18 @@ struct shmem_xattr {
char value[0]; char value[0];
}; };
/*
* shmem_fallocate and shmem_writepage communicate via inode->i_private
* (with i_mutex making sure that it has only one user at a time):
* we would prefer not to enlarge the shmem inode just for that.
*/
struct shmem_falloc {
pgoff_t start; /* start of range currently being fallocated */
pgoff_t next; /* the next page offset to be fallocated */
pgoff_t nr_falloced; /* how many new pages have been fallocated */
pgoff_t nr_unswapped; /* how often writepage refused to swap out */
};
/* Flag allocation requirements to shmem_getpage */ /* Flag allocation requirements to shmem_getpage */
enum sgp_type { enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_READ, /* don't exceed i_size, don't allocate page */
...@@ -791,8 +803,28 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) ...@@ -791,8 +803,28 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
* This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
* value into swapfile.c, the only way we can correctly account for a * value into swapfile.c, the only way we can correctly account for a
* fallocated page arriving here is now to initialize it and write it. * fallocated page arriving here is now to initialize it and write it.
*
* That's okay for a page already fallocated earlier, but if we have
* not yet completed the fallocation, then (a) we want to keep track
* of this page in case we have to undo it, and (b) it may not be a
* good idea to continue anyway, once we're pushing into swap. So
* reactivate the page, and let shmem_fallocate() quit when too many.
*/ */
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
if (inode->i_private) {
struct shmem_falloc *shmem_falloc;
spin_lock(&inode->i_lock);
shmem_falloc = inode->i_private;
if (shmem_falloc &&
index >= shmem_falloc->start &&
index < shmem_falloc->next)
shmem_falloc->nr_unswapped++;
else
shmem_falloc = NULL;
spin_unlock(&inode->i_lock);
if (shmem_falloc)
goto redirty;
}
clear_highpage(page); clear_highpage(page);
flush_dcache_page(page); flush_dcache_page(page);
SetPageUptodate(page); SetPageUptodate(page);
...@@ -1647,6 +1679,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, ...@@ -1647,6 +1679,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
{ {
struct inode *inode = file->f_path.dentry->d_inode; struct inode *inode = file->f_path.dentry->d_inode;
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
struct shmem_falloc shmem_falloc;
pgoff_t start, index, end; pgoff_t start, index, end;
int error; int error;
...@@ -1679,6 +1712,14 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, ...@@ -1679,6 +1712,14 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
goto out; goto out;
} }
shmem_falloc.start = start;
shmem_falloc.next = start;
shmem_falloc.nr_falloced = 0;
shmem_falloc.nr_unswapped = 0;
spin_lock(&inode->i_lock);
inode->i_private = &shmem_falloc;
spin_unlock(&inode->i_lock);
for (index = start; index < end; index++) { for (index = start; index < end; index++) {
struct page *page; struct page *page;
...@@ -1688,6 +1729,8 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, ...@@ -1688,6 +1729,8 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
*/ */
if (signal_pending(current)) if (signal_pending(current))
error = -EINTR; error = -EINTR;
else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
error = -ENOMEM;
else else
error = shmem_getpage(inode, index, &page, SGP_FALLOC, error = shmem_getpage(inode, index, &page, SGP_FALLOC,
NULL); NULL);
...@@ -1696,9 +1739,17 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, ...@@ -1696,9 +1739,17 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
shmem_undo_range(inode, shmem_undo_range(inode,
(loff_t)start << PAGE_CACHE_SHIFT, (loff_t)start << PAGE_CACHE_SHIFT,
(loff_t)index << PAGE_CACHE_SHIFT, true); (loff_t)index << PAGE_CACHE_SHIFT, true);
goto ctime; goto undone;
} }
/*
* Inform shmem_writepage() how far we have reached.
* No need for lock or barrier: we have the page lock.
*/
shmem_falloc.next++;
if (!PageUptodate(page))
shmem_falloc.nr_falloced++;
/* /*
* If !PageUptodate, leave it that way so that freeable pages * If !PageUptodate, leave it that way so that freeable pages
* can be recognized if we need to rollback on error later. * can be recognized if we need to rollback on error later.
...@@ -1714,8 +1765,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, ...@@ -1714,8 +1765,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
i_size_write(inode, offset + len); i_size_write(inode, offset + len);
ctime:
inode->i_ctime = CURRENT_TIME; inode->i_ctime = CURRENT_TIME;
undone:
spin_lock(&inode->i_lock);
inode->i_private = NULL;
spin_unlock(&inode->i_lock);
out: out:
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
return error; return error;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment