shmem.c 57.5 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
/*
Linus Torvalds's avatar
Linus Torvalds committed
2
 * Resizable virtual memory filesystem for Linux.
Linus Torvalds's avatar
Linus Torvalds committed
3 4 5
 *
 * Copyright (C) 2000 Linus Torvalds.
 *		 2000 Transmeta Corp.
Linus Torvalds's avatar
Linus Torvalds committed
6 7
 *		 2000-2001 Christoph Rohland
 *		 2000-2001 SAP AG
Andrew Morton's avatar
Andrew Morton committed
8
 *		 2002 Red Hat Inc.
9 10
 * Copyright (C) 2002-2004 Hugh Dickins.
 * Copyright (C) 2002-2004 VERITAS Software Corporation.
11
 * Copyright (C) 2004 Andi Kleen, SuSE Labs
Andrew Morton's avatar
Andrew Morton committed
12
 *
13 14 15 16
 * Extended attribute support for tmpfs:
 * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
 *
17
 * This file is released under the GPL.
Linus Torvalds's avatar
Linus Torvalds committed
18 19 20
 */

/*
Linus Torvalds's avatar
Linus Torvalds committed
21 22 23
 * This virtual memory filesystem is heavily based on the ramfs. It
 * extends ramfs by the ability to use swap and honor resource limits
 * which makes it a completely usable filesystem.
Linus Torvalds's avatar
Linus Torvalds committed
24 25
 */

Linus Torvalds's avatar
Linus Torvalds committed
26
#include <linux/config.h>
Linus Torvalds's avatar
Linus Torvalds committed
27 28 29 30 31
#include <linux/module.h>
#include <linux/init.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
Andrew Morton's avatar
Andrew Morton committed
32
#include <linux/mman.h>
Linus Torvalds's avatar
Linus Torvalds committed
33 34 35 36
#include <linux/file.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/string.h>
Linus Torvalds's avatar
Linus Torvalds committed
37
#include <linux/slab.h>
38
#include <linux/backing-dev.h>
39
#include <linux/shmem_fs.h>
40
#include <linux/mount.h>
41
#include <linux/writeback.h>
42
#include <linux/vfs.h>
Andrew Morton's avatar
Andrew Morton committed
43
#include <linux/blkdev.h>
44
#include <linux/security.h>
45
#include <linux/swapops.h>
46
#include <linux/mempolicy.h>
Alexander Viro's avatar
Alexander Viro committed
47
#include <linux/namei.h>
48
#include <linux/xattr.h>
Linus Torvalds's avatar
Linus Torvalds committed
49
#include <asm/uaccess.h>
50
#include <asm/div64.h>
51
#include <asm/pgtable.h>
Linus Torvalds's avatar
Linus Torvalds committed
52

Linus Torvalds's avatar
Linus Torvalds committed
53 54
/* This magic number is used in glibc for posix shared memory */
#define TMPFS_MAGIC	0x01021994

/* Swap entries held in one index page; assumes PAGE_CACHE_SIZE == PAGE_SIZE */
#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
/* Entries reachable through one fully populated indirect page */
#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
/* inode->i_blocks is counted in 512-byte units */
#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)

/* Largest index: direct entries plus half doubly-, half triply-indirect */
#define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
#define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)

/* Number of pages to account for a mapping of the given byte size */
#define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)

/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
#define SHMEM_PAGEIN	 VM_READ
#define SHMEM_TRUNCATE	 VM_WRITE

/* Definition to limit shmem_truncate's steps between cond_rescheds */
#define LATENCY_LIMIT	 64

/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20

/* Keep swapped page count in private field of indirect struct page */
#define nr_swapped		private

/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
enum sgp_type {
	SGP_QUICK,	/* don't try more than file page cache lookup */
	SGP_READ,	/* don't exceed i_size, don't allocate page */
	SGP_CACHE,	/* don't exceed i_size, may allocate page */
	SGP_WRITE,	/* may exceed i_size, may allocate page */
};

/* Defined later in this file */
static int shmem_getpage(struct inode *inode, unsigned long idx,
			 struct page **pagep, enum sgp_type sgp, int *type);
88

89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
/* Allocate one index/indirect page for the swap-entry directory tree. */
static inline struct page *shmem_dir_alloc(unsigned int gfp_mask)
{
	/*
	 * ENTRIES_PER_PAGE above, and the use of BLOCKS_PER_PAGE on
	 * indirect pages, both assume PAGE_CACHE_SIZE: this allocation
	 * order would need rethinking if it ever diverges from PAGE_SIZE.
	 */
	return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
}

/* Release an index page obtained from shmem_dir_alloc. */
static inline void shmem_dir_free(struct page *page)
{
	__free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
}

/* Atomically kmap an indirect page for access as an array of page pointers. */
static struct page **shmem_dir_map(struct page *page)
{
	return (struct page **)kmap_atomic(page, KM_USER0);
}

/* Undo shmem_dir_map: drop the KM_USER0 atomic kmap. */
static inline void shmem_dir_unmap(struct page **dir)
{
	kunmap_atomic(dir, KM_USER0);
}

static swp_entry_t *shmem_swp_map(struct page *page)
115 116 117 118 119
{
	return (swp_entry_t *)kmap_atomic(page, KM_USER1);
}

static inline void shmem_swp_balance_unmap(void)
{
	/*
	 * When passing a pointer to an i_direct entry, to code which
	 * also handles indirect entries and so will shmem_swp_unmap,
	 * we must arrange for the preempt count to remain in balance.
	 * What kmap_atomic of a lowmem page does depends on config
	 * and architecture, so pretend to kmap_atomic some lowmem page.
	 */
	(void) kmap_atomic(ZERO_PAGE(0), KM_USER1);
}

/* Undo shmem_swp_map (or shmem_swp_balance_unmap): drop the KM_USER1 kmap. */
static inline void shmem_swp_unmap(swp_entry_t *entry)
{
	kunmap_atomic(entry, KM_USER1);
}

136 137
static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
{
138
	return sb->s_fs_info;
139
}
Linus Torvalds's avatar
Linus Torvalds committed
140

141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
/*
 * shmem_file_setup pre-accounts the whole fixed size of a VM object,
 * for shared memory and for shared anonymous (/dev/zero) mappings
 * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
 * consistent with the pre-accounting of private mappings ...
 */
static inline int shmem_acct_size(unsigned long flags, loff_t size)
{
	if (!(flags & VM_ACCOUNT))
		return 0;
	return security_vm_enough_memory(VM_ACCT(size));
}

/* Undo shmem_acct_size when the object goes away. */
static inline void shmem_unacct_size(unsigned long flags, loff_t size)
{
	if (!(flags & VM_ACCOUNT))
		return;
	vm_unacct_memory(VM_ACCT(size));
}

/*
 * ... whereas tmpfs objects are accounted incrementally as
 * pages are allocated, in order to allow huge sparse files.
 * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
 * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
 */
static inline int shmem_acct_block(unsigned long flags)
{
	if (flags & VM_ACCOUNT)
		return 0;
	return security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE));
}

/* Undo shmem_acct_block for each page freed from a tmpfs object. */
static inline void shmem_unacct_blocks(unsigned long flags, long pages)
{
	if (flags & VM_ACCOUNT)
		return;
	vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
}

Linus Torvalds's avatar
Linus Torvalds committed
177 178 179 180 181
/* Method tables and vm_ops, all defined later in this file */
static struct super_operations shmem_ops;
static struct address_space_operations shmem_aops;
static struct file_operations shmem_file_operations;
static struct inode_operations shmem_inode_operations;
static struct inode_operations shmem_dir_inode_operations;
static struct inode_operations shmem_special_inode_operations;
static struct vm_operations_struct shmem_vm_ops;

static struct backing_dev_info shmem_backing_dev_info = {
	.ra_pages	= 0,	/* No readahead */
	.memory_backed	= 1,	/* Does not contribute to dirty memory */
	.unplug_io_fn = default_unplug_io_fn,
};

/* Inodes with pages out on swap, scanned by shmem_unuse at swapoff time */
static LIST_HEAD(shmem_swaplist);
static spinlock_t shmem_swaplist_lock = SPIN_LOCK_UNLOCKED;
Linus Torvalds's avatar
Linus Torvalds committed
193

194
/*
 * Return freed pages to the superblock's free-block count and reduce
 * the inode's block usage to match.  sbinfo may be NULL (see SHMEM_SB
 * callers), in which case no per-sb accounting is kept.
 */
static void shmem_free_blocks(struct inode *inode, long pages)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	if (sbinfo) {
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_blocks += pages;
		inode->i_blocks -= pages*BLOCKS_PER_PAGE;
		spin_unlock(&sbinfo->stat_lock);
	}
}

Linus Torvalds's avatar
Linus Torvalds committed
205 206 207 208 209
/*
 * shmem_recalc_inode - recalculate the size of an inode
 *
 * @inode: inode to recalc
 *
 * We have to calculate the free blocks since the mm can drop
 * undirtied hole pages behind our back.
 *
 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
 *
 * It has to be called with the spinlock held.
 */
static void shmem_recalc_inode(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	long freed;

	/* Pages the mm has dropped without telling us */
	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
	if (freed > 0) {
		info->alloced -= freed;
		/* return them to both accounting systems */
		shmem_unacct_blocks(info->flags, freed);
		shmem_free_blocks(inode, freed);
	}
}

Linus Torvalds's avatar
Linus Torvalds committed
231 232 233 234 235 236 237 238
/*
 * shmem_swp_entry - find the swap vector position in the info structure
 *
 * @info:  info structure for the inode
 * @index: index of the page to find
 * @page:  optional page to add to the structure. Has to be preset to
 *         all zeros
 *
 * If there is no space allocated yet it will return NULL when
 * page is NULL, else it will use the page for the needed block,
 * setting it to NULL on return to indicate that it has been used.
 *
 * The swap vector is organized the following way:
 *
 * There are SHMEM_NR_DIRECT entries directly stored in the
 * shmem_inode_info structure. So small files do not need an addional
 * allocation.
 *
 * For pages with index > SHMEM_NR_DIRECT there is the pointer
 * i_indirect which points to a page which holds in the first half
 * doubly indirect blocks, in the second half triple indirect blocks:
 *
 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
 * following layout (for SHMEM_NR_DIRECT == 16):
 *
 * i_indirect -> dir --> 16-19
 * 	      |	     +-> 20-23
 * 	      |
 * 	      +-->dir2 --> 24-27
 * 	      |	       +-> 28-31
 * 	      |	       +-> 32-35
 * 	      |	       +-> 36-39
 * 	      |
 * 	      +-->dir3 --> 40-43
 * 	       	       +-> 44-47
 * 	      	       +-> 48-51
 * 	      	       +-> 52-55
 *
 * On success the returned pointer is an atomic kmap (KM_USER1): the
 * caller must release it with shmem_swp_unmap.
 */
static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
{
	unsigned long offset;
	struct page **dir;
	struct page *subdir;

	if (index < SHMEM_NR_DIRECT) {
		/* fake a kmap so caller's shmem_swp_unmap stays balanced */
		shmem_swp_balance_unmap();
		return info->i_direct+index;
	}
	if (!info->i_indirect) {
		if (page) {
			info->i_indirect = *page;
			*page = NULL;
		}
		return NULL;			/* need another page */
	}

	index -= SHMEM_NR_DIRECT;
	offset = index % ENTRIES_PER_PAGE;
	index /= ENTRIES_PER_PAGE;
	dir = shmem_dir_map(info->i_indirect);

	if (index >= ENTRIES_PER_PAGE/2) {
		/* second half of i_indirect: triple indirect, one more hop */
		index -= ENTRIES_PER_PAGE/2;
		dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
		index %= ENTRIES_PER_PAGE;
		subdir = *dir;
		if (!subdir) {
			if (page) {
				*dir = *page;
				*page = NULL;
			}
			shmem_dir_unmap(dir);
			return NULL;		/* need another page */
		}
		shmem_dir_unmap(dir);
		dir = shmem_dir_map(subdir);
	}

	dir += index;
	subdir = *dir;
	if (!subdir) {
		if (!page || !(subdir = *page)) {
			shmem_dir_unmap(dir);
			return NULL;		/* need a page */
		}
		*dir = subdir;
		*page = NULL;
	}
	shmem_dir_unmap(dir);
	return shmem_swp_map(subdir) + offset;
}

/*
 * Store @value at *entry, keeping info->swapped in step and, for entries
 * that live in an indirect page (not i_direct), the owning page's
 * nr_swapped count, which truncate/unuse use to skip empty vector pages.
 */
static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
{
	long incdec = value? 1: -1;

	entry->val = value;
	info->swapped += incdec;
	if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT)
		kmap_atomic_to_page(entry)->nr_swapped += incdec;
}

/*
 * shmem_swp_alloc - get the position of the swap entry for the page.
 *                   If it does not exist allocate the entry.
 *
 * @info:	info structure for the inode
 * @index:	index of the page to find
 * @sgp:	check and recheck i_size? skip allocation?
 *
 * Called with info->lock held; temporarily drops and retakes it around
 * the index-page allocation, so i_size and next_index are rechecked.
 */
static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
{
	struct inode *inode = &info->vfs_inode;
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	struct page *page = NULL;
	swp_entry_t *entry;

	if (sgp != SGP_WRITE &&
	    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
		return ERR_PTR(-EINVAL);

	while (!(entry = shmem_swp_entry(info, index, &page))) {
		if (sgp == SGP_READ)
			return shmem_swp_map(ZERO_PAGE(0));
		/*
		 * Test free_blocks against 1 not 0, since we have 1 data
		 * page (and perhaps indirect index pages) yet to allocate:
		 * a waste to allocate index if we cannot allocate data.
		 */
		if (sbinfo) {
			spin_lock(&sbinfo->stat_lock);
			if (sbinfo->free_blocks <= 1) {
				spin_unlock(&sbinfo->stat_lock);
				return ERR_PTR(-ENOSPC);
			}
			sbinfo->free_blocks--;
			inode->i_blocks += BLOCKS_PER_PAGE;
			spin_unlock(&sbinfo->stat_lock);
		}

		/* drop info->lock to allocate; state may change meanwhile */
		spin_unlock(&info->lock);
		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
		if (page) {
			clear_highpage(page);
			page->nr_swapped = 0;
		}
		spin_lock(&info->lock);

		if (!page) {
			shmem_free_blocks(inode, 1);
			return ERR_PTR(-ENOMEM);
		}
		/* i_size may have shrunk while the lock was dropped */
		if (sgp != SGP_WRITE &&
		    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
			entry = ERR_PTR(-EINVAL);
			break;
		}
		if (info->next_index <= index)
			info->next_index = index + 1;
	}
	if (page) {
		/* another task gave its page, or truncated the file */
		shmem_free_blocks(inode, 1);
		shmem_dir_free(page);
	}
	if (info->next_index <= index && !IS_ERR(entry))
		info->next_index = index + 1;
	return entry;
}

Linus Torvalds's avatar
Linus Torvalds committed
401 402 403 404
/*
 * shmem_free_swp - free some swap entries in a directory
 *
 * @dir:   pointer to the directory
405
 * @edir:  pointer after last entry of the directory
Linus Torvalds's avatar
Linus Torvalds committed
406
 */
407
static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
Linus Torvalds's avatar
Linus Torvalds committed
408
{
409
	swp_entry_t *ptr;
Linus Torvalds's avatar
Linus Torvalds committed
410 411
	int freed = 0;

412 413 414 415 416 417
	for (ptr = dir; ptr < edir; ptr++) {
		if (ptr->val) {
			free_swap_and_cache(*ptr);
			*ptr = (swp_entry_t){0};
			freed++;
		}
Linus Torvalds's avatar
Linus Torvalds committed
418 419 420 421
	}
	return freed;
}

422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
/*
 * Free swap entries [offset, limit) of the vector page @subdir, in
 * LATENCY_LIMIT-sized batches so we can cond_resched between them.
 * *dir is the caller's atomic kmap of the enclosing directory page: it
 * must be dropped (and is set NULL) before sleeping; the caller re-maps
 * it if needed.  Returns the number of entries freed.
 */
static int shmem_map_and_free_swp(struct page *subdir,
		int offset, int limit, struct page ***dir)
{
	swp_entry_t *ptr;
	int freed = 0;

	ptr = shmem_swp_map(subdir);
	for (; offset < limit; offset += LATENCY_LIMIT) {
		int size = limit - offset;
		if (size > LATENCY_LIMIT)
			size = LATENCY_LIMIT;
		freed += shmem_free_swp(ptr+offset, ptr+offset+size);
		if (need_resched()) {
			/* atomic kmaps must go before we may sleep */
			shmem_swp_unmap(ptr);
			if (*dir) {
				shmem_dir_unmap(*dir);
				*dir = NULL;
			}
			cond_resched();
			ptr = shmem_swp_map(subdir);
		}
	}
	shmem_swp_unmap(ptr);
	return freed;
}

/*
 * Free a NULL-terminated chain of index pages linked through page->lru,
 * rescheduling every LATENCY_LIMIT pages to bound latency.  @next must
 * be non-NULL (caller checks list_empty first).
 */
static void shmem_free_pages(struct list_head *next)
{
	int batch = 0;

	do {
		struct page *page = container_of(next, struct page, lru);

		next = next->next;
		shmem_dir_free(page);
		if (++batch >= LATENCY_LIMIT) {
			cond_resched();
			batch = 0;
		}
	} while (next);
}

465
/*
 * Truncate the swap-entry directory tree down to the new i_size: free
 * swap entries beyond it, and collect emptied index pages for freeing.
 * Sets SHMEM_TRUNCATE in info->flags while running so shmem_writepage
 * can detect the race (see its BUG_ON against next_index).
 */
static void shmem_truncate(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	unsigned long idx;
	unsigned long size;
	unsigned long limit;
	unsigned long stage;
	unsigned long diroff;
	struct page **dir;
	struct page *topdir;
	struct page *middir;
	struct page *subdir;
	swp_entry_t *ptr;
	LIST_HEAD(pages_to_free);
	long nr_pages_to_free = 0;
	long nr_swaps_freed = 0;
	int offset;
	int freed;

	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (idx >= info->next_index)
		return;		/* nothing beyond the new size */

	spin_lock(&info->lock);
	info->flags |= SHMEM_TRUNCATE;
	limit = info->next_index;
	info->next_index = idx;
	topdir = info->i_indirect;
	if (topdir && idx <= SHMEM_NR_DIRECT) {
		/* whole indirect tree goes: detach it for freeing below */
		info->i_indirect = NULL;
		nr_pages_to_free++;
		list_add(&topdir->lru, &pages_to_free);
	}
	spin_unlock(&info->lock);

	if (info->swapped && idx < SHMEM_NR_DIRECT) {
		ptr = info->i_direct;
		size = limit;
		if (size > SHMEM_NR_DIRECT)
			size = SHMEM_NR_DIRECT;
		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
	}
	if (!topdir)
		goto done2;

	BUG_ON(limit <= SHMEM_NR_DIRECT);
	limit -= SHMEM_NR_DIRECT;
	idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
	offset = idx % ENTRIES_PER_PAGE;	/* partial subdir to keep */
	idx -= offset;

	dir = shmem_dir_map(topdir);
	stage = ENTRIES_PER_PAGEPAGE/2;		/* where triple-indirect starts */
	if (idx < ENTRIES_PER_PAGEPAGE/2) {
		/* starting within the doubly-indirect half */
		middir = topdir;
		diroff = idx/ENTRIES_PER_PAGE;
	} else {
		/* starting within the triply-indirect half */
		dir += ENTRIES_PER_PAGE/2;
		dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
		while (stage <= idx)
			stage += ENTRIES_PER_PAGEPAGE;
		middir = *dir;
		if (*dir) {
			diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
				ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
			if (!diroff && !offset) {
				*dir = NULL;
				nr_pages_to_free++;
				list_add(&middir->lru, &pages_to_free);
			}
			shmem_dir_unmap(dir);
			dir = shmem_dir_map(middir);
		} else {
			diroff = 0;
			offset = 0;
			idx = stage;
		}
	}

	for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) {
		if (unlikely(idx == stage)) {
			/* crossed into the next middle directory */
			shmem_dir_unmap(dir);
			dir = shmem_dir_map(topdir) +
			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
			while (!*dir) {
				dir++;
				idx += ENTRIES_PER_PAGEPAGE;
				if (idx >= limit)
					goto done1;
			}
			stage = idx + ENTRIES_PER_PAGEPAGE;
			middir = *dir;
			*dir = NULL;
			nr_pages_to_free++;
			list_add(&middir->lru, &pages_to_free);
			shmem_dir_unmap(dir);
			cond_resched();
			dir = shmem_dir_map(middir);
			diroff = 0;
		}
		subdir = dir[diroff];
		if (subdir && subdir->nr_swapped) {
			size = limit - idx;
			if (size > ENTRIES_PER_PAGE)
				size = ENTRIES_PER_PAGE;
			freed = shmem_map_and_free_swp(subdir,
						offset, size, &dir);
			if (!dir)
				dir = shmem_dir_map(middir);
			nr_swaps_freed += freed;
			/*
			 * Only the first, partially-kept subdir needs the
			 * lock while adjusting nr_swapped: later subdirs
			 * are wholly ours once detached from the tree.
			 */
			if (offset)
				spin_lock(&info->lock);
			subdir->nr_swapped -= freed;
			if (offset)
				spin_unlock(&info->lock);
			BUG_ON(subdir->nr_swapped > offset);
		}
		if (offset)
			offset = 0;	/* partial subdir survives; rest don't */
		else if (subdir) {
			dir[diroff] = NULL;
			nr_pages_to_free++;
			list_add(&subdir->lru, &pages_to_free);
		}
	}
done1:
	shmem_dir_unmap(dir);
done2:
	if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
		/*
		 * Call truncate_inode_pages again: racing shmem_unuse_inode
		 * may have swizzled a page in from swap since vmtruncate or
		 * generic_delete_inode did it, before we lowered next_index.
		 * Also, though shmem_getpage checks i_size before adding to
		 * cache, no recheck after: so fix the narrow window there too.
		 */
		truncate_inode_pages(inode->i_mapping, inode->i_size);
	}

	spin_lock(&info->lock);
	info->flags &= ~SHMEM_TRUNCATE;
	info->swapped -= nr_swaps_freed;
	if (nr_pages_to_free)
		shmem_free_blocks(inode, nr_pages_to_free);
	shmem_recalc_inode(inode);
	spin_unlock(&info->lock);

	/*
	 * Empty swap vector directory pages to be freed?
	 */
	if (!list_empty(&pages_to_free)) {
		pages_to_free.prev->next = NULL;
		shmem_free_pages(pages_to_free.next);
	}
}
Linus Torvalds's avatar
Linus Torvalds committed
621

622
/*
 * setattr for tmpfs inodes: prepare for a shrinking truncate before
 * letting inode_setattr do the generic work, then release any page we
 * pinned for the partial-page case.
 */
static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	struct page *page = NULL;
	int error;

	if (attr->ia_valid & ATTR_SIZE) {
		if (attr->ia_size < inode->i_size) {
			/*
			 * If truncating down to a partial page, then
			 * if that page is already allocated, hold it
			 * in memory until the truncation is over, so
			 * truncate_partial_page cannot miss it were
			 * it assigned to swap.
			 */
			if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
				(void) shmem_getpage(inode,
					attr->ia_size>>PAGE_CACHE_SHIFT,
						&page, SGP_READ, NULL);
			}
			/*
			 * Reset SHMEM_PAGEIN flag so that shmem_truncate can
			 * detect if any pages might have been added to cache
			 * after truncate_inode_pages.  But we needn't bother
			 * if it's being fully truncated to zero-length: the
			 * nrpages check is efficient enough in that case.
			 */
			if (attr->ia_size) {
				struct shmem_inode_info *info = SHMEM_I(inode);
				spin_lock(&info->lock);
				info->flags &= ~SHMEM_PAGEIN;
				spin_unlock(&info->lock);
			}
		}
	}

	error = inode_change_ok(inode, attr);
	if (!error)
		error = inode_setattr(inode, attr);
	if (page)
		page_cache_release(page);
	return error;
}

666
/*
 * Final inode teardown: release accounting and swap for regular tmpfs
 * files (identified by their truncate op), drop the inode from the
 * swaplist, and give its inode count back to the superblock.
 */
static void shmem_delete_inode(struct inode *inode)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	struct shmem_inode_info *info = SHMEM_I(inode);

	if (inode->i_op->truncate == shmem_truncate) {
		shmem_unacct_size(info->flags, inode->i_size);
		inode->i_size = 0;
		shmem_truncate(inode);
		if (!list_empty(&info->swaplist)) {
			spin_lock(&shmem_swaplist_lock);
			list_del_init(&info->swaplist);
			spin_unlock(&shmem_swaplist_lock);
		}
	}
	if (sbinfo) {
		/* all blocks should have been released by the truncate */
		BUG_ON(inode->i_blocks);
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_inodes++;
		spin_unlock(&sbinfo->stat_lock);
	}
	clear_inode(inode);
}
Linus Torvalds's avatar
Linus Torvalds committed
689

690
/*
 * Scan the directory slice [dir, edir) for a slot holding @entry.
 * Returns the slot's offset from @dir, or -1 when not present.
 */
static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
{
	swp_entry_t *cur = dir;

	while (cur < edir) {
		if (cur->val == entry.val)
			return cur - dir;
		cur++;
	}
	return -1;
}
Linus Torvalds's avatar
Linus Torvalds committed
700

701
/*
 * Search one inode's swap vector for @entry; if found, move @page from
 * the swap cache back into the inode's page cache and clear the entry.
 * Returns 1 when the entry belonged to this inode, 0 otherwise.
 * Called under shmem_swaplist_lock from shmem_unuse.
 */
static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
{
	struct inode *inode;
	unsigned long idx;
	unsigned long size;
	unsigned long limit;
	unsigned long stage;
	struct page **dir;
	struct page *subdir;
	swp_entry_t *ptr;
	int offset;

	idx = 0;
	ptr = info->i_direct;
	spin_lock(&info->lock);
	limit = info->next_index;
	size = limit;
	if (size > SHMEM_NR_DIRECT)
		size = SHMEM_NR_DIRECT;
	offset = shmem_find_swp(entry, ptr, ptr+size);
	if (offset >= 0) {
		/* balance the shmem_swp_unmap done at "found" */
		shmem_swp_balance_unmap();
		goto found;
	}
	if (!info->i_indirect)
		goto lost2;

	dir = shmem_dir_map(info->i_indirect);
	stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;

	for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
		if (unlikely(idx == stage)) {
			/* step from doubly- into/through triply-indirect */
			shmem_dir_unmap(dir-1);
			dir = shmem_dir_map(info->i_indirect) +
			    ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
			while (!*dir) {
				dir++;
				idx += ENTRIES_PER_PAGEPAGE;
				if (idx >= limit)
					goto lost1;
			}
			stage = idx + ENTRIES_PER_PAGEPAGE;
			subdir = *dir;
			shmem_dir_unmap(dir);
			dir = shmem_dir_map(subdir);
		}
		subdir = *dir;
		if (subdir && subdir->nr_swapped) {
			ptr = shmem_swp_map(subdir);
			size = limit - idx;
			if (size > ENTRIES_PER_PAGE)
				size = ENTRIES_PER_PAGE;
			offset = shmem_find_swp(entry, ptr, ptr+size);
			if (offset >= 0) {
				shmem_dir_unmap(dir);
				goto found;
			}
			shmem_swp_unmap(ptr);
		}
	}
lost1:
	shmem_dir_unmap(dir-1);
lost2:
	spin_unlock(&info->lock);
	return 0;
found:
	idx += offset;
	inode = &info->vfs_inode;
	if (move_from_swap_cache(page, idx, inode->i_mapping) == 0) {
		info->flags |= SHMEM_PAGEIN;
		shmem_swp_set(info, ptr + offset, 0);
	}
	shmem_swp_unmap(ptr);
	spin_unlock(&info->lock);
	/*
	 * Decrement swap count even when the entry is left behind:
	 * try_to_unuse will skip over mms, then reincrement count.
	 */
	swap_free(entry);
	return 1;
}

Linus Torvalds's avatar
Linus Torvalds committed
783
/*
 * shmem_unuse() search for an eventually swapped out shmem page.
 *
 * Walks every inode on shmem_swaplist until one claims @entry; inodes
 * with no swapped pages are dropped from the list as we go.  Returns 1
 * if the page was moved back into an inode's page cache, 0 otherwise.
 */
int shmem_unuse(swp_entry_t entry, struct page *page)
{
	struct list_head *p, *next;
	struct shmem_inode_info *info;
	int found = 0;

	spin_lock(&shmem_swaplist_lock);
	list_for_each_safe(p, next, &shmem_swaplist) {
		info = list_entry(p, struct shmem_inode_info, swaplist);
		if (!info->swapped)
			list_del_init(&info->swaplist);
		else if (shmem_unuse_inode(info, entry, page)) {
			/* move head to start search for next from here */
			list_move_tail(&shmem_swaplist, &info->swaplist);
			found = 1;
			break;
		}
	}
	spin_unlock(&shmem_swaplist_lock);
	return found;
}

/*
 * Move the page from the page cache to the swap cache.
 *
 * On success the page's swap entry is recorded in the inode's vector,
 * the inode is (re)attached to shmem_swaplist, and 0 is returned with
 * the page unlocked.  On any failure the page is redirtied and
 * WRITEPAGE_ACTIVATE returned, leaving the page locked for the caller.
 */
static int shmem_writepage(struct page *page, struct writeback_control *wbc)
{
	struct shmem_inode_info *info;
	swp_entry_t *entry, swap;
	struct address_space *mapping;
	unsigned long index;
	struct inode *inode;

	BUG_ON(!PageLocked(page));
	BUG_ON(page_mapped(page));

	mapping = page->mapping;
	index = page->index;
	inode = mapping->host;
	info = SHMEM_I(inode);
	if (info->flags & VM_LOCKED)
		goto redirty;
	swap = get_swap_page();
	if (!swap.val)
		goto redirty;

	spin_lock(&info->lock);
	shmem_recalc_inode(inode);
	if (index >= info->next_index) {
		/* only a concurrent truncate can put us beyond next_index */
		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
		goto unlock;
	}
	entry = shmem_swp_entry(info, index, NULL);
	BUG_ON(!entry);
	BUG_ON(entry->val);

	if (move_to_swap_cache(page, swap) == 0) {
		shmem_swp_set(info, entry, swap.val);
		shmem_swp_unmap(entry);
		spin_unlock(&info->lock);
		if (list_empty(&info->swaplist)) {
			spin_lock(&shmem_swaplist_lock);
			/* move instead of add in case we're racing */
			list_move_tail(&info->swaplist, &shmem_swaplist);
			spin_unlock(&shmem_swaplist_lock);
		}
		unlock_page(page);
		return 0;
	}

	shmem_swp_unmap(entry);
unlock:
	spin_unlock(&info->lock);
	swap_free(swap);
redirty:
	set_page_dirty(page);
	return WRITEPAGE_ACTIVATE;	/* Return with the page locked */
}

865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932
#ifdef CONFIG_NUMA
static struct page *shmem_swapin_async(struct shared_policy *p,
				       swp_entry_t entry, unsigned long idx)
{
	struct vm_area_struct pvma;
	struct page *swapped;

	/*
	 * Build a throwaway vma on the stack whose only purpose is to
	 * carry the shared policy for this index into the swap reader.
	 */
	memset(&pvma, 0, sizeof(struct vm_area_struct));
	pvma.vm_pgoff = idx;
	pvma.vm_end = PAGE_SIZE;
	pvma.vm_policy = mpol_shared_policy_lookup(p, idx);
	swapped = read_swap_cache_async(entry, &pvma, 0);
	mpol_free(pvma.vm_policy);
	return swapped;
}

struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry,
			  unsigned long idx)
{
	struct shared_policy *p = &info->policy;
	struct page *readahead;
	unsigned long slot;
	int nr, window;

	/*
	 * Warm the swap cache with the neighbouring swap slots first;
	 * each readahead page is released immediately, we only want
	 * the I/O started.
	 */
	window = valid_swaphandles(entry, &slot);
	for (nr = 0; nr < window; nr++, slot++) {
		readahead = shmem_swapin_async(p,
				swp_entry(swp_type(entry), slot), idx);
		if (!readahead)
			break;
		page_cache_release(readahead);
	}
	lru_add_drain();	/* Push any new pages onto the LRU now */
	return shmem_swapin_async(p, entry, idx);
}

static struct page *
shmem_alloc_page(unsigned long gfp, struct shmem_inode_info *info,
		 unsigned long idx)
{
	struct page *new_page;
	struct vm_area_struct pvma;

	/* Pseudo vma: exists only to feed the policy to alloc_page_vma() */
	memset(&pvma, 0, sizeof(struct vm_area_struct));
	pvma.vm_end = PAGE_SIZE;
	pvma.vm_pgoff = idx;
	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
	new_page = alloc_page_vma(gfp, &pvma, 0);
	mpol_free(pvma.vm_policy);
	return new_page;
}
#else
static inline struct page *
shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry,
	     unsigned long idx)
{
	/* No NUMA policy: plain readahead, then fetch the target page. */
	swapin_readahead(entry, 0, NULL);
	return read_swap_cache_async(entry, NULL, 0);
}

static inline struct page *
shmem_alloc_page(unsigned long gfp, struct shmem_inode_info *info,
		 unsigned long idx)
{
	/* Without NUMA there is no policy to honour: any page will do. */
	return alloc_page(gfp);
}
#endif

Linus Torvalds's avatar
Linus Torvalds committed
933
/*
 * shmem_getpage - either get the page from swap or allocate a new one
 *
 * If we allocate a new one we do not mark it dirty. That's up to the
 * vm. If we swap it in we mark it dirty since we also free the swap
 * entry since a page cannot live in both the swap and page cache
 *
 * @inode: the shmem inode
 * @idx:   page index within the file
 * @pagep: in: possibly a locked page from the caller (shmem_prepare_write);
 *         out: the locked-then-unlocked uptodate page on success
 * @sgp:   what the caller wants (SGP_QUICK/SGP_READ/SGP_WRITE/SGP_CACHE)
 * @type:  fault type in/out — upgraded to VM_FAULT_MAJOR if real I/O is done
 *
 * Returns 0 on success with *pagep set, or a negative errno.
 * NOTE(review): the retry protocol below drops and retakes info->lock
 * repeatedly; statement order is load-bearing — do not reorder.
 */
static int shmem_getpage(struct inode *inode, unsigned long idx,
			struct page **pagep, enum sgp_type sgp, int *type)
{
	struct address_space *mapping = inode->i_mapping;
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_sb_info *sbinfo;
	struct page *filepage = *pagep;
	struct page *swappage;
	swp_entry_t *entry;
	swp_entry_t swap;
	int error;

	if (idx >= SHMEM_MAX_INDEX)
		return -EFBIG;
	/*
	 * Normally, filepage is NULL on entry, and either found
	 * uptodate immediately, or allocated and zeroed, or read
	 * in under swappage, which is then assigned to filepage.
	 * But shmem_prepare_write passes in a locked filepage,
	 * which may be found not uptodate by other callers too,
	 * and may need to be copied from the swappage read in.
	 */
repeat:
	if (!filepage)
		filepage = find_lock_page(mapping, idx);
	if (filepage && PageUptodate(filepage))
		goto done;
	error = 0;
	/* SGP_QUICK callers must not block: give up rather than do I/O */
	if (sgp == SGP_QUICK)
		goto failed;

	spin_lock(&info->lock);
	shmem_recalc_inode(inode);
	entry = shmem_swp_alloc(info, idx, sgp);
	if (IS_ERR(entry)) {
		spin_unlock(&info->lock);
		error = PTR_ERR(entry);
		goto failed;
	}
	swap = *entry;

	if (swap.val) {
		/* Look it up and read it in.. */
		swappage = lookup_swap_cache(swap);
		if (!swappage) {
			shmem_swp_unmap(entry);
			spin_unlock(&info->lock);
			/* here we actually do the io */
			if (type && *type == VM_FAULT_MINOR) {
				inc_page_state(pgmajfault);
				*type = VM_FAULT_MAJOR;
			}
			swappage = shmem_swapin(info, swap, idx);
			if (!swappage) {
				/*
				 * Swapin failed: only report -ENOMEM if the
				 * same swap entry is still present, i.e. no
				 * one else brought the page in meanwhile.
				 */
				spin_lock(&info->lock);
				entry = shmem_swp_alloc(info, idx, sgp);
				if (IS_ERR(entry))
					error = PTR_ERR(entry);
				else {
					if (entry->val == swap.val)
						error = -ENOMEM;
					shmem_swp_unmap(entry);
				}
				spin_unlock(&info->lock);
				if (error)
					goto failed;
				goto repeat;
			}
			wait_on_page_locked(swappage);
			page_cache_release(swappage);
			goto repeat;
		}

		/* We have to do this with page locked to prevent races */
		if (TestSetPageLocked(swappage)) {
			shmem_swp_unmap(entry);
			spin_unlock(&info->lock);
			wait_on_page_locked(swappage);
			page_cache_release(swappage);
			goto repeat;
		}
		if (PageWriteback(swappage)) {
			shmem_swp_unmap(entry);
			spin_unlock(&info->lock);
			wait_on_page_writeback(swappage);
			unlock_page(swappage);
			page_cache_release(swappage);
			goto repeat;
		}
		if (!PageUptodate(swappage)) {
			/* Swap read I/O error: surface it to the caller */
			shmem_swp_unmap(entry);
			spin_unlock(&info->lock);
			unlock_page(swappage);
			page_cache_release(swappage);
			error = -EIO;
			goto failed;
		}

		if (filepage) {
			/*
			 * Caller supplied a locked page: copy the swapped-in
			 * data into it and drop the swap entry.
			 */
			shmem_swp_set(info, entry, 0);
			shmem_swp_unmap(entry);
			delete_from_swap_cache(swappage);
			spin_unlock(&info->lock);
			copy_highpage(filepage, swappage);
			unlock_page(swappage);
			page_cache_release(swappage);
			flush_dcache_page(filepage);
			SetPageUptodate(filepage);
			set_page_dirty(filepage);
			swap_free(swap);
		} else if (!(error = move_from_swap_cache(
				swappage, idx, mapping))) {
			info->flags |= SHMEM_PAGEIN;
			shmem_swp_set(info, entry, 0);
			shmem_swp_unmap(entry);
			spin_unlock(&info->lock);
			filepage = swappage;
			swap_free(swap);
		} else {
			shmem_swp_unmap(entry);
			spin_unlock(&info->lock);
			unlock_page(swappage);
			page_cache_release(swappage);
			if (error == -ENOMEM) {
				/* let kswapd refresh zone for GFP_ATOMICs */
				blk_congestion_wait(WRITE, HZ/50);
			}
			goto repeat;
		}
	} else if (sgp == SGP_READ && !filepage) {
		/*
		 * Read of a hole: don't allocate, just return whatever
		 * uptodate page may already be in the page cache (the
		 * caller substitutes the zero page for NULL).
		 */
		shmem_swp_unmap(entry);
		filepage = find_get_page(mapping, idx);
		if (filepage &&
		    (!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
			spin_unlock(&info->lock);
			wait_on_page_locked(filepage);
			page_cache_release(filepage);
			filepage = NULL;
			goto repeat;
		}
		spin_unlock(&info->lock);
	} else {
		/* Allocate a fresh zeroed page, charging the filesystem */
		shmem_swp_unmap(entry);
		sbinfo = SHMEM_SB(inode->i_sb);
		if (sbinfo) {
			spin_lock(&sbinfo->stat_lock);
			if (sbinfo->free_blocks == 0 ||
			    shmem_acct_block(info->flags)) {
				spin_unlock(&sbinfo->stat_lock);
				spin_unlock(&info->lock);
				error = -ENOSPC;
				goto failed;
			}
			sbinfo->free_blocks--;
			inode->i_blocks += BLOCKS_PER_PAGE;
			spin_unlock(&sbinfo->stat_lock);
		} else if (shmem_acct_block(info->flags)) {
			spin_unlock(&info->lock);
			error = -ENOSPC;
			goto failed;
		}

		if (!filepage) {
			spin_unlock(&info->lock);
			filepage = shmem_alloc_page(mapping_gfp_mask(mapping),
						    info,
						    idx);
			if (!filepage) {
				/* Roll back the block charge taken above */
				shmem_unacct_blocks(info->flags, 1);
				shmem_free_blocks(inode, 1);
				error = -ENOMEM;
				goto failed;
			}

			spin_lock(&info->lock);
			entry = shmem_swp_alloc(info, idx, sgp);
			if (IS_ERR(entry))
				error = PTR_ERR(entry);
			else {
				swap = *entry;
				shmem_swp_unmap(entry);
			}
			/*
			 * Recheck under the lock: someone may have swapped
			 * the page out, or raced us into the page cache.
			 */
			if (error || swap.val || 0 != add_to_page_cache_lru(
					filepage, mapping, idx, GFP_ATOMIC)) {
				spin_unlock(&info->lock);
				page_cache_release(filepage);
				shmem_unacct_blocks(info->flags, 1);
				shmem_free_blocks(inode, 1);
				filepage = NULL;
				if (error)
					goto failed;
				goto repeat;
			}
			info->flags |= SHMEM_PAGEIN;
		}

		info->alloced++;
		spin_unlock(&info->lock);
		clear_highpage(filepage);
		flush_dcache_page(filepage);
		SetPageUptodate(filepage);
	}
done:
	/* Only unlock if the page wasn't the one the caller passed in */
	if (*pagep != filepage) {
		unlock_page(filepage);
		*pagep = filepage;
	}
	return 0;

failed:
	if (*pagep != filepage) {
		unlock_page(filepage);
		page_cache_release(filepage);
	}
	return error;
}

1157
struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
Linus Torvalds's avatar
Linus Torvalds committed
1158
{
1159
	struct inode *inode = vma->vm_file->f_dentry->d_inode;
1160
	struct page *page = NULL;
1161 1162
	unsigned long idx;
	int error;
Linus Torvalds's avatar
Linus Torvalds committed
1163

1164
	idx = (address - vma->vm_start) >> PAGE_SHIFT;
Linus Torvalds's avatar
Linus Torvalds committed
1165
	idx += vma->vm_pgoff;
1166
	idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
Linus Torvalds's avatar
Linus Torvalds committed
1167

1168
	error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
1169 1170
	if (error)
		return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
Linus Torvalds's avatar
Linus Torvalds committed
1171

1172
	mark_page_accessed(page);
1173
	return page;
Linus Torvalds's avatar
Linus Torvalds committed
1174 1175
}

1176 1177
/*
 * Pre-populate a range of a shmem mapping (remap_file_pages support).
 *
 * @nonblock: if set, use SGP_QUICK so no I/O/allocation is done, and
 *            install file ptes for pages not yet present.
 * Returns 0 on success or a negative errno.
 */
static int shmem_populate(struct vm_area_struct *vma,
	unsigned long addr, unsigned long len,
	pgprot_t prot, unsigned long pgoff, int nonblock)
{
	struct inode *inode = vma->vm_file->f_dentry->d_inode;
	struct mm_struct *mm = vma->vm_mm;
	enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
	unsigned long size;

	/* Reject any range that extends beyond the current file size */
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
		return -EINVAL;

	while ((long) len > 0) {
		struct page *page = NULL;
		int err;
		/*
		 * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
		 */
		err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
		if (err)
			return err;
		if (page) {
			mark_page_accessed(page);
			err = install_page(mm, vma, addr, page, prot);
			if (err) {
				/* install_page failed: drop our reference */
				page_cache_release(page);
				return err;
			}
		} else if (nonblock) {
			/* Page not resident: record a file pte instead */
			err = install_file_pte(mm, vma, addr, pgoff, prot);
			if (err)
				return err;
		}

		len -= PAGE_SIZE;
		addr += PAGE_SIZE;
		pgoff++;
	}
	return 0;
}

1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235
#ifdef CONFIG_NUMA
int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
	struct inode *inode = vma->vm_file->f_dentry->d_inode;

	/* Delegate to the shared-policy tree hanging off the inode. */
	return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, new);
}

struct mempolicy *
shmem_get_policy(struct vm_area_struct *vma, unsigned long addr)
{
	struct inode *inode = vma->vm_file->f_dentry->d_inode;
	unsigned long idx = vma->vm_pgoff +
			((addr - vma->vm_start) >> PAGE_SHIFT);

	/* Look up the policy installed for this file index, if any. */
	return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, idx);
}
#endif

1236
int shmem_lock(struct file *file, int lock, struct user_struct *user)
Linus Torvalds's avatar
Linus Torvalds committed
1237
{
1238 1239
	struct inode *inode = file->f_dentry->d_inode;
	struct shmem_inode_info *info = SHMEM_I(inode);
1240
	int retval = -ENOMEM;
Linus Torvalds's avatar
Linus Torvalds committed
1241

1242
	spin_lock(&info->lock);
1243 1244 1245
	if (lock && !(info->flags & VM_LOCKED)) {
		if (!user_shm_lock(inode->i_size, user))
			goto out_nomem;
1246
		info->flags |= VM_LOCKED;
1247 1248 1249
	}
	if (!lock && (info->flags & VM_LOCKED) && user) {
		user_shm_unlock(inode->i_size, user);
1250
		info->flags &= ~VM_LOCKED;
1251 1252 1253
	}
	retval = 0;
out_nomem:
1254
	spin_unlock(&info->lock);
1255
	return retval;
Linus Torvalds's avatar
Linus Torvalds committed
1256 1257
}

1258
static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
Linus Torvalds's avatar
Linus Torvalds committed
1259
{
1260 1261
	file_accessed(file);
	vma->vm_ops = &shmem_vm_ops;
Linus Torvalds's avatar
Linus Torvalds committed
1262
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
1263 1264
}

1265 1266
/*
 * Allocate and initialize a new shmem/tmpfs inode of the given mode.
 *
 * When the superblock enforces limits (sbinfo != NULL), an inode is
 * charged against free_inodes up front and refunded if new_inode()
 * fails. Returns the new inode, or NULL on failure.
 */
static struct inode *
shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
{
	struct inode *inode;
	struct shmem_inode_info *info;
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

	if (sbinfo) {
		spin_lock(&sbinfo->stat_lock);
		if (!sbinfo->free_inodes) {
			spin_unlock(&sbinfo->stat_lock);
			return NULL;
		}
		sbinfo->free_inodes--;
		spin_unlock(&sbinfo->stat_lock);
	}

	inode = new_inode(sb);
	if (inode) {
		inode->i_mode = mode;
		inode->i_uid = current->fsuid;
		inode->i_gid = current->fsgid;
		inode->i_blksize = PAGE_CACHE_SIZE;
		inode->i_blocks = 0;
		inode->i_mapping->a_ops = &shmem_aops;
		inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		info = SHMEM_I(inode);
		/* Zero the shmem-private area that follows in the inode */
		memset(info, 0, (char *)inode - (char *)info);
		spin_lock_init(&info->lock);
		INIT_LIST_HEAD(&info->swaplist);

		switch (mode & S_IFMT) {
		default:
			/* Device nodes, fifos, sockets */
			inode->i_op = &shmem_special_inode_operations;
			init_special_inode(inode, mode, dev);
			break;
		case S_IFREG:
			inode->i_op = &shmem_inode_operations;
			inode->i_fop = &shmem_file_operations;
			mpol_shared_policy_init(&info->policy);
			break;
		case S_IFDIR:
			inode->i_nlink++;
			/* Some things misbehave if size == 0 on a directory */
			inode->i_size = 2 * BOGO_DIRENT_SIZE;
			inode->i_op = &shmem_dir_inode_operations;
			inode->i_fop = &simple_dir_operations;
			break;
		case S_IFLNK:
			/*
			 * Must not load anything in the rbtree,
			 * mpol_free_shared_policy will not be called.
			 */
			mpol_shared_policy_init(&info->policy);
			break;
		}
	} else if (sbinfo) {
		/* new_inode() failed: refund the inode charged above */
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_inodes++;
		spin_unlock(&sbinfo->stat_lock);
	}
	return inode;
}

1330 1331 1332
#ifdef CONFIG_TMPFS

static int shmem_set_size(struct shmem_sb_info *sbinfo,
Linus Torvalds's avatar
Linus Torvalds committed
1333 1334 1335 1336 1337
			  unsigned long max_blocks, unsigned long max_inodes)
{
	int error;
	unsigned long blocks, inodes;

1338 1339 1340
	spin_lock(&sbinfo->stat_lock);
	blocks = sbinfo->max_blocks - sbinfo->free_blocks;
	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
Linus Torvalds's avatar
Linus Torvalds committed
1341 1342 1343 1344 1345 1346
	error = -EINVAL;
	if (max_blocks < blocks)
		goto out;
	if (max_inodes < inodes)
		goto out;
	error = 0;
1347 1348 1349 1350
	sbinfo->max_blocks  = max_blocks;
	sbinfo->free_blocks = max_blocks - blocks;
	sbinfo->max_inodes  = max_inodes;
	sbinfo->free_inodes = max_inodes - inodes;
Linus Torvalds's avatar
Linus Torvalds committed
1351
out:
1352
	spin_unlock(&sbinfo->stat_lock);
Linus Torvalds's avatar
Linus Torvalds committed
1353 1354 1355
	return error;
}

Linus Torvalds's avatar
Linus Torvalds committed
1356 1357 1358
static struct inode_operations shmem_symlink_inode_operations;
static struct inode_operations shmem_symlink_inline_operations;

1359
/*
1360 1361
 * Normally tmpfs makes no use of shmem_prepare_write, but it
 * lets a tmpfs file be used read-write below the loop driver.
1362 1363 1364 1365 1366
 */
static int
shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
	struct inode *inode = page->mapping->host;
1367
	return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL);
1368 1369
}

Linus Torvalds's avatar
Linus Torvalds committed
1370
/*
 * write() for a tmpfs file: copy user data page by page into the
 * page cache via shmem_getpage(SGP_WRITE).
 *
 * Returns the number of bytes written, or a negative errno; on a
 * partial fault mid-copy the short count written so far is returned
 * (or -EFAULT if nothing was written).
 */
static ssize_t
shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
	struct inode	*inode = file->f_dentry->d_inode;
	loff_t		pos;
	unsigned long	written;
	ssize_t		err;

	if ((ssize_t) count < 0)
		return -EINVAL;

	if (!access_ok(VERIFY_READ, buf, count))
		return -EFAULT;

	down(&inode->i_sem);

	pos = *ppos;
	written = 0;

	err = generic_write_checks(file, &pos, &count, 0);
	if (err || !count)
		goto out;

	err = remove_suid(file->f_dentry);
	if (err)
		goto out;

	inode->i_ctime = inode->i_mtime = CURRENT_TIME;

	do {
		struct page *page = NULL;
		unsigned long bytes, index, offset;
		char *kaddr;
		int left;

		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
		index = pos >> PAGE_CACHE_SHIFT;
		bytes = PAGE_CACHE_SIZE - offset;
		if (bytes > count)
			bytes = count;

		/*
		 * We don't hold page lock across copy from user -
		 * what would it guard against? - so no deadlock here.
		 * But it still may be a good idea to prefault below.
		 */

		err = shmem_getpage(inode, index, &page, SGP_WRITE, NULL);
		if (err)
			break;

		left = bytes;
		if (PageHighMem(page)) {
			/*
			 * Touch first and last byte to fault the user
			 * range in before taking the atomic kmap, since
			 * the in-atomic copy cannot handle faults.
			 */
			volatile unsigned char dummy;
			__get_user(dummy, buf);
			__get_user(dummy, buf + bytes - 1);

			kaddr = kmap_atomic(page, KM_USER0);
			left = __copy_from_user_inatomic(kaddr + offset,
							buf, bytes);
			kunmap_atomic(kaddr, KM_USER0);
		}
		if (left) {
			/* Atomic copy fell short (or lowmem page): retry
			 * with a sleeping kmap + faulting copy. */
			kaddr = kmap(page);
			left = __copy_from_user(kaddr + offset, buf, bytes);
			kunmap(page);
		}

		written += bytes;
		count -= bytes;
		pos += bytes;
		buf += bytes;
		if (pos > inode->i_size)
			i_size_write(inode, pos);

		flush_dcache_page(page);
		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		if (left) {
			/* User buffer faulted: back out the unwritten tail */
			pos -= left;
			written -= left;
			err = -EFAULT;
			break;
		}

		/*
		 * Our dirty pages are not counted in nr_dirty,
		 * and we do not attempt to balance dirty pages.
		 */

		cond_resched();
	} while (count);

	*ppos = pos;
	if (written)
		err = written;
out:
	up(&inode->i_sem);
	return err;
}

1473
/*
 * Core of read()/sendfile() for tmpfs: walk the file page by page,
 * feeding each page (or ZERO_PAGE for holes) to @actor, which copies
 * the data to its destination and updates desc->count/written.
 * Results (bytes consumed / error) are reported through @desc.
 */
static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct address_space *mapping = inode->i_mapping;
	unsigned long index, offset;

	index = *ppos >> PAGE_CACHE_SHIFT;
	offset = *ppos & ~PAGE_CACHE_MASK;

	for (;;) {
		struct page *page = NULL;
		unsigned long end_index, nr, ret;
		loff_t i_size = i_size_read(inode);

		/* Stop at EOF (checked again below after the page lookup) */
		end_index = i_size >> PAGE_CACHE_SHIFT;
		if (index > end_index)
			break;
		if (index == end_index) {
			nr = i_size & ~PAGE_CACHE_MASK;
			if (nr <= offset)
				break;
		}

		/* page stays NULL for a hole: SGP_READ does not allocate */
		desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL);
		if (desc->error) {
			if (desc->error == -EINVAL)
				desc->error = 0;
			break;
		}

		/*
		 * We must evaluate after, since reads (unlike writes)
		 * are called without i_sem protection against truncate
		 */
		nr = PAGE_CACHE_SIZE;
		i_size = i_size_read(inode);
		end_index = i_size >> PAGE_CACHE_SHIFT;
		if (index == end_index) {
			nr = i_size & ~PAGE_CACHE_MASK;
			if (nr <= offset) {
				if (page)
					page_cache_release(page);
				break;
			}
		}
		nr -= offset;

		if (page) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping))
				flush_dcache_page(page);
			/*
			 * Mark the page accessed if we read the beginning.
			 */
			if (!offset)
				mark_page_accessed(page);
		} else
			page = ZERO_PAGE(0);

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 *
		 * The actor routine returns how many bytes were actually used..
		 * NOTE! This may not be the same as how much of a user buffer
		 * we filled up (we may be padding etc), so we can only update
		 * "pos" here (the actor routine has to update the user buffer
		 * pointers and the remaining count).
		 */
		ret = actor(desc, page, offset, nr);
		offset += ret;
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;

		page_cache_release(page);
		if (ret != nr || !desc->count)
			break;

		cond_resched();
	}

	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
	file_accessed(filp);
}

1562
/*
 * read() entry point: validate the user buffer, then drive
 * do_shmem_file_read() with the standard file_read_actor.
 */
static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	read_descriptor_t desc;

	if ((ssize_t) count < 0)
		return -EINVAL;
	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;
	if (!count)
		return 0;

	desc.error = 0;
	desc.written = 0;
	desc.count = count;
	desc.arg.buf = buf;

	do_shmem_file_read(filp, ppos, &desc, file_read_actor);
	if (desc.written)
		return desc.written;
	return desc.error;
}
Linus Torvalds's avatar
Linus Torvalds committed
1583

1584
static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
1585
			 size_t count, read_actor_t actor, void *target)
1586 1587
{
	read_descriptor_t desc;
Linus Torvalds's avatar
Linus Torvalds committed
1588

1589 1590 1591 1592 1593
	if (!count)
		return 0;

	desc.written = 0;
	desc.count = count;
1594
	desc.arg.data = target;
1595 1596
	desc.error = 0;

1597
	do_shmem_file_read(in_file, ppos, &desc, actor);
1598 1599 1600
	if (desc.written)
		return desc.written;
	return desc.error;
Linus Torvalds's avatar
Linus Torvalds committed
1601 1602
}

1603
static int shmem_statfs(struct super_block *sb, struct kstatfs *buf)
Linus Torvalds's avatar
Linus Torvalds committed
1604
{
Linus Torvalds's avatar
Linus Torvalds committed
1605 1606
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

Linus Torvalds's avatar
Linus Torvalds committed
1607
	buf->f_type = TMPFS_MAGIC;
Linus Torvalds's avatar
Linus Torvalds committed
1608
	buf->f_bsize = PAGE_CACHE_SIZE;
Hugh Dickins's avatar
Hugh Dickins committed
1609
	buf->f_namelen = NAME_MAX;
1610 1611 1612 1613 1614 1615 1616 1617 1618
	if (sbinfo) {
		spin_lock(&sbinfo->stat_lock);
		buf->f_blocks = sbinfo->max_blocks;
		buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
		buf->f_files = sbinfo->max_inodes;
		buf->f_ffree = sbinfo->free_inodes;
		spin_unlock(&sbinfo->stat_lock);
	}
	/* else leave those fields 0 like simple_statfs */
Linus Torvalds's avatar
Linus Torvalds committed
1619 1620 1621 1622 1623 1624
	return 0;
}

/*
 * File creation. Allocate an inode, and we're done..
 */
1625 1626
static int
shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
Linus Torvalds's avatar
Linus Torvalds committed
1627
{
1628
	struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
Linus Torvalds's avatar
Linus Torvalds committed
1629 1630 1631
	int error = -ENOSPC;

	if (inode) {
1632 1633 1634 1635 1636
		if (dir->i_mode & S_ISGID) {
			inode->i_gid = dir->i_gid;
			if (S_ISDIR(mode))
				inode->i_mode |= S_ISGID;
		}
1637
		dir->i_size += BOGO_DIRENT_SIZE;
Hugh Dickins's avatar
Hugh Dickins committed
1638
		dir->i_ctime = dir->i_mtime = CURRENT_TIME;
Linus Torvalds's avatar
Linus Torvalds committed
1639 1640 1641 1642 1643 1644 1645
		d_instantiate(dentry, inode);
		dget(dentry); /* Extra count - pin the dentry in core */
		error = 0;
	}
	return error;
}

1646
static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
Linus Torvalds's avatar
Linus Torvalds committed
1647
{
Linus Torvalds's avatar
Linus Torvalds committed
1648 1649 1650 1651 1652 1653
	int error;

	if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
		return error;
	dir->i_nlink++;
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
1654 1655
}

1656 1657
static int shmem_create(struct inode *dir, struct dentry *dentry, int mode,
		struct nameidata *nd)
Linus Torvalds's avatar
Linus Torvalds committed
1658 1659 1660 1661 1662 1663 1664
{
	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
}

/*
 * Link a file..
 *
 * Each hard link is charged as an inode against the mount's limit
 * (see the comment below); shmem_unlink refunds it when a surplus
 * link is removed.
 */
static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = old_dentry->d_inode;
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);

	/*
	 * No ordinary (disk based) filesystem counts links as inodes;
	 * but each new link needs a new dentry, pinning lowmem, and
	 * tmpfs dentries cannot be pruned until they are unlinked.
	 */
	if (sbinfo) {
		spin_lock(&sbinfo->stat_lock);
		if (!sbinfo->free_inodes) {
			spin_unlock(&sbinfo->stat_lock);
			return -ENOSPC;
		}
		sbinfo->free_inodes--;
		spin_unlock(&sbinfo->stat_lock);
	}

	dir->i_size += BOGO_DIRENT_SIZE;
	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
	inode->i_nlink++;
	atomic_inc(&inode->i_count);	/* New dentry reference */
	dget(dentry);		/* Extra pinning count for the created dentry */
	d_instantiate(dentry, inode);
	return 0;
}

1694
/*
 * Remove a directory entry. If the file still has other hard links,
 * refund the inode that shmem_link charged for this extra link.
 */
static int shmem_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;

	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) {
		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
		if (sbinfo) {
			spin_lock(&sbinfo->stat_lock);
			sbinfo->free_inodes++;
			spin_unlock(&sbinfo->stat_lock);
		}
	}

	dir->i_size -= BOGO_DIRENT_SIZE;
	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
	inode->i_nlink--;
	dput(dentry);	/* Undo the count from "create" - this does all the work */
	return 0;
}
Linus Torvalds's avatar
Linus Torvalds committed
1713

1714
static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
Linus Torvalds's avatar
Linus Torvalds committed
1715
{
1716
	if (!simple_empty(dentry))
Linus Torvalds's avatar
Linus Torvalds committed
1717
		return -ENOTEMPTY;
Linus Torvalds's avatar
Linus Torvalds committed
1718

Linus Torvalds's avatar
Linus Torvalds committed
1719 1720
	dir->i_nlink--;
	return shmem_unlink(dir, dentry);
Linus Torvalds's avatar
Linus Torvalds committed
1721 1722 1723 1724 1725 1726 1727 1728
}

/*
 * The VFS layer already does all the dentry stuff for rename,
 * we just have to decrement the usage count for the target if
 * it exists so that the VFS layer correctly free's it when it
 * gets overwritten.
 */
static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *inode = old_dentry->d_inode;
	int they_are_dirs = S_ISDIR(inode->i_mode);

	if (!simple_empty(new_dentry))
		return -ENOTEMPTY;

	if (new_dentry->d_inode) {
		/* Target exists: unlink it; if dirs, old_dir loses ".." */
		(void) shmem_unlink(new_dir, new_dentry);
		if (they_are_dirs)
			old_dir->i_nlink--;
	} else if (they_are_dirs) {
		/* Directory moved: ".." back-reference moves with it */
		old_dir->i_nlink--;
		new_dir->i_nlink++;
	}

	old_dir->i_size -= BOGO_DIRENT_SIZE;
	new_dir->i_size += BOGO_DIRENT_SIZE;
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	inode->i_ctime = CURRENT_TIME;
	return 0;
}

1754
/*
 * Create a symlink.  Short targets are stored inline in the shmem inode
 * info area; longer ones are written into page 0 of the inode's mapping.
 * Returns 0, -ENAMETOOLONG, -ENOSPC, or an error from shmem_getpage().
 */
static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
	int error;
	int len;
	struct inode *inode;
	struct page *page = NULL;
	char *kaddr;
	struct shmem_inode_info *info;

	len = strlen(symname) + 1;	/* include the terminating NUL */
	if (len > PAGE_CACHE_SIZE)
		return -ENAMETOOLONG;

	inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
	if (!inode)
		return -ENOSPC;

	info = SHMEM_I(inode);
	inode->i_size = len-1;	/* i_size excludes the NUL */
	/* Fits in the info area ahead of the embedded inode? Store it inline */
	if (len <= (char *)inode - (char *)info) {
		/* do it inline */
		memcpy(info, symname, len);
		inode->i_op = &shmem_symlink_inline_operations;
	} else {
		/* Long target: allocate page 0 and copy the string into it */
		error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
		if (error) {
			iput(inode);
			return error;
		}
		inode->i_op = &shmem_symlink_inode_operations;
		kaddr = kmap_atomic(page, KM_USER0);
		memcpy(kaddr, symname, len);
		kunmap_atomic(kaddr, KM_USER0);
		set_page_dirty(page);
		page_cache_release(page);
	}
	/* Inherit group from a setgid parent directory */
	if (dir->i_mode & S_ISGID)
		inode->i_gid = dir->i_gid;
	dir->i_size += BOGO_DIRENT_SIZE;
	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
	d_instantiate(dentry, inode);
	dget(dentry);
	return 0;
}

/*
 * Follow a symlink whose target is stored inline: the shmem info area
 * at the front of the inode holds the NUL-terminated target string.
 */
static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
{
	nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode));
	return 0;
}

Alexander Viro's avatar
Alexander Viro committed
1805
/*
 * Follow a long symlink: the target lives in page 0 of the inode's
 * mapping.  The page is kmapped and stays mapped + referenced until
 * shmem_put_link() undoes both.
 */
static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	struct page *page = NULL;
	int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
	/* On error, hand the VFS an ERR_PTR instead of a mapped buffer */
	nd_set_link(nd, res ? ERR_PTR(res) : kmap(page));
	return 0;
}

Alexander Viro's avatar
Alexander Viro committed
1813
static void shmem_put_link(struct dentry *dentry, struct nameidata *nd)
Linus Torvalds's avatar
Linus Torvalds committed
1814
{
Alexander Viro's avatar
Alexander Viro committed
1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825
	if (!IS_ERR(nd_get_link(nd))) {
		struct page *page;

		page = find_get_page(dentry->d_inode->i_mapping, 0);
		if (!page)
			BUG();
		kunmap(page);
		mark_page_accessed(page);
		page_cache_release(page);
		page_cache_release(page);
	}
Linus Torvalds's avatar
Linus Torvalds committed
1826 1827
}

Linus Torvalds's avatar
Linus Torvalds committed
1828
/* Symlinks stored inline in the inode: no page to release, so no put_link */
static struct inode_operations shmem_symlink_inline_operations = {
	.readlink	= generic_readlink,
	.follow_link	= shmem_follow_link_inline,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr       = generic_setxattr,
	.getxattr       = generic_getxattr,
	.listxattr      = generic_listxattr,
	.removexattr    = generic_removexattr,
#endif
};

/* Symlinks whose target lives in page 0: put_link pairs with follow_link */
static struct inode_operations shmem_symlink_inode_operations = {
	.truncate	= shmem_truncate,
	.readlink	= generic_readlink,
	.follow_link	= shmem_follow_link,
	.put_link	= shmem_put_link,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr       = generic_setxattr,
	.getxattr       = generic_getxattr,
	.listxattr      = generic_listxattr,
	.removexattr    = generic_removexattr,
#endif
};

1852
/*
 * Parse a tmpfs mount-option string of the form "key=value,key=value".
 * mode/uid/gid may be NULL, in which case those options are accepted
 * but ignored (remount passes NULL for them).  blocks/inodes are only
 * written when the corresponding option appears.
 * Returns 0 on success, 1 on any malformed option.
 */
static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
{
	char *this_char, *value, *rest;

	while ((this_char = strsep(&options, ",")) != NULL) {
		if (!*this_char)
			continue;	/* tolerate empty fields, e.g. ",," */
		if ((value = strchr(this_char,'=')) != NULL) {
			*value++ = 0;	/* split "key=value" in place */
		} else {
			printk(KERN_ERR
			    "tmpfs: No value for mount option '%s'\n",
			    this_char);
			return 1;
		}

		if (!strcmp(this_char,"size")) {
			unsigned long long size;
			size = memparse(value,&rest);
			if (*rest == '%') {
				/* "size=N%" means N percent of total RAM */
				size <<= PAGE_SHIFT;
				size *= totalram_pages;
				do_div(size, 100);
				rest++;
			}
			if (*rest)
				goto bad_val;
			/* size is in bytes; the limit is kept in pages */
			*blocks = size >> PAGE_CACHE_SHIFT;
		} else if (!strcmp(this_char,"nr_blocks")) {
			*blocks = memparse(value,&rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"nr_inodes")) {
			*inodes = memparse(value,&rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"mode")) {
			if (!mode)
				continue;
			*mode = simple_strtoul(value,&rest,8);	/* octal */
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"uid")) {
			if (!uid)
				continue;
			*uid = simple_strtoul(value,&rest,0);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"gid")) {
			if (!gid)
				continue;
			*gid = simple_strtoul(value,&rest,0);
			if (*rest)
				goto bad_val;
		} else {
			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
			       this_char);
			return 1;
		}
	}
	return 0;

bad_val:
	printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
	       value, this_char);
	return 1;

}

1921
/*
 * Remount: may adjust the block/inode limits of an already-limited
 * mount, but refuses to convert between limited and unlimited
 * (sbinfo == NULL means the mount is unlimited).
 */
static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	unsigned long max_blocks = 0;
	unsigned long max_inodes = 0;

	/* Start from the current limits so unchanged options persist */
	if (sbinfo) {
		max_blocks = sbinfo->max_blocks;
		max_inodes = sbinfo->max_inodes;
	}
	/* mode/uid/gid may not be changed on remount: pass NULL */
	if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
		return -EINVAL;
	/* Keep it simple: disallow limited <-> unlimited remount */
	if ((max_blocks || max_inodes) == !sbinfo)
		return -EINVAL;
	/* But allow the pointless unlimited -> unlimited remount */
	if (!sbinfo)
		return 0;
	return shmem_set_size(sbinfo, max_blocks, max_inodes);
}
Linus Torvalds's avatar
Linus Torvalds committed
1941
#endif
Linus Torvalds's avatar
Linus Torvalds committed
1942

1943 1944 1945 1946 1947 1948
/*
 * Tear down the per-superblock info.  s_fs_info is NULL for unlimited
 * mounts; kfree() tolerates NULL so no check is needed.
 */
static void shmem_put_super(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = sb->s_fs_info;

	sb->s_fs_info = NULL;
	kfree(sbinfo);
}

1949 1950 1951 1952 1953 1954
#ifdef CONFIG_TMPFS_XATTR
static struct xattr_handler *shmem_xattr_handlers[];
#else
#define shmem_xattr_handlers NULL
#endif

1955 1956
/*
 * Fill in a new tmpfs superblock: parse options, set up the optional
 * resource-limit info, and create the root directory inode.
 * Returns 0 or a negative errno.
 */
static int shmem_fill_super(struct super_block *sb,
			    void *data, int silent)
{
	struct inode *inode;
	struct dentry *root;
	int mode   = S_IRWXUGO | S_ISVTX;	/* world-writable + sticky, like /tmp */
	uid_t uid = current->fsuid;
	gid_t gid = current->fsgid;
	int err = -ENOMEM;

#ifdef CONFIG_TMPFS
	unsigned long blocks = 0;
	unsigned long inodes = 0;

	/*
	 * Per default we only allow half of the physical ram per
	 * tmpfs instance, limiting inodes to one per page of lowmem;
	 * but the internal instance is left unlimited.
	 */
	if (!(sb->s_flags & MS_NOUSER)) {
		blocks = totalram_pages / 2;
		inodes = totalram_pages - totalhigh_pages;
		if (inodes > blocks)
			inodes = blocks;

		if (shmem_parse_options(data, &mode,
					&uid, &gid, &blocks, &inodes))
			return -EINVAL;
	}

	/* Limited mounts get an sbinfo; unlimited ones keep s_fs_info NULL */
	if (blocks || inodes) {
		struct shmem_sb_info *sbinfo;
		sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
		if (!sbinfo)
			return -ENOMEM;
		sb->s_fs_info = sbinfo;
		spin_lock_init(&sbinfo->stat_lock);
		sbinfo->max_blocks = blocks;
		sbinfo->free_blocks = blocks;
		sbinfo->max_inodes = inodes;
		sbinfo->free_inodes = inodes;
	}
	sb->s_xattr = shmem_xattr_handlers;
#else
	sb->s_flags |= MS_NOUSER;
#endif

	sb->s_maxbytes = SHMEM_MAX_BYTES;
	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = TMPFS_MAGIC;
	sb->s_op = &shmem_ops;
	inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
	if (!inode)
		goto failed;
	inode->i_uid = uid;
	inode->i_gid = gid;
	root = d_alloc_root(inode);
	if (!root)
		goto failed_iput;
	sb->s_root = root;
	return 0;

failed_iput:
	iput(inode);
failed:
	shmem_put_super(sb);	/* frees the sbinfo allocated above, if any */
	return err;
}

2025
static kmem_cache_t *shmem_inode_cachep;
Linus Torvalds's avatar
Linus Torvalds committed
2026

Linus Torvalds's avatar
Linus Torvalds committed
2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037
/*
 * Allocate a shmem inode from the slab cache and hand the VFS the
 * embedded struct inode.  Returns NULL on allocation failure.
 */
static struct inode *shmem_alloc_inode(struct super_block *sb)
{
	struct shmem_inode_info *p;

	/* kmem_cache_alloc() returns void *: no cast needed in C */
	p = kmem_cache_alloc(shmem_inode_cachep, SLAB_KERNEL);
	if (!p)
		return NULL;
	return &p->vfs_inode;
}

static void shmem_destroy_inode(struct inode *inode)
{
2038 2039 2040 2041
	if ((inode->i_mode & S_IFMT) == S_IFREG) {
		/* only struct inode is valid if it's an inline symlink */
		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
	}
Linus Torvalds's avatar
Linus Torvalds committed
2042 2043 2044
	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
}

2045
/*
 * Slab constructor: initialize the embedded VFS inode once per slab
 * object, only on real construction (not slab-debug verification).
 */
static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct shmem_inode_info *p = (struct shmem_inode_info *) foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		inode_init_once(&p->vfs_inode);
	}
}
2054

Linus Torvalds's avatar
Linus Torvalds committed
2055 2056 2057
/* Create the slab cache backing tmpfs inodes; returns 0 or -ENOMEM */
static int init_inodecache(void)
{
	shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
				sizeof(struct shmem_inode_info),
				0, 0, init_once, NULL);
	if (shmem_inode_cachep == NULL)
		return -ENOMEM;
	return 0;
}

/* Destroy the inode slab cache; warn (but continue) if objects remain */
static void destroy_inodecache(void)
{
	if (kmem_cache_destroy(shmem_inode_cachep))
		printk(KERN_INFO "shmem_inode_cache: not all structures were freed\n");
}
Linus Torvalds's avatar
Linus Torvalds committed
2070 2071

/* Page-cache operations for shmem/tmpfs mappings */
static struct address_space_operations shmem_aops = {
	.writepage	= shmem_writepage,
	.set_page_dirty	= __set_page_dirty_nobuffers,
#ifdef CONFIG_TMPFS
	.prepare_write	= shmem_prepare_write,
	.commit_write	= simple_commit_write,
#endif
};

/* File operations; read/write paths exist only for user-visible tmpfs */
static struct file_operations shmem_file_operations = {
	.mmap		= shmem_mmap,
#ifdef CONFIG_TMPFS
	.llseek		= generic_file_llseek,
	.read		= shmem_file_read,
	.write		= shmem_file_write,
	.fsync		= simple_sync_file,
	.sendfile	= shmem_file_sendfile,
#endif
};

/* Inode operations for regular tmpfs files */
static struct inode_operations shmem_inode_operations = {
	.truncate	= shmem_truncate,
	.setattr	= shmem_notify_change,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr       = generic_setxattr,
	.getxattr       = generic_getxattr,
	.listxattr      = generic_listxattr,
	.removexattr    = generic_removexattr,
#endif
};

/* Directory operations; only populated for the user-visible tmpfs */
static struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
	.create		= shmem_create,
	.lookup		= simple_lookup,
	.link		= shmem_link,
	.unlink		= shmem_unlink,
	.symlink	= shmem_symlink,
	.mkdir		= shmem_mkdir,
	.rmdir		= shmem_rmdir,
	.mknod		= shmem_mknod,
	.rename		= shmem_rename,
#ifdef CONFIG_TMPFS_XATTR
	.setxattr       = generic_setxattr,
	.getxattr       = generic_getxattr,
	.listxattr      = generic_listxattr,
	.removexattr    = generic_removexattr,
#endif
#endif
};

/* Special files (device nodes, fifos, sockets): xattrs only */
static struct inode_operations shmem_special_inode_operations = {
#ifdef CONFIG_TMPFS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= generic_listxattr,
	.removexattr	= generic_removexattr,
#endif
};

/* Superblock operations for shmem/tmpfs */
static struct super_operations shmem_ops = {
	.alloc_inode	= shmem_alloc_inode,
	.destroy_inode	= shmem_destroy_inode,
#ifdef CONFIG_TMPFS
	.statfs		= shmem_statfs,
	.remount_fs	= shmem_remount_fs,
#endif
	.delete_inode	= shmem_delete_inode,
	.drop_inode	= generic_delete_inode,
	.put_super	= shmem_put_super,
};

Linus Torvalds's avatar
Linus Torvalds committed
2143
/* VMA operations used by shmem_mmap() and shmem_zero_setup() */
static struct vm_operations_struct shmem_vm_ops = {
	.nopage		= shmem_nopage,
	.populate	= shmem_populate,
#ifdef CONFIG_NUMA
	.set_policy     = shmem_set_policy,
	.get_policy     = shmem_get_policy,
#endif
};

2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194

#ifdef CONFIG_TMPFS_SECURITY

static size_t shmem_xattr_security_list(struct inode *inode, char *list, size_t list_len,
					const char *name, size_t name_len)
{
	return security_inode_listsecurity(inode, list, list_len);
}

static int shmem_xattr_security_get(struct inode *inode, const char *name, void *buffer, size_t size)
{
	if (strcmp(name, "") == 0)
		return -EINVAL;
	return security_inode_getsecurity(inode, name, buffer, size);
}

static int shmem_xattr_security_set(struct inode *inode, const char *name, const void *value, size_t size, int flags)
{
	if (strcmp(name, "") == 0)
		return -EINVAL;
	return security_inode_setsecurity(inode, name, value, size, flags);
}

struct xattr_handler shmem_xattr_security_handler = {
	.prefix	= XATTR_SECURITY_PREFIX,
	.list	= shmem_xattr_security_list,
	.get	= shmem_xattr_security_get,
	.set	= shmem_xattr_security_set,
};

#endif	/* CONFIG_TMPFS_SECURITY */

#ifdef CONFIG_TMPFS_XATTR

static struct xattr_handler *shmem_xattr_handlers[] = {
#ifdef CONFIG_TMPFS_SECURITY
	&shmem_xattr_security_handler,
#endif
	NULL
};

#endif	/* CONFIG_TMPFS_XATTR */

2195
/* tmpfs has no backing device: each mount gets an anonymous superblock */
static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
}

/* Filesystem type registered in init_tmpfs() */
static struct file_system_type tmpfs_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "tmpfs",
	.get_sb		= shmem_get_sb,
	.kill_sb	= kill_litter_super,
};
Linus Torvalds's avatar
Linus Torvalds committed
2207
static struct vfsmount *shm_mnt;
Linus Torvalds's avatar
Linus Torvalds committed
2208

2209
/*
 * Module init: set up the inode cache, register tmpfs, and create the
 * internal kernel mount used by shmem_file_setup().  On failure the
 * error is also stashed in shm_mnt so later callers see it.
 */
static int __init init_tmpfs(void)
{
	int error;

	error = init_inodecache();
	if (error)
		goto out3;

	error = register_filesystem(&tmpfs_fs_type);
	if (error) {
		printk(KERN_ERR "Could not register tmpfs\n");
		goto out2;
	}
#ifdef CONFIG_TMPFS
	devfs_mk_dir("shm");
#endif
	/* Internal mount: MS_NOUSER keeps it unlimited (see shmem_fill_super) */
	shm_mnt = do_kern_mount(tmpfs_fs_type.name, MS_NOUSER,
				tmpfs_fs_type.name, NULL);
	if (IS_ERR(shm_mnt)) {
		error = PTR_ERR(shm_mnt);
		printk(KERN_ERR "Could not kern_mount tmpfs\n");
		goto out1;
	}
	return 0;

out1:
	unregister_filesystem(&tmpfs_fs_type);
out2:
	destroy_inodecache();
out3:
	shm_mnt = ERR_PTR(error);	/* record failure for shmem_file_setup() */
	return error;
}
module_init(init_tmpfs)
Linus Torvalds's avatar
Linus Torvalds committed
2243 2244

/*
 * shmem_file_setup - get an unlinked file living in tmpfs
 *
 * @name: name for dentry (to be seen in /proc/<pid>/maps
 * @size: size to be set for the file
 * @flags: VM_ flags; only the VM_ACCOUNT bit is recorded on the inode,
 *	and it controls whether the size is charged via shmem_acct_size()
 *
 * Returns the new file, or an ERR_PTR on failure.
 */
struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
{
	int error;
	struct file *file;
	struct inode *inode;
	struct dentry *dentry, *root;
	struct qstr this;

	/* shm_mnt holds an error pointer if the internal mount failed at init */
	if (IS_ERR(shm_mnt))
		return (void *)shm_mnt;

	if (size < 0 || size > SHMEM_MAX_BYTES)
		return ERR_PTR(-EINVAL);

	if (shmem_acct_size(flags, size))
		return ERR_PTR(-ENOMEM);

	error = -ENOMEM;
	this.name = name;
	this.len = strlen(name);
	this.hash = 0; /* will go */
	root = shm_mnt->mnt_root;
	dentry = d_alloc(root, &this);
	if (!dentry)
		goto put_memory;

	error = -ENFILE;
	file = get_empty_filp();
	if (!file)
		goto put_dentry;

	error = -ENOSPC;
	inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
	if (!inode)
		goto close_file;

	/* Remember whether this object's size was charged to VM accounting */
	SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
	d_instantiate(dentry, inode);
	inode->i_size = size;
	inode->i_nlink = 0;	/* It is unlinked */
	file->f_vfsmnt = mntget(shm_mnt);
	file->f_dentry = dentry;
	file->f_mapping = inode->i_mapping;
	file->f_op = &shmem_file_operations;
	file->f_mode = FMODE_WRITE | FMODE_READ;
	return file;

close_file:
	put_filp(file);
put_dentry:
	dput(dentry);
put_memory:
	shmem_unacct_size(flags, size);	/* undo the shmem_acct_size() charge */
	return ERR_PTR(error);
}
2306

Linus Torvalds's avatar
Linus Torvalds committed
2307 2308 2309 2310 2311 2312 2313 2314 2315
/*
 * shmem_zero_setup - setup a shared anonymous mapping
 *
 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
 */
int shmem_zero_setup(struct vm_area_struct *vma)
{
	struct file *file;
	loff_t size = vma->vm_end - vma->vm_start;

	file = shmem_file_setup("dev/zero", size, vma->vm_flags);
	if (IS_ERR(file))
		return PTR_ERR(file);

	/* Replace any file previously attached to the vma */
	if (vma->vm_file)
		fput(vma->vm_file);
	vma->vm_file = file;
	vma->vm_ops = &shmem_vm_ops;
	return 0;
}