Commit 972b8494 authored by Linus Torvalds

Merge http://gkernel.bkbits.net/misc-2.5

into penguin.transmeta.com:/home/penguin/torvalds/repositories/kernel/linux
parents 0b818f9d 7fc97e8d
@@ -2667,7 +2667,7 @@ under /dev. These special filesystems provide kernel interfaces that
 cannot be provided with standard device nodes.
 /dev/pts devpts PTY slave filesystem
-/dev/shm shmfs POSIX shared memory maintenance access
+/dev/shm tmpfs POSIX shared memory maintenance access
 **** TERMINAL DEVICES
......
@@ -47,10 +47,9 @@ tmpfs has the following uses:
    shared memory)
 3) Some people (including me) find it very convenient to mount it
-   e.g. on /tmp and /var/tmp and have a big swap partition. But be
-   aware: loop mounts of tmpfs files do not work due to the internal
-   design. So mkinitrd shipped by most distributions will fail with a
-   tmpfs /tmp.
+   e.g. on /tmp and /var/tmp and have a big swap partition. And now
+   loop mounts of tmpfs files do work, so mkinitrd shipped by most
+   distributions should succeed with a tmpfs /tmp.
 4) And probably a lot more I do not know about :-)
@@ -90,13 +89,9 @@ TODOs:
    size=50% the tmpfs instance should be able to grow to 50 percent of
    RAM + swap. So the instance should adapt automatically if you add
    or remove swap space.
-2) loop mounts: This is difficult since loop.c relies on the readpage
-   operation. This operation gets a page from the caller to be filled
-   with the content of the file at that position. But tmpfs always has
-   the page and thus cannot copy the content to the given page. So it
-   cannot provide this operation. The VM had to be changed seriously
-   to achieve this.
-3) Show the number of tmpfs RAM pages. (As shared?)
+2) Show the number of tmpfs RAM pages. (As shared?)
 
 Author:
    Christoph Rohland <cr@sap.com>, 1.12.01
+Updated:
+   Hugh Dickins <hugh@veritas.com>, 17 Oct 2002
@@ -72,7 +72,9 @@ EXPORT_SYMBOL(pfn_to_nid);
 #ifdef CONFIG_X86_NUMAQ
 EXPORT_SYMBOL(xquad_portio);
 #endif
+#ifndef CONFIG_X86_WP_WORKS_OK
 EXPORT_SYMBOL(__verify_write);
+#endif
 EXPORT_SYMBOL(dump_thread);
 EXPORT_SYMBOL(dump_fpu);
 EXPORT_SYMBOL(dump_extended_fpu);
......
@@ -375,12 +375,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 			break;
 		}
 		ret = 0;
-		if ( !child->used_math ) {
-			/* Simulate an empty FPU. */
-			set_fpu_cwd(child, 0x037f);
-			set_fpu_swd(child, 0x0000);
-			set_fpu_twd(child, 0xffff);
-		}
+		if (!child->used_math)
+			init_fpu(child);
 		get_fpregs((struct user_i387_struct *)data, child);
 		break;
 	}
@@ -403,13 +399,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 			ret = -EIO;
 			break;
 		}
-		if ( !child->used_math ) {
-			/* Simulate an empty FPU. */
-			set_fpu_cwd(child, 0x037f);
-			set_fpu_swd(child, 0x0000);
-			set_fpu_twd(child, 0xffff);
-			set_fpu_mxcsr(child, 0x1f80);
-		}
+		if (!child->used_math)
+			init_fpu(child);
 		ret = get_fpxregs((struct user_fxsr_struct *)data, child);
 		break;
 	}
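Both ptrace hunks replace the open-coded "simulate an empty FPU" sequence with a single init_fpu() call. The body of init_fpu() is not part of this diff, so the following is only a sketch of what such a helper would do, reusing the default values from the removed lines (the real helper lives elsewhere in the i386 tree and may differ in detail):

    #include <linux/sched.h>
    #include <asm/i387.h>
    #include <asm/cpufeature.h>

    /* Sketch only, not the kernel's actual init_fpu(): consolidate the
     * empty-FPU setup that the two removed blocks used to open-code. */
    static void init_fpu_sketch(struct task_struct *tsk)
    {
    	set_fpu_cwd(tsk, 0x037f);		/* default x87 control word */
    	set_fpu_swd(tsk, 0x0000);		/* clear status word */
    	set_fpu_twd(tsk, 0xffff);		/* tag all registers as empty */
    	if (cpu_has_xmm)
    		set_fpu_mxcsr(tsk, 0x1f80);	/* SSE default MXCSR */
    	tsk->used_math = 1;			/* ptrace now reads sane state */
    }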
......
@@ -54,10 +54,10 @@ static inline void save_processor_context (void)
 	/*
 	 * descriptor tables
 	 */
-	asm volatile ("sgdt (%0)" : "=m" (saved_context.gdt_limit));
-	asm volatile ("sidt (%0)" : "=m" (saved_context.idt_limit));
-	asm volatile ("sldt (%0)" : "=m" (saved_context.ldt));
-	asm volatile ("str (%0)" : "=m" (saved_context.tr));
+	asm volatile ("sgdt %0" : "=m" (saved_context.gdt_limit));
+	asm volatile ("sidt %0" : "=m" (saved_context.idt_limit));
+	asm volatile ("sldt %0" : "=m" (saved_context.ldt));
+	asm volatile ("str %0" : "=m" (saved_context.tr));
 
 	/*
 	 * save the general registers.
...@@ -67,22 +67,22 @@ static inline void save_processor_context (void) ...@@ -67,22 +67,22 @@ static inline void save_processor_context (void)
* It's really not necessary, and kinda fishy (check the assembly output), * It's really not necessary, and kinda fishy (check the assembly output),
* so it's avoided. * so it's avoided.
*/ */
asm volatile ("movl %%esp, (%0)" : "=m" (saved_context.esp)); asm volatile ("movl %%esp, %0" : "=m" (saved_context.esp));
asm volatile ("movl %%eax, (%0)" : "=m" (saved_context.eax)); asm volatile ("movl %%eax, %0" : "=m" (saved_context.eax));
asm volatile ("movl %%ebx, (%0)" : "=m" (saved_context.ebx)); asm volatile ("movl %%ebx, %0" : "=m" (saved_context.ebx));
asm volatile ("movl %%ecx, (%0)" : "=m" (saved_context.ecx)); asm volatile ("movl %%ecx, %0" : "=m" (saved_context.ecx));
asm volatile ("movl %%edx, (%0)" : "=m" (saved_context.edx)); asm volatile ("movl %%edx, %0" : "=m" (saved_context.edx));
asm volatile ("movl %%ebp, (%0)" : "=m" (saved_context.ebp)); asm volatile ("movl %%ebp, %0" : "=m" (saved_context.ebp));
asm volatile ("movl %%esi, (%0)" : "=m" (saved_context.esi)); asm volatile ("movl %%esi, %0" : "=m" (saved_context.esi));
asm volatile ("movl %%edi, (%0)" : "=m" (saved_context.edi)); asm volatile ("movl %%edi, %0" : "=m" (saved_context.edi));
/* FIXME: Need to save XMM0..XMM15? */
/* /*
* segment registers * segment registers
*/ */
asm volatile ("movw %%es, %0" : "=r" (saved_context.es)); asm volatile ("movw %%es, %0" : "=m" (saved_context.es));
asm volatile ("movw %%fs, %0" : "=r" (saved_context.fs)); asm volatile ("movw %%fs, %0" : "=m" (saved_context.fs));
asm volatile ("movw %%gs, %0" : "=r" (saved_context.gs)); asm volatile ("movw %%gs, %0" : "=m" (saved_context.gs));
asm volatile ("movw %%ss, %0" : "=r" (saved_context.ss)); asm volatile ("movw %%ss, %0" : "=m" (saved_context.ss));
/* /*
* control registers * control registers
...@@ -95,7 +95,7 @@ static inline void save_processor_context (void) ...@@ -95,7 +95,7 @@ static inline void save_processor_context (void)
/* /*
* eflags * eflags
*/ */
asm volatile ("pushfl ; popl (%0)" : "=m" (saved_context.eflags)); asm volatile ("pushfl ; popl %0" : "=m" (saved_context.eflags));
} }
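The same pattern runs through the whole save/restore block above: with an "=m" output constraint, %0 already expands to the operand's memory reference, so the old "(%0)" form wrapped a second, bogus indirection around it, and the segment-register stores need "=m" rather than "=r" because movw writes straight to memory there. A stand-alone sketch of the corrected idiom, with an illustrative variable name (not code from this patch):

    /* sketch: store %esp straight into a memory location */
    static unsigned long esp_store;

    static void grab_esp(void)
    {
    	/* "=m" makes %0 name the memory slot itself; no parentheses needed */
    	asm volatile ("movl %%esp, %0" : "=m" (esp_store));
    }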
static void static void
...@@ -125,9 +125,7 @@ static inline void restore_processor_context (void) ...@@ -125,9 +125,7 @@ static inline void restore_processor_context (void)
/* /*
* first restore %ds, so we can access our data properly * first restore %ds, so we can access our data properly
*/ */
asm volatile (".align 4"); asm volatile ("movw %0, %%ds" :: "r" (__KERNEL_DS));
asm volatile ("movw %0, %%ds" :: "r" ((u16)__KERNEL_DS));
/* /*
* control registers * control registers
...@@ -167,9 +165,9 @@ static inline void restore_processor_context (void) ...@@ -167,9 +165,9 @@ static inline void restore_processor_context (void)
* now restore the descriptor tables to their proper values * now restore the descriptor tables to their proper values
* ltr is done i fix_processor_context(). * ltr is done i fix_processor_context().
*/ */
asm volatile ("lgdt (%0)" :: "m" (saved_context.gdt_limit)); asm volatile ("lgdt %0" :: "m" (saved_context.gdt_limit));
asm volatile ("lidt (%0)" :: "m" (saved_context.idt_limit)); asm volatile ("lidt %0" :: "m" (saved_context.idt_limit));
asm volatile ("lldt (%0)" :: "m" (saved_context.ldt)); asm volatile ("lldt %0" :: "m" (saved_context.ldt));
fix_processor_context(); fix_processor_context();
......
@@ -30,17 +30,20 @@ extern void die(const char *,struct pt_regs *,long);
 extern int console_loglevel;
 
+#ifndef CONFIG_X86_WP_WORKS_OK
 /*
  * Ugly, ugly, but the goto's result in better assembly..
  */
 int __verify_write(const void * addr, unsigned long size)
 {
+	struct mm_struct *mm = current->mm;
 	struct vm_area_struct * vma;
 	unsigned long start = (unsigned long) addr;
 
-	if (!size)
+	if (!size || segment_eq(get_fs(),KERNEL_DS))
 		return 1;
 
+	down_read(&mm->mmap_sem);
 	vma = find_vma(current->mm, start);
 	if (!vma)
 		goto bad_area;
@@ -80,6 +83,13 @@ int __verify_write(const void * addr, unsigned long size)
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;;
 	}
+	/*
+	 * We really need to hold mmap_sem over the whole access to
+	 * userspace, else another thread could change permissions.
+	 * This is unfixable, so don't use i386-class machines for
+	 * critical servers.
+	 */
+	up_read(&mm->mmap_sem);
 	return 1;
 
 check_stack:
@@ -89,6 +99,7 @@ int __verify_write(const void * addr, unsigned long size)
 		goto good_area;
 
 bad_area:
+	up_read(&mm->mmap_sem);
 	return 0;
 
 out_of_memory:
@@ -98,6 +109,7 @@ int __verify_write(const void * addr, unsigned long size)
 	}
 	goto bad_area;
 }
+#endif
 
 /*
  * Unlock any spinlocks which will prevent us from getting the
......
...@@ -378,15 +378,10 @@ void __init paging_init(void) ...@@ -378,15 +378,10 @@ void __init paging_init(void)
* This function cannot be __init, since exceptions don't work in that * This function cannot be __init, since exceptions don't work in that
* section. * section.
*/ */
static int do_test_wp_bit(unsigned long vaddr); static int do_test_wp_bit(void);
void __init test_wp_bit(void) void __init test_wp_bit(void)
{ {
const unsigned long vaddr = PAGE_OFFSET;
pgd_t *pgd;
pmd_t *pmd;
pte_t *pte, old_pte;
if (cpu_has_pse) { if (cpu_has_pse) {
/* Ok, all PSE-capable CPUs are definitely handling the WP bit right. */ /* Ok, all PSE-capable CPUs are definitely handling the WP bit right. */
boot_cpu_data.wp_works_ok = 1; boot_cpu_data.wp_works_ok = 1;
...@@ -395,17 +390,10 @@ void __init test_wp_bit(void) ...@@ -395,17 +390,10 @@ void __init test_wp_bit(void)
printk("Checking if this processor honours the WP bit even in supervisor mode... "); printk("Checking if this processor honours the WP bit even in supervisor mode... ");
pgd = swapper_pg_dir + __pgd_offset(vaddr); /* Any page-aligned address will do, the test is non-destructive */
pmd = pmd_offset(pgd, vaddr); __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
pte = pte_offset_kernel(pmd, vaddr); boot_cpu_data.wp_works_ok = do_test_wp_bit();
old_pte = *pte; clear_fixmap(FIX_WP_TEST);
*pte = pfn_pte(0, PAGE_READONLY);
local_flush_tlb();
boot_cpu_data.wp_works_ok = do_test_wp_bit(vaddr);
*pte = old_pte;
local_flush_tlb();
if (!boot_cpu_data.wp_works_ok) { if (!boot_cpu_data.wp_works_ok) {
printk("No.\n"); printk("No.\n");
...@@ -550,7 +538,7 @@ void __init pgtable_cache_init(void) ...@@ -550,7 +538,7 @@ void __init pgtable_cache_init(void)
#endif #endif
/* Put this after the callers, so that it cannot be inlined */ /* Put this after the callers, so that it cannot be inlined */
static int do_test_wp_bit(unsigned long vaddr) static int do_test_wp_bit(void)
{ {
char tmp_reg; char tmp_reg;
int flag; int flag;
...@@ -564,7 +552,7 @@ static int do_test_wp_bit(unsigned long vaddr) ...@@ -564,7 +552,7 @@ static int do_test_wp_bit(unsigned long vaddr)
" .align 4 \n" " .align 4 \n"
" .long 1b,2b \n" " .long 1b,2b \n"
".previous \n" ".previous \n"
:"=m" (*(char *) vaddr), :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
"=q" (tmp_reg), "=q" (tmp_reg),
"=r" (flag) "=r" (flag)
:"2" (1) :"2" (1)
......
...@@ -289,7 +289,7 @@ void __init bt_iounmap(void *addr, unsigned long size) ...@@ -289,7 +289,7 @@ void __init bt_iounmap(void *addr, unsigned long size)
idx = FIX_BTMAP_BEGIN; idx = FIX_BTMAP_BEGIN;
while (nrpages > 0) { while (nrpages > 0) {
__set_fixmap(idx, 0, __pgprot(0)); clear_fixmap(idx);
--idx; --idx;
--nrpages; --nrpages;
} }
......
...@@ -29,58 +29,6 @@ ...@@ -29,58 +29,6 @@
extern void die(const char *,struct pt_regs *,long); extern void die(const char *,struct pt_regs *,long);
/*
* Ugly, ugly, but the goto's result in better assembly..
*/
int __verify_write(const void * addr, unsigned long size)
{
struct vm_area_struct * vma;
unsigned long start = (unsigned long) addr;
if (!size)
return 1;
vma = find_vma(current->mm, start);
if (!vma)
goto bad_area;
if (vma->vm_start > start)
goto check_stack;
good_area:
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
size--;
size += start & ~PAGE_MASK;
size >>= PAGE_SHIFT;
start &= PAGE_MASK;
for (;;) {
if (handle_mm_fault(current->mm, vma, start, 1) <= 0)
goto bad_area;
if (!size)
break;
size--;
start += PAGE_SIZE;
if (start < vma->vm_end)
continue;
vma = vma->vm_next;
if (!vma || vma->vm_start != start)
goto bad_area;
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;;
}
return 1;
check_stack:
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
if (expand_stack(vma, start) == 0)
goto good_area;
bad_area:
return 0;
}
/* /*
* This routine handles page faults. It determines the address, * This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate * and the problem, and then passes it off to one of the appropriate
......
...@@ -432,7 +432,7 @@ static int __init rd_init (void) ...@@ -432,7 +432,7 @@ static int __init rd_init (void)
disk->first_minor = i; disk->first_minor = i;
disk->fops = &rd_bd_op; disk->fops = &rd_bd_op;
disk->queue = &rd_queue; disk->queue = &rd_queue;
sprintf(disk->disk_name, "rd%d", i); sprintf(disk->disk_name, "ram%d", i);
set_capacity(disk, rd_size * 2); set_capacity(disk, rd_size * 2);
} }
devfs_handle = devfs_mk_dir (NULL, "rd", NULL); devfs_handle = devfs_mk_dir (NULL, "rd", NULL);
......
...@@ -1497,7 +1497,7 @@ int __init cm206_init(void) ...@@ -1497,7 +1497,7 @@ int __init cm206_init(void)
goto out_disk; goto out_disk;
disk->major = MAJOR_NR; disk->major = MAJOR_NR;
disk->first_minor = 0; disk->first_minor = 0;
sprintf(disk->disk_name, "cm206"); sprintf(disk->disk_name, "cm206cd");
disk->fops = &cm206_bdops; disk->fops = &cm206_bdops;
disk->flags = GENHD_FL_CD; disk->flags = GENHD_FL_CD;
cm206_gendisk = disk; cm206_gendisk = disk;
......
...@@ -3287,13 +3287,13 @@ static int __init md_setup(char *str) ...@@ -3287,13 +3287,13 @@ static int __init md_setup(char *str)
return 1; return 1;
} }
extern kdev_t name_to_kdev_t(char *line) __init; extern dev_t name_to_dev_t(char *line) __init;
void __init md_setup_drive(void) void __init md_setup_drive(void)
{ {
int minor, i; int minor, i;
kdev_t dev; dev_t dev;
mddev_t*mddev; mddev_t*mddev;
kdev_t devices[MD_SB_DISKS+1]; dev_t devices[MD_SB_DISKS+1];
for (minor = 0; minor < MAX_MD_DEVS; minor++) { for (minor = 0; minor < MAX_MD_DEVS; minor++) {
int err = 0; int err = 0;
...@@ -3312,16 +3312,17 @@ void __init md_setup_drive(void) ...@@ -3312,16 +3312,17 @@ void __init md_setup_drive(void)
if (p) if (p)
*p++ = 0; *p++ = 0;
dev = name_to_kdev_t(devname); dev = name_to_dev_t(devname);
handle = devfs_get_handle(NULL, devname, major(dev), minor(dev), handle = devfs_get_handle(NULL, devname,
MAJOR(dev), MINOR(dev),
DEVFS_SPECIAL_BLK, 1); DEVFS_SPECIAL_BLK, 1);
if (handle != 0) { if (handle != 0) {
unsigned major, minor; unsigned major, minor;
devfs_get_maj_min(handle, &major, &minor); devfs_get_maj_min(handle, &major, &minor);
dev = mk_kdev(major, minor); dev = MKDEV(major, minor);
devfs_put(handle); devfs_put(handle);
} }
if (kdev_none(dev)) { if (!dev) {
printk(KERN_WARNING "md: Unknown device name: %s\n", devname); printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
break; break;
} }
...@@ -3331,7 +3332,7 @@ void __init md_setup_drive(void) ...@@ -3331,7 +3332,7 @@ void __init md_setup_drive(void)
devname = p; devname = p;
} }
devices[i] = to_kdev_t(0); devices[i] = 0;
if (!md_setup_args.device_set[minor]) if (!md_setup_args.device_set[minor])
continue; continue;
...@@ -3375,13 +3376,13 @@ void __init md_setup_drive(void) ...@@ -3375,13 +3376,13 @@ void __init md_setup_drive(void)
err = set_array_info(mddev, &ainfo); err = set_array_info(mddev, &ainfo);
for (i = 0; !err && i <= MD_SB_DISKS; i++) { for (i = 0; !err && i <= MD_SB_DISKS; i++) {
dev = devices[i]; dev = devices[i];
if (kdev_none(dev)) if (!dev)
break; break;
dinfo.number = i; dinfo.number = i;
dinfo.raid_disk = i; dinfo.raid_disk = i;
dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC); dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
dinfo.major = major(dev); dinfo.major = MAJOR(dev);
dinfo.minor = minor(dev); dinfo.minor = MINOR(dev);
mddev->raid_disks++; mddev->raid_disks++;
err = add_new_disk (mddev, &dinfo); err = add_new_disk (mddev, &dinfo);
} }
...@@ -3389,10 +3390,10 @@ void __init md_setup_drive(void) ...@@ -3389,10 +3390,10 @@ void __init md_setup_drive(void)
/* persistent */ /* persistent */
for (i = 0; i <= MD_SB_DISKS; i++) { for (i = 0; i <= MD_SB_DISKS; i++) {
dev = devices[i]; dev = devices[i];
if (kdev_none(dev)) if (!dev)
break; break;
dinfo.major = major(dev); dinfo.major = MAJOR(dev);
dinfo.minor = minor(dev); dinfo.minor = MINOR(dev);
add_new_disk (mddev, &dinfo); add_new_disk (mddev, &dinfo);
} }
} }
......
...@@ -1044,7 +1044,7 @@ static int __init calc_erase_regions(struct mtd_erase_region_info *info, size_t ...@@ -1044,7 +1044,7 @@ static int __init calc_erase_regions(struct mtd_erase_region_info *info, size_t
} }
extern kdev_t name_to_kdev_t(char *line) __init; extern dev_t name_to_dev_t(char *line) __init;
/* Startup */ /* Startup */
static int __init init_blkmtd(void) static int __init init_blkmtd(void)
...@@ -1059,7 +1059,7 @@ static int __init init_blkmtd(void) ...@@ -1059,7 +1059,7 @@ static int __init init_blkmtd(void)
loff_t size; loff_t size;
int readonly = 0; int readonly = 0;
int erase_size = CONFIG_MTD_BLKDEV_ERASESIZE; int erase_size = CONFIG_MTD_BLKDEV_ERASESIZE;
kdev_t rdev; dev_t rdev;
struct block_device *bdev; struct block_device *bdev;
int err; int err;
int mode; int mode;
...@@ -1107,17 +1107,17 @@ static int __init init_blkmtd(void) ...@@ -1107,17 +1107,17 @@ static int __init init_blkmtd(void)
filp_close(file, NULL); filp_close(file, NULL);
return 1; return 1;
} }
rdev = inode->i_rdev; rdev = inode->i_bdev->bd_dev;
filp_close(file, NULL); filp_close(file, NULL);
#else #else
rdev = name_to_kdev_t(device); rdev = name_to_dev_t(device);
#endif #endif
maj = major(rdev); maj = MAJOR(rdev);
min = minor(rdev); min = MINOR(rdev);
DEBUG(1, "blkmtd: found a block device major = %d, minor = %d\n", maj, min); DEBUG(1, "blkmtd: found a block device major = %d, minor = %d\n", maj, min);
if(kdev_none(rdev)) { if(!rdev) {
printk("blkmtd: bad block device: `%s'\n", device); printk("blkmtd: bad block device: `%s'\n", device);
return 1; return 1;
} }
......
...@@ -1792,9 +1792,6 @@ xfs_alloc_buftarg( ...@@ -1792,9 +1792,6 @@ xfs_alloc_buftarg(
case EVMS_MAJOR: case EVMS_MAJOR:
btp->pbr_flags = PBR_ALIGNED_ONLY; btp->pbr_flags = PBR_ALIGNED_ONLY;
break; break;
case LVM_BLK_MAJOR:
btp->pbr_flags = PBR_SECTOR_ONLY;
break;
} }
return btp; return btp;
......
...@@ -41,13 +41,6 @@ ...@@ -41,13 +41,6 @@
* TLB entries of such buffers will not be flushed across * TLB entries of such buffers will not be flushed across
* task switches. * task switches.
*/ */
/*
* on UP currently we will have no trace of the fixmap mechanizm,
* no page table allocations, etc. This might change in the
* future, say framebuffers for the console driver(s) could be
* fix-mapped?
*/
enum fixed_addresses { enum fixed_addresses {
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
...@@ -81,6 +74,7 @@ enum fixed_addresses { ...@@ -81,6 +74,7 @@ enum fixed_addresses {
#define NR_FIX_BTMAPS 16 #define NR_FIX_BTMAPS 16
FIX_BTMAP_END = __end_of_permanent_fixed_addresses, FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1, FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
FIX_WP_TEST,
__end_of_fixed_addresses __end_of_fixed_addresses
}; };
...@@ -94,6 +88,10 @@ extern void __set_fixmap (enum fixed_addresses idx, ...@@ -94,6 +88,10 @@ extern void __set_fixmap (enum fixed_addresses idx,
*/ */
#define set_fixmap_nocache(idx, phys) \ #define set_fixmap_nocache(idx, phys) \
__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
#define clear_fixmap(idx) \
__set_fixmap(idx, 0, __pgprot(0))
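clear_fixmap() pairs with __set_fixmap() to give a compile-time-fixed virtual address a temporary backing page and then drop it again, which is exactly how the new FIX_WP_TEST slot and the reworked bt_iounmap() use it. A minimal sketch of the pattern; the function name and physical-address argument are illustrative, not from this patch:

    #include <asm/fixmap.h>
    #include <asm/page.h>
    #include <asm/pgtable.h>

    static void peek_readonly(unsigned long phys)
    {
    	/* map the page read-only at the slot's fixed virtual address */
    	__set_fixmap(FIX_WP_TEST, phys & PAGE_MASK, PAGE_READONLY);

    	/* ... access *(char *) fix_to_virt(FIX_WP_TEST) here ... */

    	/* tear it down; expands to __set_fixmap(idx, 0, __pgprot(0)) */
    	clear_fixmap(FIX_WP_TEST);
    }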
/* /*
* used by vmalloc.c. * used by vmalloc.c.
* *
......
...@@ -15,22 +15,22 @@ arch_prepare_suspend(void) ...@@ -15,22 +15,22 @@ arch_prepare_suspend(void)
/* image of the saved processor state */ /* image of the saved processor state */
struct saved_context { struct saved_context {
u32 eax, ebx, ecx, edx; unsigned long eax, ebx, ecx, edx;
u32 esp, ebp, esi, edi; unsigned long esp, ebp, esi, edi;
u16 es, fs, gs, ss; u16 es, fs, gs, ss;
u32 cr0, cr2, cr3, cr4; unsigned long cr0, cr2, cr3, cr4;
u16 gdt_pad; u16 gdt_pad;
u16 gdt_limit; u16 gdt_limit;
u32 gdt_base; unsigned long gdt_base;
u16 idt_pad; u16 idt_pad;
u16 idt_limit; u16 idt_limit;
u32 idt_base; unsigned long idt_base;
u16 ldt; u16 ldt;
u16 tss; u16 tss;
u32 tr; unsigned long tr;
u32 safety; unsigned long safety;
u32 return_address; unsigned long return_address;
u32 eflags; unsigned long eflags;
} __attribute__((packed)); } __attribute__((packed));
#define loaddebug(thread,register) \ #define loaddebug(thread,register) \
...@@ -52,11 +52,11 @@ extern unsigned long saved_edi; ...@@ -52,11 +52,11 @@ extern unsigned long saved_edi;
static inline void acpi_save_register_state(unsigned long return_point) static inline void acpi_save_register_state(unsigned long return_point)
{ {
saved_eip = return_point; saved_eip = return_point;
asm volatile ("movl %%esp,(%0)" : "=m" (saved_esp)); asm volatile ("movl %%esp,%0" : "=m" (saved_esp));
asm volatile ("movl %%ebp,(%0)" : "=m" (saved_ebp)); asm volatile ("movl %%ebp,%0" : "=m" (saved_ebp));
asm volatile ("movl %%ebx,(%0)" : "=m" (saved_ebx)); asm volatile ("movl %%ebx,%0" : "=m" (saved_ebx));
asm volatile ("movl %%edi,(%0)" : "=m" (saved_edi)); asm volatile ("movl %%edi,%0" : "=m" (saved_edi));
asm volatile ("movl %%esi,(%0)" : "=m" (saved_esi)); asm volatile ("movl %%esi,%0" : "=m" (saved_esi));
} }
#define acpi_restore_register_state() do {} while (0) #define acpi_restore_register_state() do {} while (0)
......
...@@ -64,7 +64,6 @@ int __verify_write(const void *, unsigned long); ...@@ -64,7 +64,6 @@ int __verify_write(const void *, unsigned long);
#define access_ok(type,addr,size) ( (__range_ok(addr,size) == 0) && \ #define access_ok(type,addr,size) ( (__range_ok(addr,size) == 0) && \
((type) == VERIFY_READ || boot_cpu_data.wp_works_ok || \ ((type) == VERIFY_READ || boot_cpu_data.wp_works_ok || \
segment_eq(get_fs(),KERNEL_DS) || \
__verify_write((void *)(addr),(size)))) __verify_write((void *)(addr),(size))))
#endif #endif
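The segment_eq(get_fs(),KERNEL_DS) short-circuit has simply moved from the access_ok() macro into __verify_write() itself (see the fault.c hunk above), so callers are unchanged. For reference, a typical caller pattern, offered only as an illustration rather than code from this patch:

    #include <linux/errno.h>
    #include <asm/uaccess.h>

    /* illustrative caller: guard a user pointer before writing to it */
    static int put_value(int *uptr, int val)
    {
    	if (!access_ok(VERIFY_WRITE, uptr, sizeof(*uptr)))
    		return -EFAULT;
    	return __copy_to_user(uptr, &val, sizeof(val)) ? -EFAULT : 0;
    }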
......
...@@ -39,6 +39,7 @@ extern void _clear_page(void *page); ...@@ -39,6 +39,7 @@ extern void _clear_page(void *page);
#define clear_page(X) _clear_page((void *)(X)) #define clear_page(X) _clear_page((void *)(X))
struct page; struct page;
extern void clear_user_page(void *addr, unsigned long vaddr, struct page *page); extern void clear_user_page(void *addr, unsigned long vaddr, struct page *page);
#define copy_page(X,Y) __memcpy((void *)(X), (void *)(Y), PAGE_SIZE)
extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *topage); extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *topage);
/* GROSS, defining this makes gcc pass these types as aggregates, /* GROSS, defining this makes gcc pass these types as aggregates,
......
...@@ -1239,6 +1239,7 @@ extern int sb_min_blocksize(struct super_block *, int); ...@@ -1239,6 +1239,7 @@ extern int sb_min_blocksize(struct super_block *, int);
extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *); extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *);
extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *); extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *);
extern ssize_t generic_file_aio_read(struct kiocb *, char *, size_t, loff_t); extern ssize_t generic_file_aio_read(struct kiocb *, char *, size_t, loff_t);
......
...@@ -73,4 +73,15 @@ static inline void copy_user_highpage(struct page *to, struct page *from, unsign ...@@ -73,4 +73,15 @@ static inline void copy_user_highpage(struct page *to, struct page *from, unsign
kunmap_atomic(vto, KM_USER1); kunmap_atomic(vto, KM_USER1);
} }
static inline void copy_highpage(struct page *to, struct page *from)
{
char *vfrom, *vto;
vfrom = kmap_atomic(from, KM_USER0);
vto = kmap_atomic(to, KM_USER1);
copy_page(vto, vfrom);
kunmap_atomic(vfrom, KM_USER0);
kunmap_atomic(vto, KM_USER1);
}
#endif /* _LINUX_HIGHMEM_H */ #endif /* _LINUX_HIGHMEM_H */
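copy_highpage() mirrors the existing copy_user_highpage() but takes no user virtual address; it is what the shmem changes below use to copy a swap-cache page into a caller-supplied page. A minimal usage sketch (the wrapper name is illustrative):

    #include <linux/highmem.h>
    #include <linux/mm.h>

    /* duplicate src into dst; either page may live in highmem */
    static void duplicate_page(struct page *dst, struct page *src)
    {
    	copy_highpage(dst, src);
    	flush_dcache_page(dst);	/* keep user-visible caches coherent */
    }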
...@@ -27,6 +27,7 @@ extern int get_filesystem_list(char * buf); ...@@ -27,6 +27,7 @@ extern int get_filesystem_list(char * buf);
extern asmlinkage long sys_mount(char *dev_name, char *dir_name, char *type, extern asmlinkage long sys_mount(char *dev_name, char *dir_name, char *type,
unsigned long flags, void *data); unsigned long flags, void *data);
extern asmlinkage long sys_mkdir(const char *name, int mode); extern asmlinkage long sys_mkdir(const char *name, int mode);
extern asmlinkage long sys_rmdir(const char *name);
extern asmlinkage long sys_chdir(const char *name); extern asmlinkage long sys_chdir(const char *name);
extern asmlinkage long sys_fchdir(int fd); extern asmlinkage long sys_fchdir(int fd);
extern asmlinkage long sys_chroot(const char *name); extern asmlinkage long sys_chroot(const char *name);
...@@ -55,6 +56,7 @@ int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */ ...@@ -55,6 +56,7 @@ int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */
int root_mountflags = MS_RDONLY | MS_VERBOSE; int root_mountflags = MS_RDONLY | MS_VERBOSE;
static char root_device_name[64]; static char root_device_name[64];
static char saved_root_name[64];
/* this is initialized in init/main.c */ /* this is initialized in init/main.c */
dev_t ROOT_DEV; dev_t ROOT_DEV;
...@@ -87,169 +89,135 @@ static int __init readwrite(char *str) ...@@ -87,169 +89,135 @@ static int __init readwrite(char *str)
__setup("ro", readonly); __setup("ro", readonly);
__setup("rw", readwrite); __setup("rw", readwrite);
static struct dev_name_struct { static __init dev_t try_name(char *name, int part)
const char *name; {
const int num; char path[64];
} root_dev_names[] __initdata = { char buf[32];
{ "nfs", 0x00ff }, int range;
{ "hda", 0x0300 }, dev_t res;
{ "hdb", 0x0340 }, char *s;
{ "loop", 0x0700 }, int len;
{ "hdc", 0x1600 }, int fd;
{ "hdd", 0x1640 },
{ "hde", 0x2100 }, /* read device number from .../dev */
{ "hdf", 0x2140 },
{ "hdg", 0x2200 }, sprintf(path, "/sys/bus/block/devices/%s/dev", name);
{ "hdh", 0x2240 }, fd = open(path, 0, 0);
{ "hdi", 0x3800 }, if (fd < 0)
{ "hdj", 0x3840 }, goto fail;
{ "hdk", 0x3900 }, len = read(fd, buf, 32);
{ "hdl", 0x3940 }, close(fd);
{ "hdm", 0x5800 }, if (len <= 0 || len == 32 || buf[len - 1] != '\n')
{ "hdn", 0x5840 }, goto fail;
{ "hdo", 0x5900 }, buf[len - 1] = '\0';
{ "hdp", 0x5940 }, res = (dev_t) simple_strtoul(buf, &s, 16);
{ "hdq", 0x5A00 }, if (*s)
{ "hdr", 0x5A40 }, goto fail;
{ "hds", 0x5B00 },
{ "hdt", 0x5B40 }, /* if it's there and we are not looking for a partition - that's it */
{ "sda", 0x0800 }, if (!part)
{ "sdb", 0x0810 }, return res;
{ "sdc", 0x0820 },
{ "sdd", 0x0830 }, /* otherwise read range from .../range */
{ "sde", 0x0840 }, sprintf(path, "/sys/bus/block/devices/%s/range", name);
{ "sdf", 0x0850 }, fd = open(path, 0, 0);
{ "sdg", 0x0860 }, if (fd < 0)
{ "sdh", 0x0870 }, goto fail;
{ "sdi", 0x0880 }, len = read(fd, buf, 32);
{ "sdj", 0x0890 }, close(fd);
{ "sdk", 0x08a0 }, if (len <= 0 || len == 32 || buf[len - 1] != '\n')
{ "sdl", 0x08b0 }, goto fail;
{ "sdm", 0x08c0 }, buf[len - 1] = '\0';
{ "sdn", 0x08d0 }, range = simple_strtoul(buf, &s, 10);
{ "sdo", 0x08e0 }, if (*s)
{ "sdp", 0x08f0 }, goto fail;
{ "ada", 0x1c00 },
{ "adb", 0x1c10 }, /* if partition is within range - we got it */
{ "adc", 0x1c20 }, if (part < range)
{ "add", 0x1c30 }, return res + part;
{ "ade", 0x1c40 }, fail:
{ "fd", 0x0200 }, return (dev_t) 0;
{ "md", 0x0900 },
{ "xda", 0x0d00 },
{ "xdb", 0x0d40 },
{ "ram", 0x0100 },
{ "scd", 0x0b00 },
{ "mcd", 0x1700 },
{ "cdu535", 0x1800 },
{ "sonycd", 0x1800 },
{ "aztcd", 0x1d00 },
{ "cm206cd", 0x2000 },
{ "gscd", 0x1000 },
{ "sbpcd", 0x1900 },
{ "eda", 0x2400 },
{ "edb", 0x2440 },
{ "pda", 0x2d00 },
{ "pdb", 0x2d10 },
{ "pdc", 0x2d20 },
{ "pdd", 0x2d30 },
{ "pcd", 0x2e00 },
{ "pf", 0x2f00 },
{ "apblock", APBLOCK_MAJOR << 8},
{ "ddv", DDV_MAJOR << 8},
{ "jsfd", JSFD_MAJOR << 8},
#if defined(CONFIG_ARCH_S390)
{ "dasda", (DASD_MAJOR << MINORBITS) },
{ "dasdb", (DASD_MAJOR << MINORBITS) + (1 << 2) },
{ "dasdc", (DASD_MAJOR << MINORBITS) + (2 << 2) },
{ "dasdd", (DASD_MAJOR << MINORBITS) + (3 << 2) },
{ "dasde", (DASD_MAJOR << MINORBITS) + (4 << 2) },
{ "dasdf", (DASD_MAJOR << MINORBITS) + (5 << 2) },
{ "dasdg", (DASD_MAJOR << MINORBITS) + (6 << 2) },
{ "dasdh", (DASD_MAJOR << MINORBITS) + (7 << 2) },
#endif
#if defined(CONFIG_BLK_CPQ_DA) || defined(CONFIG_BLK_CPQ_DA_MODULE)
{ "ida/c0d0p",0x4800 },
{ "ida/c0d1p",0x4810 },
{ "ida/c0d2p",0x4820 },
{ "ida/c0d3p",0x4830 },
{ "ida/c0d4p",0x4840 },
{ "ida/c0d5p",0x4850 },
{ "ida/c0d6p",0x4860 },
{ "ida/c0d7p",0x4870 },
{ "ida/c0d8p",0x4880 },
{ "ida/c0d9p",0x4890 },
{ "ida/c0d10p",0x48A0 },
{ "ida/c0d11p",0x48B0 },
{ "ida/c0d12p",0x48C0 },
{ "ida/c0d13p",0x48D0 },
{ "ida/c0d14p",0x48E0 },
{ "ida/c0d15p",0x48F0 },
#endif
#if defined(CONFIG_BLK_CPQ_CISS_DA) || defined(CONFIG_BLK_CPQ_CISS_DA_MODULE)
{ "cciss/c0d0p",0x6800 },
{ "cciss/c0d1p",0x6810 },
{ "cciss/c0d2p",0x6820 },
{ "cciss/c0d3p",0x6830 },
{ "cciss/c0d4p",0x6840 },
{ "cciss/c0d5p",0x6850 },
{ "cciss/c0d6p",0x6860 },
{ "cciss/c0d7p",0x6870 },
{ "cciss/c0d8p",0x6880 },
{ "cciss/c0d9p",0x6890 },
{ "cciss/c0d10p",0x68A0 },
{ "cciss/c0d11p",0x68B0 },
{ "cciss/c0d12p",0x68C0 },
{ "cciss/c0d13p",0x68D0 },
{ "cciss/c0d14p",0x68E0 },
{ "cciss/c0d15p",0x68F0 },
#endif
{ "nftla", 0x5d00 },
{ "nftlb", 0x5d10 },
{ "nftlc", 0x5d20 },
{ "nftld", 0x5d30 },
{ "ftla", 0x2c00 },
{ "ftlb", 0x2c08 },
{ "ftlc", 0x2c10 },
{ "ftld", 0x2c18 },
{ "mtdblock", 0x1f00 },
{ NULL, 0 }
};
kdev_t __init name_to_kdev_t(char *line)
{
int base = 0;
if (strncmp(line,"/dev/",5) == 0) {
struct dev_name_struct *dev = root_dev_names;
line += 5;
do {
int len = strlen(dev->name);
if (strncmp(line,dev->name,len) == 0) {
line += len;
base = dev->num;
break;
}
dev++;
} while (dev->name);
}
return to_kdev_t(base + simple_strtoul(line,NULL,base?10:16));
} }
static int __init root_dev_setup(char *line) /*
* Convert a name into device number. We accept the following variants:
*
* 1) device number in hexadecimal represents itself
* 2) /dev/nfs represents Root_NFS (0xff)
* 3) /dev/<disk_name> represents the device number of disk
* 4) /dev/<disk_name><decimal> represents the device number
* of partition - device number of disk plus the partition number
* 5) /dev/<disk_name>p<decimal> - same as the above, that form is
* used when disk name of partitioned disk ends on a digit.
*
* If the name doesn't fall into one of the categories above, we return 0.
* Driverfs is used to check if something is a disk name - it has
* all known disks under bus/block/devices. If the disk name
* contains slashes, name of driverfs node has them replaced with
* dots. try_name() does the actual checks, assuming that driverfs
* is mounted on rootfs /sys.
*/
__init dev_t name_to_dev_t(char *name)
{ {
int i; char s[32];
char ch; char *p;
dev_t res = 0;
int part;
ROOT_DEV = kdev_t_to_nr(name_to_kdev_t(line)); sys_mkdir("/sys", 0700);
memset (root_device_name, 0, sizeof root_device_name); if (sys_mount("driverfs", "/sys", "driverfs", 0, NULL) < 0)
if (strncmp (line, "/dev/", 5) == 0) line += 5; goto out;
for (i = 0; i < sizeof root_device_name - 1; ++i)
{ if (strncmp(name, "/dev/", 5) != 0) {
ch = line[i]; res = (dev_t) simple_strtoul(name, &p, 16);
if ( isspace (ch) || (ch == ',') || (ch == '\0') ) break; if (*p)
root_device_name[i] = ch; goto fail;
goto done;
} }
name += 5;
res = Root_NFS;
if (strcmp(name, "nfs") == 0)
goto done;
if (strlen(name) > 31)
goto fail;
strcpy(s, name);
for (p = s; *p; p++)
if (*p == '/')
*p = '.';
res = try_name(s, 0);
if (res)
goto done;
while (p > s && isdigit(p[-1]))
p--;
if (p == s || !*p || *p == '0')
goto fail;
part = simple_strtoul(p, NULL, 10);
*p = '\0';
res = try_name(s, part);
if (res)
goto done;
if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
goto fail;
p[-1] = '\0';
res = try_name(s, part);
done:
sys_umount("/sys", 0);
out:
sys_rmdir("/sys");
return res;
fail:
res = (dev_t) 0;
goto done;
}
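Worked examples of the rules in the comment above, assuming driverfs exposes the disks under /sys/bus/block/devices as described (illustrative only, not output from this patch):

    static void __init name_to_dev_t_examples(void)
    {
    	dev_t dev;

    	dev = name_to_dev_t("0x0801");            /* rule 1: hex number as-is */
    	dev = name_to_dev_t("/dev/nfs");          /* rule 2: Root_NFS (0x00ff) */
    	dev = name_to_dev_t("/dev/hda");          /* rule 3: whole-disk number read from .../hda/dev */
    	dev = name_to_dev_t("/dev/hda2");         /* rule 4: "hda" looked up, then + 2 if 2 < range */
    	dev = name_to_dev_t("/dev/cciss/c0d0p1"); /* rule 5: slashes become dots, "cciss.c0d0" part 1 */
    }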
static int __init root_dev_setup(char *line)
{
strncpy(saved_root_name, line, 64);
saved_root_name[63] = '\0';
return 1; return 1;
} }
...@@ -768,6 +736,13 @@ static int __init initrd_load(void) ...@@ -768,6 +736,13 @@ static int __init initrd_load(void)
void prepare_namespace(void) void prepare_namespace(void)
{ {
int is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR; int is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;
if (saved_root_name[0]) {
char *p = saved_root_name;
ROOT_DEV = name_to_dev_t(p);
if (strncmp(p, "/dev/", 5) == 0)
p += 5;
strcpy(root_device_name, p);
}
#ifdef CONFIG_BLK_DEV_INITRD #ifdef CONFIG_BLK_DEV_INITRD
if (!initrd_start) if (!initrd_start)
mount_initrd = 0; mount_initrd = 0;
......
...@@ -57,12 +57,13 @@ ...@@ -57,12 +57,13 @@
#include <linux/pm.h> #include <linux/pm.h>
#include <linux/device.h> #include <linux/device.h>
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include <linux/swapops.h>
#include <linux/bootmem.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/io.h> #include <asm/io.h>
#include <linux/swapops.h>
extern void signal_wake_up(struct task_struct *t); extern void signal_wake_up(struct task_struct *t);
extern int sys_sync(void); extern int sys_sync(void);
...@@ -225,7 +226,7 @@ int freeze_processes(void) ...@@ -225,7 +226,7 @@ int freeze_processes(void)
todo++; todo++;
} while_each_thread(g, p); } while_each_thread(g, p);
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
yield(); yield(); /* Yield is okay here */
if (time_after(jiffies, start_time + TIMEOUT)) { if (time_after(jiffies, start_time + TIMEOUT)) {
printk( "\n" ); printk( "\n" );
printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo ); printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
...@@ -309,6 +310,9 @@ static void mark_swapfiles(swp_entry_t prev, int mode) ...@@ -309,6 +310,9 @@ static void mark_swapfiles(swp_entry_t prev, int mode)
union diskpage *cur; union diskpage *cur;
struct page *page; struct page *page;
if (root_swap == 0xFFFF) /* ignored */
return;
page = alloc_page(GFP_ATOMIC); page = alloc_page(GFP_ATOMIC);
if (!page) if (!page)
panic("Out of memory in mark_swapfiles"); panic("Out of memory in mark_swapfiles");
...@@ -474,9 +478,9 @@ static int count_and_copy_data_pages(struct pbe *pagedir_p) ...@@ -474,9 +478,9 @@ static int count_and_copy_data_pages(struct pbe *pagedir_p)
#ifdef CONFIG_DISCONTIGMEM #ifdef CONFIG_DISCONTIGMEM
panic("Discontingmem not supported"); panic("Discontingmem not supported");
#else #else
BUG_ON (max_mapnr != num_physpages); BUG_ON (max_pfn != num_physpages);
#endif #endif
for (pfn = 0; pfn < max_mapnr; pfn++) { for (pfn = 0; pfn < max_pfn; pfn++) {
page = pfn_to_page(pfn); page = pfn_to_page(pfn);
if (PageHighMem(page)) if (PageHighMem(page))
panic("Swsusp not supported on highmem boxes. Send 1GB of RAM to <pavel@ucw.cz> and try again ;-)."); panic("Swsusp not supported on highmem boxes. Send 1GB of RAM to <pavel@ucw.cz> and try again ;-).");
...@@ -686,6 +690,7 @@ static int suspend_save_image(void) ...@@ -686,6 +690,7 @@ static int suspend_save_image(void)
if(nr_free_pages() < nr_needed_pages) { if(nr_free_pages() < nr_needed_pages) {
printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n", printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n",
name_suspend, nr_needed_pages-nr_free_pages()); name_suspend, nr_needed_pages-nr_free_pages());
root_swap = 0xFFFF;
spin_unlock_irq(&suspend_pagedir_lock); spin_unlock_irq(&suspend_pagedir_lock);
return 1; return 1;
} }
...@@ -1042,7 +1047,7 @@ static int bdev_write_page(struct block_device *bdev, long pos, void *buf) ...@@ -1042,7 +1047,7 @@ static int bdev_write_page(struct block_device *bdev, long pos, void *buf)
return 0; return 0;
} }
extern kdev_t __init name_to_kdev_t(const char *line); extern dev_t __init name_to_dev_t(const char *line);
static int __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume) static int __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume)
{ {
...@@ -1138,7 +1143,7 @@ static int read_suspend_image(const char * specialfile, int noresume) ...@@ -1138,7 +1143,7 @@ static int read_suspend_image(const char * specialfile, int noresume)
unsigned long scratch_page = 0; unsigned long scratch_page = 0;
int error; int error;
resume_device = name_to_kdev_t(specialfile); resume_device = to_kdev_t(name_to_dev_t(specialfile));
scratch_page = get_zeroed_page(GFP_ATOMIC); scratch_page = get_zeroed_page(GFP_ATOMIC);
cur = (void *) scratch_page; cur = (void *) scratch_page;
if (cur) { if (cur) {
......
...@@ -916,7 +916,7 @@ generic_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) ...@@ -916,7 +916,7 @@ generic_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
return ret; return ret;
} }
static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{ {
ssize_t written; ssize_t written;
unsigned long count = desc->count; unsigned long count = desc->count;
......
...@@ -51,6 +51,16 @@ ...@@ -51,6 +51,16 @@
/* Keep swapped page count in private field of indirect struct page */ /* Keep swapped page count in private field of indirect struct page */
#define nr_swapped private #define nr_swapped private
/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */
SGP_CACHE, /* don't exceed i_size, may allocate page */
SGP_WRITE, /* may exceed i_size, may allocate page */
};
static int shmem_getpage(struct inode *inode, unsigned long idx,
struct page **pagep, enum sgp_type sgp);
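The sgp argument tells shmem_getpage() whether it may allocate a page and whether it must stay within i_size. The real call sites appear further down in this patch; the following is only a sketch of how each mode is meant to be used:

    /* illustrative only -- mirrors the call sites added later in this patch */
    static int sgp_usage_sketch(struct inode *inode, unsigned long idx, loff_t size)
    {
    	struct page *page = NULL;
    	int err;

    	/* fault path (shmem_nopage): stay within i_size, allocate if absent */
    	err = shmem_getpage(inode, idx, &page, SGP_CACHE);
    	if (err)
    		return err;

    	/* partial-truncate path (shmem_notify_change): never allocate a page */
    	err = shmem_getpage(inode, size >> PAGE_CACHE_SHIFT, &page, SGP_READ);
    	if (err)
    		return err;

    	/* write path (shmem_prepare_write, shmem_file_write): may extend i_size */
    	return shmem_getpage(inode, idx, &page, SGP_WRITE);
    }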
static inline struct page *shmem_dir_alloc(unsigned int gfp_mask) static inline struct page *shmem_dir_alloc(unsigned int gfp_mask)
{ {
/* /*
...@@ -132,8 +142,7 @@ static void shmem_free_block(struct inode *inode) ...@@ -132,8 +142,7 @@ static void shmem_free_block(struct inode *inode)
* @inode: inode to recalc * @inode: inode to recalc
* *
* We have to calculate the free blocks since the mm can drop * We have to calculate the free blocks since the mm can drop
* undirtied hole pages behind our back. Later we should be * undirtied hole pages behind our back.
* able to use the releasepage method to handle this better.
* *
* But normally info->alloced == inode->i_mapping->nrpages + info->swapped * But normally info->alloced == inode->i_mapping->nrpages + info->swapped
* So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped) * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
...@@ -200,8 +209,6 @@ static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long ...@@ -200,8 +209,6 @@ static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long
struct page **dir; struct page **dir;
struct page *subdir; struct page *subdir;
if (index >= info->next_index)
return NULL;
if (index < SHMEM_NR_DIRECT) if (index < SHMEM_NR_DIRECT)
return info->i_direct+index; return info->i_direct+index;
if (!info->i_indirect) { if (!info->i_indirect) {
...@@ -274,20 +281,23 @@ static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, uns ...@@ -274,20 +281,23 @@ static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, uns
* *
* @info: info structure for the inode * @info: info structure for the inode
* @index: index of the page to find * @index: index of the page to find
* @sgp: check and recheck i_size? skip allocation?
*/ */
static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index) static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
{ {
struct inode *inode = &info->vfs_inode; struct inode *inode = &info->vfs_inode;
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
struct page *page = NULL; struct page *page = NULL;
swp_entry_t *entry; swp_entry_t *entry;
static const swp_entry_t unswapped = {0};
while (!(entry = shmem_swp_entry(info, index, &page))) { if (sgp != SGP_WRITE &&
if (index >= info->next_index) { ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size)
entry = ERR_PTR(-EFAULT); return ERR_PTR(-EINVAL);
break;
}
while (!(entry = shmem_swp_entry(info, index, &page))) {
if (sgp == SGP_READ)
return (swp_entry_t *) &unswapped;
/* /*
* Test free_blocks against 1 not 0, since we have 1 data * Test free_blocks against 1 not 0, since we have 1 data
* page (and perhaps indirect index pages) yet to allocate: * page (and perhaps indirect index pages) yet to allocate:
...@@ -314,12 +324,21 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long ...@@ -314,12 +324,21 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
shmem_free_block(inode); shmem_free_block(inode);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
if (sgp != SGP_WRITE &&
((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) {
entry = ERR_PTR(-EINVAL);
break;
}
if (info->next_index <= index)
info->next_index = index + 1;
} }
if (page) { if (page) {
/* another task gave its page, or truncated the file */ /* another task gave its page, or truncated the file */
shmem_free_block(inode); shmem_free_block(inode);
shmem_dir_free(page); shmem_dir_free(page);
} }
if (info->next_index <= index && !IS_ERR(entry))
info->next_index = index + 1;
return entry; return entry;
} }
...@@ -470,7 +489,6 @@ static void shmem_truncate(struct inode *inode) ...@@ -470,7 +489,6 @@ static void shmem_truncate(struct inode *inode)
static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
{ {
static struct page *shmem_holdpage(struct inode *, unsigned long);
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
struct page *page = NULL; struct page *page = NULL;
long change = 0; long change = 0;
...@@ -495,8 +513,9 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) ...@@ -495,8 +513,9 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
* it assigned to swap. * it assigned to swap.
*/ */
if (attr->ia_size & (PAGE_CACHE_SIZE-1)) { if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
page = shmem_holdpage(inode, (void) shmem_getpage(inode,
attr->ia_size >> PAGE_CACHE_SHIFT); attr->ia_size>>PAGE_CACHE_SHIFT,
&page, SGP_READ);
} }
} }
} }
...@@ -672,6 +691,7 @@ static int shmem_writepage(struct page *page) ...@@ -672,6 +691,7 @@ static int shmem_writepage(struct page *page)
spin_lock(&info->lock); spin_lock(&info->lock);
shmem_recalc_inode(inode); shmem_recalc_inode(inode);
BUG_ON(index >= info->next_index);
entry = shmem_swp_entry(info, index, NULL); entry = shmem_swp_entry(info, index, NULL);
BUG_ON(!entry); BUG_ON(!entry);
BUG_ON(entry->val); BUG_ON(entry->val);
...@@ -710,57 +730,55 @@ static int shmem_vm_writeback(struct page *page, struct writeback_control *wbc) ...@@ -710,57 +730,55 @@ static int shmem_vm_writeback(struct page *page, struct writeback_control *wbc)
* vm. If we swap it in we mark it dirty since we also free the swap * vm. If we swap it in we mark it dirty since we also free the swap
* entry since a page cannot live in both the swap and page cache * entry since a page cannot live in both the swap and page cache
*/ */
static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep) static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp)
{ {
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo; struct shmem_sb_info *sbinfo;
struct page *page; struct page *filepage = *pagep;
struct page *swappage;
swp_entry_t *entry; swp_entry_t *entry;
swp_entry_t swap; swp_entry_t swap;
int error = 0; int error;
if (idx >= SHMEM_MAX_INDEX) if (idx >= SHMEM_MAX_INDEX)
return -EFBIG; return -EFBIG;
/* /*
* When writing, i_sem is held against truncation and other * Normally, filepage is NULL on entry, and either found
* writing, so next_index will remain as set here; but when * uptodate immediately, or allocated and zeroed, or read
* reading, idx must always be checked against next_index * in under swappage, which is then assigned to filepage.
* after sleeping, lest truncation occurred meanwhile. * But shmem_readpage and shmem_prepare_write pass in a locked
* filepage, which may be found not uptodate by other callers
* too, and may need to be copied from the swappage read in.
*/ */
spin_lock(&info->lock);
if (info->next_index <= idx)
info->next_index = idx + 1;
spin_unlock(&info->lock);
repeat: repeat:
page = find_lock_page(mapping, idx); if (!filepage)
if (page) { filepage = find_lock_page(mapping, idx);
*pagep = page; if (filepage && PageUptodate(filepage))
return 0; goto done;
} error = 0;
spin_lock(&info->lock); spin_lock(&info->lock);
shmem_recalc_inode(inode); shmem_recalc_inode(inode);
entry = shmem_swp_alloc(info, idx); entry = shmem_swp_alloc(info, idx, sgp);
if (IS_ERR(entry)) { if (IS_ERR(entry)) {
spin_unlock(&info->lock); spin_unlock(&info->lock);
return PTR_ERR(entry); error = PTR_ERR(entry);
goto failed;
} }
swap = *entry; swap = *entry;
if (swap.val) { if (swap.val) {
/* Look it up and read it in.. */ /* Look it up and read it in.. */
page = lookup_swap_cache(swap); swappage = lookup_swap_cache(swap);
if (!page) { if (!swappage) {
shmem_swp_unmap(entry); shmem_swp_unmap(entry);
spin_unlock(&info->lock); spin_unlock(&info->lock);
swapin_readahead(swap); swapin_readahead(swap);
page = read_swap_cache_async(swap); swappage = read_swap_cache_async(swap);
if (!page) { if (!swappage) {
spin_lock(&info->lock); spin_lock(&info->lock);
entry = shmem_swp_alloc(info, idx); entry = shmem_swp_alloc(info, idx, sgp);
if (IS_ERR(entry)) if (IS_ERR(entry))
error = PTR_ERR(entry); error = PTR_ERR(entry);
else { else {
...@@ -770,66 +788,105 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **p ...@@ -770,66 +788,105 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **p
} }
spin_unlock(&info->lock); spin_unlock(&info->lock);
if (error) if (error)
return error; goto failed;
goto repeat; goto repeat;
} }
wait_on_page_locked(page); wait_on_page_locked(swappage);
page_cache_release(page); page_cache_release(swappage);
goto repeat; goto repeat;
} }
/* We have to do this with page locked to prevent races */ /* We have to do this with page locked to prevent races */
if (TestSetPageLocked(page)) { if (TestSetPageLocked(swappage)) {
shmem_swp_unmap(entry); shmem_swp_unmap(entry);
spin_unlock(&info->lock); spin_unlock(&info->lock);
wait_on_page_locked(page); wait_on_page_locked(swappage);
page_cache_release(page); page_cache_release(swappage);
goto repeat; goto repeat;
} }
if (PageWriteback(page)) { if (PageWriteback(swappage)) {
shmem_swp_unmap(entry); shmem_swp_unmap(entry);
spin_unlock(&info->lock); spin_unlock(&info->lock);
wait_on_page_writeback(page); wait_on_page_writeback(swappage);
unlock_page(page); unlock_page(swappage);
page_cache_release(page); page_cache_release(swappage);
goto repeat; goto repeat;
} }
if (!PageUptodate(swappage)) {
error = PageUptodate(page)?
move_from_swap_cache(page, idx, mapping): -EIO;
if (error) {
shmem_swp_unmap(entry); shmem_swp_unmap(entry);
spin_unlock(&info->lock); spin_unlock(&info->lock);
unlock_page(page); unlock_page(swappage);
page_cache_release(page); page_cache_release(swappage);
return error; error = -EIO;
goto failed;
} }
if (filepage) {
shmem_swp_set(info, entry, 0); shmem_swp_set(info, entry, 0);
shmem_swp_unmap(entry); shmem_swp_unmap(entry);
delete_from_swap_cache(swappage);
spin_unlock(&info->lock); spin_unlock(&info->lock);
flush_page_to_ram(swappage);
copy_highpage(filepage, swappage);
unlock_page(swappage);
page_cache_release(swappage);
flush_dcache_page(filepage);
SetPageUptodate(filepage);
set_page_dirty(filepage);
swap_free(swap);
} else if (!(error = move_from_swap_cache(
swappage, idx, mapping))) {
shmem_swp_set(info, entry, 0);
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
filepage = swappage;
swap_free(swap); swap_free(swap);
} else { } else {
shmem_swp_unmap(entry); shmem_swp_unmap(entry);
spin_unlock(&info->lock); spin_unlock(&info->lock);
unlock_page(swappage);
page_cache_release(swappage);
if (error != -EEXIST)
goto failed;
goto repeat;
}
} else if (sgp == SGP_READ && !filepage) {
shmem_swp_unmap(entry);
filepage = find_get_page(mapping, idx);
if (filepage &&
(!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
spin_unlock(&info->lock);
wait_on_page_locked(filepage);
page_cache_release(filepage);
filepage = NULL;
goto repeat;
}
spin_unlock(&info->lock);
} else {
shmem_swp_unmap(entry);
sbinfo = SHMEM_SB(inode->i_sb); sbinfo = SHMEM_SB(inode->i_sb);
spin_lock(&sbinfo->stat_lock); spin_lock(&sbinfo->stat_lock);
if (sbinfo->free_blocks == 0) { if (sbinfo->free_blocks == 0) {
spin_unlock(&sbinfo->stat_lock); spin_unlock(&sbinfo->stat_lock);
return -ENOSPC; spin_unlock(&info->lock);
error = -ENOSPC;
goto failed;
} }
sbinfo->free_blocks--; sbinfo->free_blocks--;
inode->i_blocks += BLOCKS_PER_PAGE; inode->i_blocks += BLOCKS_PER_PAGE;
spin_unlock(&sbinfo->stat_lock); spin_unlock(&sbinfo->stat_lock);
page = page_cache_alloc(mapping); if (!filepage) {
if (!page) { spin_unlock(&info->lock);
filepage = page_cache_alloc(mapping);
if (!filepage) {
shmem_free_block(inode); shmem_free_block(inode);
return -ENOMEM; error = -ENOMEM;
goto failed;
} }
spin_lock(&info->lock); spin_lock(&info->lock);
entry = shmem_swp_alloc(info, idx); entry = shmem_swp_alloc(info, idx, sgp);
if (IS_ERR(entry)) if (IS_ERR(entry))
error = PTR_ERR(entry); error = PTR_ERR(entry);
else { else {
...@@ -837,58 +894,46 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **p ...@@ -837,58 +894,46 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **p
shmem_swp_unmap(entry); shmem_swp_unmap(entry);
} }
if (error || swap.val || if (error || swap.val ||
add_to_page_cache_lru(page, mapping, idx) < 0) { (error = add_to_page_cache_lru(
filepage, mapping, idx))) {
spin_unlock(&info->lock); spin_unlock(&info->lock);
page_cache_release(page); page_cache_release(filepage);
shmem_free_block(inode); shmem_free_block(inode);
if (error) filepage = NULL;
return error; if (error != -EEXIST)
goto failed;
goto repeat; goto repeat;
} }
}
info->alloced++; info->alloced++;
spin_unlock(&info->lock); spin_unlock(&info->lock);
clear_highpage(page); clear_highpage(filepage);
SetPageUptodate(page); flush_dcache_page(filepage);
SetPageUptodate(filepage);
}
done:
if (!*pagep) {
if (filepage) {
unlock_page(filepage);
*pagep = filepage;
} else
*pagep = ZERO_PAGE(0);
} }
/* We have the page */
*pagep = page;
return 0; return 0;
}
static struct page *shmem_holdpage(struct inode *inode, unsigned long idx) failed:
{ if (*pagep != filepage) {
struct shmem_inode_info *info = SHMEM_I(inode); unlock_page(filepage);
struct page *page; page_cache_release(filepage);
swp_entry_t *entry;
swp_entry_t swap = {0};
/*
* Somehow, it feels wrong for truncation down to cause any
* allocation: so instead of a blind shmem_getpage, check that
* the page has actually been instantiated before holding it.
*/
spin_lock(&info->lock);
page = find_get_page(inode->i_mapping, idx);
if (!page) {
entry = shmem_swp_entry(info, idx, NULL);
if (entry) {
swap = *entry;
shmem_swp_unmap(entry);
}
}
spin_unlock(&info->lock);
if (swap.val) {
if (shmem_getpage(inode, idx, &page) == 0)
unlock_page(page);
} }
return page; return error;
} }
struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int unused) struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int unused)
{ {
struct inode *inode = vma->vm_file->f_dentry->d_inode; struct inode *inode = vma->vm_file->f_dentry->d_inode;
struct page *page; struct page *page = NULL;
unsigned long idx; unsigned long idx;
int error; int error;
...@@ -896,14 +941,10 @@ struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int ...@@ -896,14 +941,10 @@ struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int
idx += vma->vm_pgoff; idx += vma->vm_pgoff;
idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
if (((loff_t) idx << PAGE_CACHE_SHIFT) >= inode->i_size) error = shmem_getpage(inode, idx, &page, SGP_CACHE);
return NOPAGE_SIGBUS;
error = shmem_getpage(inode, idx, &page);
if (error) if (error)
return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS; return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
unlock_page(page);
flush_page_to_ram(page); flush_page_to_ram(page);
return page; return page;
} }
...@@ -1017,13 +1058,33 @@ static int shmem_set_size(struct shmem_sb_info *info, ...@@ -1017,13 +1058,33 @@ static int shmem_set_size(struct shmem_sb_info *info,
static struct inode_operations shmem_symlink_inode_operations; static struct inode_operations shmem_symlink_inode_operations;
static struct inode_operations shmem_symlink_inline_operations; static struct inode_operations shmem_symlink_inline_operations;
/*
* tmpfs itself makes no use of generic_file_read, generic_file_mmap
* or generic_file_write; but shmem_readpage, shmem_prepare_write and
* simple_commit_write let a tmpfs file be used below the loop driver.
*/
static int
shmem_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
int error = shmem_getpage(inode, page->index, &page, SGP_CACHE);
unlock_page(page);
return error;
}
static int
shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
struct inode *inode = page->mapping->host;
return shmem_getpage(inode, page->index, &page, SGP_WRITE);
}
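For these hooks to be reachable they have to be wired into tmpfs's address_space_operations; that initializer sits outside the hunks shown here, so the following is only an assumed sketch of the wiring, using operation names that do appear in this patch plus simple_commit_write, which the comment above names:

    /* sketch of the assumed wiring; the real shmem_aops definition is not
     * part of this excerpt and may list more operations */
    static struct address_space_operations shmem_aops_sketch = {
    	.writepage	= shmem_writepage,
    	.readpage	= shmem_readpage,	/* lets the loop driver read tmpfs files */
    	.prepare_write	= shmem_prepare_write,
    	.commit_write	= simple_commit_write,
    };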
static ssize_t static ssize_t
shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
{ {
struct inode *inode = file->f_dentry->d_inode; struct inode *inode = file->f_dentry->d_inode;
unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
loff_t pos; loff_t pos;
struct page *page;
unsigned long written; unsigned long written;
long status; long status;
int err; int err;
...@@ -1073,11 +1134,45 @@ shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) ...@@ -1073,11 +1134,45 @@ shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
send_sig(SIGXFSZ, current, 0); send_sig(SIGXFSZ, current, 0);
goto out; goto out;
} }
if (count > limit - pos) { if (pos > 0xFFFFFFFFULL || count > limit - (u32)pos) {
/* send_sig(SIGXFSZ, current, 0); */
count = limit - (u32)pos;
}
}
/*
* LFS rule
*/
if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) {
if (pos >= MAX_NON_LFS) {
send_sig(SIGXFSZ, current, 0);
goto out;
}
if (count > MAX_NON_LFS - (u32)pos) {
/* send_sig(SIGXFSZ, current, 0); */
count = MAX_NON_LFS - (u32)pos;
}
}
/*
* Are we about to exceed the fs block limit?
*
* If we have already written some data it becomes a short write.
* If we have exceeded the limit without writing any data we send
* a signal and give them an EFBIG.
*
* Linus's frestrict idea will clean these up nicely..
*/
if (pos >= SHMEM_MAX_BYTES) {
if (count || pos > SHMEM_MAX_BYTES) {
send_sig(SIGXFSZ, current, 0); send_sig(SIGXFSZ, current, 0);
count = limit - pos; err = -EFBIG;
goto out;
} }
/* zero-length writes at ->s_maxbytes are OK */
} }
if (pos + count > SHMEM_MAX_BYTES)
count = SHMEM_MAX_BYTES - pos;
status = 0; status = 0;
if (count) { if (count) {
...@@ -1086,44 +1181,56 @@ shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) ...@@ -1086,44 +1181,56 @@ shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
} }
while (count) { while (count) {
struct page *page = NULL;
unsigned long bytes, index, offset; unsigned long bytes, index, offset;
char *kaddr; char *kaddr;
int left;
/*
* Try to find the page in the cache. If it isn't there,
* allocate a free page.
*/
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
index = pos >> PAGE_CACHE_SHIFT; index = pos >> PAGE_CACHE_SHIFT;
bytes = PAGE_CACHE_SIZE - offset; bytes = PAGE_CACHE_SIZE - offset;
if (bytes > count) { if (bytes > count)
bytes = count; bytes = count;
}
/* /*
* Bring in the user page that we will copy from _first_. * We don't hold page lock across copy from user -
* Otherwise there's a nasty deadlock on copying from the * what would it guard against? - so no deadlock here.
* same page as we're writing to, without it being marked * But it still may be a good idea to prefault below.
* up-to-date.
*/ */
{ volatile unsigned char dummy;
__get_user(dummy, buf);
__get_user(dummy, buf+bytes-1);
}
status = shmem_getpage(inode, index, &page); status = shmem_getpage(inode, index, &page, SGP_WRITE);
if (status) if (status)
break; break;
left = bytes;
if (PageHighMem(page)) {
volatile unsigned char dummy;
__get_user(dummy, buf);
__get_user(dummy, buf + bytes - 1);
kaddr = kmap_atomic(page, KM_USER0);
left = __copy_from_user(kaddr + offset, buf, bytes);
kunmap_atomic(kaddr, KM_USER0);
}
if (left) {
kaddr = kmap(page); kaddr = kmap(page);
status = __copy_from_user(kaddr+offset, buf, bytes); left = __copy_from_user(kaddr + offset, buf, bytes);
kunmap(page); kunmap(page);
if (status) }
goto fail_write;
flush_dcache_page(page); flush_dcache_page(page);
if (bytes > 0) { if (left) {
page_cache_release(page);
status = -EFAULT;
break;
}
set_page_dirty(page); set_page_dirty(page);
page_cache_release(page);
/*
* Balance dirty pages??
*/
written += bytes; written += bytes;
count -= bytes; count -= bytes;
pos += bytes; pos += bytes;
...@@ -1131,16 +1238,8 @@ shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) ...@@ -1131,16 +1238,8 @@ shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
if (pos > inode->i_size) if (pos > inode->i_size)
inode->i_size = pos; inode->i_size = pos;
} }
unlock:
/* Mark it unlocked again and drop the page.. */
unlock_page(page);
page_cache_release(page);
if (status < 0)
break;
}
*ppos = pos; *ppos = pos;
err = written ? written : status; err = written ? written : status;
out: out:
/* Short writes give back address space */ /* Short writes give back address space */
...@@ -1149,25 +1248,20 @@ shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) ...@@ -1149,25 +1248,20 @@ shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
out_nc: out_nc:
up(&inode->i_sem); up(&inode->i_sem);
return err; return err;
fail_write:
status = -EFAULT;
ClearPageUptodate(page);
goto unlock;
} }
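A worked illustration of the limit trimming now spelled out at the top of shmem_file_write(): the process RLIMIT_FSIZE is applied first, then the 2GB-1 boundary for files opened without O_LARGEFILE, then the filesystem's own maximum (SHMEM_MAX_BYTES). The sketch below is user-space style and hedged; the EX_* constants are stand-ins rather than the kernel's values, and it assumes the rlimit is not RLIM_INFINITY.

/*
 * Illustrative sketch only, not part of this commit.  Returns how many
 * bytes of a write at 'pos' survive the three limits checked above;
 * a return of 0 corresponds to the SIGXFSZ / -EFBIG cases.
 */
#define EX_MAX_NON_LFS	((1UL << 31) - 1)	/* stand-in for MAX_NON_LFS */
#define EX_MAX_BYTES	(1ULL << 43)		/* stand-in for SHMEM_MAX_BYTES */

static unsigned long trim_write(unsigned long long pos, unsigned long count,
				unsigned long rlim_fsize, int o_largefile)
{
	if (pos >= rlim_fsize)
		return 0;
	if (count > rlim_fsize - pos)
		count = rlim_fsize - pos;
	if (!o_largefile) {
		if (pos >= EX_MAX_NON_LFS)
			return 0;
		if (count > EX_MAX_NON_LFS - pos)
			count = EX_MAX_NON_LFS - pos;
	}
	if (pos >= EX_MAX_BYTES)
		return 0;
	if (pos + count > EX_MAX_BYTES)
		count = EX_MAX_BYTES - pos;
	return count;
}

For example, with rlim_fsize = 1 MB and no O_LARGEFILE, a 4 KB write starting 1 KB short of the rlimit is trimmed to 1 KB; the same write starting at the rlimit returns 0 and the caller raises SIGXFSZ.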
static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc) static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
{ {
struct inode *inode = filp->f_dentry->d_inode; struct inode *inode = filp->f_dentry->d_inode;
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
unsigned long index, offset; unsigned long index, offset;
int nr = 1;
index = *ppos >> PAGE_CACHE_SHIFT; index = *ppos >> PAGE_CACHE_SHIFT;
offset = *ppos & ~PAGE_CACHE_MASK; offset = *ppos & ~PAGE_CACHE_MASK;
while (nr && desc->count) { for (;;) {
struct page *page; struct page *page = NULL;
unsigned long end_index, nr; unsigned long end_index, nr, ret;
end_index = inode->i_size >> PAGE_CACHE_SHIFT; end_index = inode->i_size >> PAGE_CACHE_SHIFT;
if (index > end_index) if (index > end_index)
...@@ -1178,9 +1272,9 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ ...@@ -1178,9 +1272,9 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
break; break;
} }
desc->error = shmem_getpage(inode, index, &page); desc->error = shmem_getpage(inode, index, &page, SGP_READ);
if (desc->error) { if (desc->error) {
if (desc->error == -EFAULT) if (desc->error == -EINVAL)
desc->error = 0; desc->error = 0;
break; break;
} }
...@@ -1194,15 +1288,18 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ ...@@ -1194,15 +1288,18 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
if (index == end_index) { if (index == end_index) {
nr = inode->i_size & ~PAGE_CACHE_MASK; nr = inode->i_size & ~PAGE_CACHE_MASK;
if (nr <= offset) { if (nr <= offset) {
unlock_page(page);
page_cache_release(page); page_cache_release(page);
break; break;
} }
} }
unlock_page(page);
nr -= offset; nr -= offset;
if (!list_empty(&mapping->i_mmap_shared)) /* If users can be writing to this page using arbitrary
* virtual addresses, take care about potential aliasing
* before reading the page on the kernel side.
*/
if (!list_empty(&mapping->i_mmap_shared) &&
page != ZERO_PAGE(0))
flush_dcache_page(page); flush_dcache_page(page);
/* /*
...@@ -1215,12 +1312,14 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ ...@@ -1215,12 +1312,14 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
* "pos" here (the actor routine has to update the user buffer * "pos" here (the actor routine has to update the user buffer
* pointers and the remaining count). * pointers and the remaining count).
*/ */
nr = file_read_actor(desc, page, offset, nr); ret = actor(desc, page, offset, nr);
offset += nr; offset += ret;
index += offset >> PAGE_CACHE_SHIFT; index += offset >> PAGE_CACHE_SHIFT;
offset &= ~PAGE_CACHE_MASK; offset &= ~PAGE_CACHE_MASK;
page_cache_release(page); page_cache_release(page);
if (ret != nr || !desc->count)
break;
} }
*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
...@@ -1229,27 +1328,43 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ ...@@ -1229,27 +1328,43 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
static ssize_t shmem_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) static ssize_t shmem_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
{ {
ssize_t retval; read_descriptor_t desc;
retval = -EFAULT; if ((ssize_t) count < 0)
if (access_ok(VERIFY_WRITE, buf, count)) { return -EINVAL;
retval = 0; if (!access_ok(VERIFY_WRITE, buf, count))
return -EFAULT;
if (!count)
return 0;
if (count) { desc.written = 0;
desc.count = count;
desc.buf = buf;
desc.error = 0;
do_shmem_file_read(filp, ppos, &desc, file_read_actor);
if (desc.written)
return desc.written;
return desc.error;
}
static ssize_t shmem_file_sendfile(struct file *out_file,
struct file *in_file, loff_t *ppos, size_t count)
{
read_descriptor_t desc; read_descriptor_t desc;
if (!count)
return 0;
desc.written = 0; desc.written = 0;
desc.count = count; desc.count = count;
desc.buf = buf; desc.buf = (char *)out_file;
desc.error = 0; desc.error = 0;
do_shmem_file_read(filp, ppos, &desc);
retval = desc.written; do_shmem_file_read(in_file, ppos, &desc, file_send_actor);
if (!retval) if (desc.written)
retval = desc.error; return desc.written;
} return desc.error;
}
return retval;
} }
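Design note on the refactor above: read(2) and the new sendfile(2) support now share do_shmem_file_read(), differing only in the read_actor_t they pass (file_read_actor copies each chunk to the user buffer, file_send_actor pushes the page to the output file). The hedged sketch below shows the actor contract with a made-up actor that only counts bytes.

/*
 * Illustrative sketch only, not part of this commit.  An actor receives
 * one (page, offset, size) chunk per iteration, consumes what it can,
 * updates desc->count and desc->written, and returns the amount taken.
 * Returning less than 'size' makes do_shmem_file_read() stop early,
 * which is how file_read_actor reports a fault while copying to user
 * space.
 */
static int count_only_actor(read_descriptor_t *desc, struct page *page,
			    unsigned long offset, unsigned long size)
{
	unsigned long n = size;

	if (n > desc->count)
		n = desc->count;
	desc->count -= n;
	desc->written += n;
	return n;
}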
static int shmem_statfs(struct super_block *sb, struct statfs *buf) static int shmem_statfs(struct super_block *sb, struct statfs *buf)
...@@ -1317,39 +1432,6 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr ...@@ -1317,39 +1432,6 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
return 0; return 0;
} }
static inline int shmem_positive(struct dentry *dentry)
{
return dentry->d_inode && !d_unhashed(dentry);
}
/*
* Check that a directory is empty (this works
* for regular files too, they'll just always be
* considered empty..).
*
* Note that an empty directory can still have
* children, they just all have to be negative..
*/
static int shmem_empty(struct dentry *dentry)
{
struct list_head *list;
spin_lock(&dcache_lock);
list = dentry->d_subdirs.next;
while (list != &dentry->d_subdirs) {
struct dentry *de = list_entry(list, struct dentry, d_child);
if (shmem_positive(de)) {
spin_unlock(&dcache_lock);
return 0;
}
list = list->next;
}
spin_unlock(&dcache_lock);
return 1;
}
static int shmem_unlink(struct inode *dir, struct dentry *dentry) static int shmem_unlink(struct inode *dir, struct dentry *dentry)
{ {
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
...@@ -1363,7 +1445,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) ...@@ -1363,7 +1445,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
static int shmem_rmdir(struct inode *dir, struct dentry *dentry) static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{ {
if (!shmem_empty(dentry)) if (!simple_empty(dentry))
return -ENOTEMPTY; return -ENOTEMPTY;
dir->i_nlink--; dir->i_nlink--;
...@@ -1381,7 +1463,7 @@ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct ...@@ -1381,7 +1463,7 @@ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct
struct inode *inode = old_dentry->d_inode; struct inode *inode = old_dentry->d_inode;
int they_are_dirs = S_ISDIR(inode->i_mode); int they_are_dirs = S_ISDIR(inode->i_mode);
if (!shmem_empty(new_dentry)) if (!simple_empty(new_dentry))
return -ENOTEMPTY; return -ENOTEMPTY;
if (new_dentry->d_inode) { if (new_dentry->d_inode) {
...@@ -1406,7 +1488,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s ...@@ -1406,7 +1488,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
int error; int error;
int len; int len;
struct inode *inode; struct inode *inode;
struct page *page; struct page *page = NULL;
char *kaddr; char *kaddr;
struct shmem_inode_info *info; struct shmem_inode_info *info;
...@@ -1429,7 +1511,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s ...@@ -1429,7 +1511,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
iput(inode); iput(inode);
return -ENOMEM; return -ENOMEM;
} }
error = shmem_getpage(inode, 0, &page); error = shmem_getpage(inode, 0, &page, SGP_WRITE);
if (error) { if (error) {
vm_unacct_memory(VM_ACCT(1)); vm_unacct_memory(VM_ACCT(1));
iput(inode); iput(inode);
...@@ -1439,11 +1521,10 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s ...@@ -1439,11 +1521,10 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
spin_lock(&shmem_ilock); spin_lock(&shmem_ilock);
list_add_tail(&info->list, &shmem_inodes); list_add_tail(&info->list, &shmem_inodes);
spin_unlock(&shmem_ilock); spin_unlock(&shmem_ilock);
kaddr = kmap(page); kaddr = kmap_atomic(page, KM_USER0);
memcpy(kaddr, symname, len); memcpy(kaddr, symname, len);
kunmap(page); kunmap_atomic(kaddr, KM_USER0);
set_page_dirty(page); set_page_dirty(page);
unlock_page(page);
page_cache_release(page); page_cache_release(page);
} }
dir->i_size += BOGO_DIRENT_SIZE; dir->i_size += BOGO_DIRENT_SIZE;
...@@ -1465,26 +1546,24 @@ static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) ...@@ -1465,26 +1546,24 @@ static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen) static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
{ {
struct page *page; struct page *page = NULL;
int res = shmem_getpage(dentry->d_inode, 0, &page); int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
if (res) if (res)
return res; return res;
res = vfs_readlink(dentry, buffer, buflen, kmap(page)); res = vfs_readlink(dentry, buffer, buflen, kmap(page));
kunmap(page); kunmap(page);
unlock_page(page);
page_cache_release(page); page_cache_release(page);
return res; return res;
} }
static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd) static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
{ {
struct page *page; struct page *page = NULL;
int res = shmem_getpage(dentry->d_inode, 0, &page); int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
if (res) if (res)
return res; return res;
res = vfs_follow_link(nd, kmap(page)); res = vfs_follow_link(nd, kmap(page));
kunmap(page); kunmap(page);
unlock_page(page);
page_cache_release(page); page_cache_release(page);
return res; return res;
} }
...@@ -1569,15 +1648,10 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) ...@@ -1569,15 +1648,10 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
unsigned long max_blocks = sbinfo->max_blocks; unsigned long max_blocks = sbinfo->max_blocks;
unsigned long max_inodes = sbinfo->max_inodes; unsigned long max_inodes = sbinfo->max_inodes;
if (shmem_parse_options (data, NULL, NULL, NULL, &max_blocks, &max_inodes)) if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
return -EINVAL; return -EINVAL;
return shmem_set_size(sbinfo, max_blocks, max_inodes); return shmem_set_size(sbinfo, max_blocks, max_inodes);
} }
int shmem_sync_file(struct file *file, struct dentry *dentry, int datasync)
{
return 0;
}
#endif #endif
static int shmem_fill_super(struct super_block *sb, void *data, int silent) static int shmem_fill_super(struct super_block *sb, void *data, int silent)
...@@ -1590,7 +1664,7 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1590,7 +1664,7 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent)
gid_t gid = current->fsgid; gid_t gid = current->fsgid;
struct shmem_sb_info *sbinfo; struct shmem_sb_info *sbinfo;
struct sysinfo si; struct sysinfo si;
int err; int err = -ENOMEM;
sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL); sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
if (!sbinfo) if (!sbinfo)
...@@ -1606,7 +1680,7 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1606,7 +1680,7 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent)
blocks = inodes = si.totalram / 2; blocks = inodes = si.totalram / 2;
#ifdef CONFIG_TMPFS #ifdef CONFIG_TMPFS
if (shmem_parse_options (data, &mode, &uid, &gid, &blocks, &inodes)) { if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes)) {
err = -EINVAL; err = -EINVAL;
goto failed; goto failed;
} }
...@@ -1620,28 +1694,29 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1620,28 +1694,29 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent)
sbinfo->max_inodes = inodes; sbinfo->max_inodes = inodes;
sbinfo->free_inodes = inodes; sbinfo->free_inodes = inodes;
sb->s_maxbytes = SHMEM_MAX_BYTES; sb->s_maxbytes = SHMEM_MAX_BYTES;
sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_bdev = bdget(sb->s_dev);
sb->s_blocksize_bits = PAGE_CACHE_SHIFT; if (!sb->s_bdev)
goto failed;
if (!sb_set_blocksize(sb, PAGE_CACHE_SIZE))
BUG();
sb->s_magic = TMPFS_MAGIC; sb->s_magic = TMPFS_MAGIC;
sb->s_op = &shmem_ops; sb->s_op = &shmem_ops;
inode = shmem_get_inode(sb, S_IFDIR | mode, 0); inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
if (!inode) { if (!inode)
err = -ENOMEM; goto failed_bdput;
goto failed;
}
inode->i_uid = uid; inode->i_uid = uid;
inode->i_gid = gid; inode->i_gid = gid;
root = d_alloc_root(inode); root = d_alloc_root(inode);
if (!root) { if (!root)
err = -ENOMEM;
goto failed_iput; goto failed_iput;
}
sb->s_root = root; sb->s_root = root;
return 0; return 0;
failed_iput: failed_iput:
iput(inode); iput(inode);
failed_bdput:
bdput(sb->s_bdev);
sb->s_bdev = NULL;
failed: failed:
kfree(sbinfo); kfree(sbinfo);
sb->s_fs_info = NULL; sb->s_fs_info = NULL;
...@@ -1650,6 +1725,8 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1650,6 +1725,8 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent)
static void shmem_put_super(struct super_block *sb) static void shmem_put_super(struct super_block *sb)
{ {
bdput(sb->s_bdev);
sb->s_bdev = NULL;
kfree(sb->s_fs_info); kfree(sb->s_fs_info);
sb->s_fs_info = NULL; sb->s_fs_info = NULL;
} }
...@@ -1702,6 +1779,11 @@ static struct address_space_operations shmem_aops = { ...@@ -1702,6 +1779,11 @@ static struct address_space_operations shmem_aops = {
.writepages = shmem_writepages, .writepages = shmem_writepages,
.vm_writeback = shmem_vm_writeback, .vm_writeback = shmem_vm_writeback,
.set_page_dirty = __set_page_dirty_nobuffers, .set_page_dirty = __set_page_dirty_nobuffers,
#ifdef CONFIG_TMPFS
.readpage = shmem_readpage,
.prepare_write = shmem_prepare_write,
.commit_write = simple_commit_write,
#endif
}; };
static struct file_operations shmem_file_operations = { static struct file_operations shmem_file_operations = {
...@@ -1709,7 +1791,8 @@ static struct file_operations shmem_file_operations = { ...@@ -1709,7 +1791,8 @@ static struct file_operations shmem_file_operations = {
#ifdef CONFIG_TMPFS #ifdef CONFIG_TMPFS
.read = shmem_file_read, .read = shmem_file_read,
.write = shmem_file_write, .write = shmem_file_write,
.fsync = shmem_sync_file, .fsync = simple_sync_file,
.sendfile = shmem_file_sendfile,
#endif #endif
}; };
...@@ -1754,15 +1837,6 @@ static struct super_block *shmem_get_sb(struct file_system_type *fs_type, ...@@ -1754,15 +1837,6 @@ static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
return get_sb_nodev(fs_type, flags, data, shmem_fill_super); return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
} }
#ifdef CONFIG_TMPFS
/* type "shm" will be tagged obsolete in 2.5 */
static struct file_system_type shmem_fs_type = {
.owner = THIS_MODULE,
.name = "shmem",
.get_sb = shmem_get_sb,
.kill_sb = kill_litter_super,
};
#endif
static struct file_system_type tmpfs_fs_type = { static struct file_system_type tmpfs_fs_type = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.name = "tmpfs", .name = "tmpfs",
...@@ -1771,10 +1845,9 @@ static struct file_system_type tmpfs_fs_type = { ...@@ -1771,10 +1845,9 @@ static struct file_system_type tmpfs_fs_type = {
}; };
static struct vfsmount *shm_mnt; static struct vfsmount *shm_mnt;
static int __init init_shmem_fs(void) static int __init init_tmpfs(void)
{ {
int error; int error;
struct vfsmount *res;
error = init_inodecache(); error = init_inodecache();
if (error) if (error)
...@@ -1786,52 +1859,31 @@ static int __init init_shmem_fs(void) ...@@ -1786,52 +1859,31 @@ static int __init init_shmem_fs(void)
goto out2; goto out2;
} }
#ifdef CONFIG_TMPFS #ifdef CONFIG_TMPFS
error = register_filesystem(&shmem_fs_type);
if (error) {
printk(KERN_ERR "Could not register shm fs\n");
goto out1;
}
devfs_mk_dir(NULL, "shm", NULL); devfs_mk_dir(NULL, "shm", NULL);
#endif #endif
res = kern_mount(&tmpfs_fs_type); shm_mnt = kern_mount(&tmpfs_fs_type);
if (IS_ERR (res)) { if (IS_ERR(shm_mnt)) {
error = PTR_ERR(res); error = PTR_ERR(shm_mnt);
printk(KERN_ERR "could not kern_mount tmpfs\n"); printk(KERN_ERR "Could not kern_mount tmpfs\n");
goto out; goto out1;
} }
shm_mnt = res;
/* The internal instance should not do size checking */ /* The internal instance should not do size checking */
shmem_set_size(SHMEM_SB(res->mnt_sb), ULONG_MAX, ULONG_MAX); shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
return 0; return 0;
out:
#ifdef CONFIG_TMPFS
unregister_filesystem(&shmem_fs_type);
out1: out1:
#endif
unregister_filesystem(&tmpfs_fs_type); unregister_filesystem(&tmpfs_fs_type);
out2: out2:
destroy_inodecache(); destroy_inodecache();
out3: out3:
shm_mnt = ERR_PTR(error);
return error; return error;
} }
module_init(init_tmpfs)
static void __exit exit_shmem_fs(void)
{
#ifdef CONFIG_TMPFS
unregister_filesystem(&shmem_fs_type);
#endif
unregister_filesystem(&tmpfs_fs_type);
mntput(shm_mnt);
destroy_inodecache();
}
module_init(init_shmem_fs)
module_exit(exit_shmem_fs)
/* /*
* shmem_file_setup - get an unlinked file living in shmem fs * shmem_file_setup - get an unlinked file living in tmpfs
* *
* @name: name for dentry (to be seen in /proc/<pid>/maps) * @name: name for dentry (to be seen in /proc/<pid>/maps)
* @size: size to be set for the file * @size: size to be set for the file
...@@ -1845,6 +1897,9 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) ...@@ -1845,6 +1897,9 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
struct dentry *dentry, *root; struct dentry *dentry, *root;
struct qstr this; struct qstr this;
if (IS_ERR(shm_mnt))
return (void *)shm_mnt;
if (size > SHMEM_MAX_BYTES) if (size > SHMEM_MAX_BYTES)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
......