Commit 972b8494 authored by Linus Torvalds

Merge http://gkernel.bkbits.net/misc-2.5

into penguin.transmeta.com:/home/penguin/torvalds/repositories/kernel/linux
parents 0b818f9d 7fc97e8d
......@@ -2667,7 +2667,7 @@ under /dev. These special filesystems provide kernel interfaces that
cannot be provided with standard device nodes.
/dev/pts devpts PTY slave filesystem
/dev/shm shmfs POSIX shared memory maintenance access
/dev/shm tmpfs POSIX shared memory maintenance access
**** TERMINAL DEVICES
......
......@@ -47,10 +47,9 @@ tmpfs has the following uses:
shared memory)
3) Some people (including me) find it very convenient to mount it
e.g. on /tmp and /var/tmp and have a big swap partition. But be
aware: loop mounts of tmpfs files do not work due to the internal
design. So mkinitrd shipped by most distributions will fail with a
tmpfs /tmp.
e.g. on /tmp and /var/tmp and have a big swap partition. And now
loop mounts of tmpfs files do work, so mkinitrd shipped by most
distributions should succeed with a tmpfs /tmp.
4) And probably a lot more I do not know about :-)
......@@ -90,13 +89,9 @@ TODOs:
size=50% the tmpfs instance should be able to grow to 50 percent of
RAM + swap. So the instance should adapt automatically if you add
or remove swap space.
2) loop mounts: This is difficult since loop.c relies on the readpage
operation. This operation gets a page from the caller to be filled
with the content of the file at that position. But tmpfs always has
the page and thus cannot copy the content to the given page. So it
cannot provide this operation. The VM had to be changed seriously
to achieve this.
3) Show the number of tmpfs RAM pages. (As shared?)
2) Show the number of tmpfs RAM pages. (As shared?)
Author:
Christoph Rohland <cr@sap.com>, 1.12.01
Updated:
Hugh Dickins <hugh@veritas.com>, 17 Oct 2002
......@@ -72,7 +72,9 @@ EXPORT_SYMBOL(pfn_to_nid);
#ifdef CONFIG_X86_NUMAQ
EXPORT_SYMBOL(xquad_portio);
#endif
#ifndef CONFIG_X86_WP_WORKS_OK
EXPORT_SYMBOL(__verify_write);
#endif
EXPORT_SYMBOL(dump_thread);
EXPORT_SYMBOL(dump_fpu);
EXPORT_SYMBOL(dump_extended_fpu);
......
......@@ -375,12 +375,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
break;
}
ret = 0;
if ( !child->used_math ) {
/* Simulate an empty FPU. */
set_fpu_cwd(child, 0x037f);
set_fpu_swd(child, 0x0000);
set_fpu_twd(child, 0xffff);
}
if (!child->used_math)
init_fpu(child);
get_fpregs((struct user_i387_struct *)data, child);
break;
}
......@@ -403,13 +399,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
ret = -EIO;
break;
}
if ( !child->used_math ) {
/* Simulate an empty FPU. */
set_fpu_cwd(child, 0x037f);
set_fpu_swd(child, 0x0000);
set_fpu_twd(child, 0xffff);
set_fpu_mxcsr(child, 0x1f80);
}
if (!child->used_math)
init_fpu(child);
ret = get_fpxregs((struct user_fxsr_struct *)data, child);
break;
}
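Both hunks above replace the open-coded "simulate an empty FPU" sequence with a single init_fpu() call. A minimal sketch of what such a helper consolidates, assuming the set_fpu_*() accessors from the removed lines (the name init_fpu_sketch and the cpu_has_fxsr guard are illustrative, not taken from the patch):
	static void init_fpu_sketch(struct task_struct *child)
	{
		set_fpu_cwd(child, 0x037f);		/* default control word */
		set_fpu_swd(child, 0x0000);		/* clear status word */
		set_fpu_twd(child, 0xffff);		/* tag all registers empty */
		if (cpu_has_fxsr)
			set_fpu_mxcsr(child, 0x1f80);	/* SSE default MXCSR */
		child->used_math = 1;
	}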
......
......@@ -54,10 +54,10 @@ static inline void save_processor_context (void)
/*
* descriptor tables
*/
asm volatile ("sgdt (%0)" : "=m" (saved_context.gdt_limit));
asm volatile ("sidt (%0)" : "=m" (saved_context.idt_limit));
asm volatile ("sldt (%0)" : "=m" (saved_context.ldt));
asm volatile ("str (%0)" : "=m" (saved_context.tr));
asm volatile ("sgdt %0" : "=m" (saved_context.gdt_limit));
asm volatile ("sidt %0" : "=m" (saved_context.idt_limit));
asm volatile ("sldt %0" : "=m" (saved_context.ldt));
asm volatile ("str %0" : "=m" (saved_context.tr));
/*
* save the general registers.
......@@ -67,22 +67,22 @@ static inline void save_processor_context (void)
* It's really not necessary, and kinda fishy (check the assembly output),
* so it's avoided.
*/
asm volatile ("movl %%esp, (%0)" : "=m" (saved_context.esp));
asm volatile ("movl %%eax, (%0)" : "=m" (saved_context.eax));
asm volatile ("movl %%ebx, (%0)" : "=m" (saved_context.ebx));
asm volatile ("movl %%ecx, (%0)" : "=m" (saved_context.ecx));
asm volatile ("movl %%edx, (%0)" : "=m" (saved_context.edx));
asm volatile ("movl %%ebp, (%0)" : "=m" (saved_context.ebp));
asm volatile ("movl %%esi, (%0)" : "=m" (saved_context.esi));
asm volatile ("movl %%edi, (%0)" : "=m" (saved_context.edi));
asm volatile ("movl %%esp, %0" : "=m" (saved_context.esp));
asm volatile ("movl %%eax, %0" : "=m" (saved_context.eax));
asm volatile ("movl %%ebx, %0" : "=m" (saved_context.ebx));
asm volatile ("movl %%ecx, %0" : "=m" (saved_context.ecx));
asm volatile ("movl %%edx, %0" : "=m" (saved_context.edx));
asm volatile ("movl %%ebp, %0" : "=m" (saved_context.ebp));
asm volatile ("movl %%esi, %0" : "=m" (saved_context.esi));
asm volatile ("movl %%edi, %0" : "=m" (saved_context.edi));
/* FIXME: Need to save XMM0..XMM15? */
/*
* segment registers
*/
asm volatile ("movw %%es, %0" : "=r" (saved_context.es));
asm volatile ("movw %%fs, %0" : "=r" (saved_context.fs));
asm volatile ("movw %%gs, %0" : "=r" (saved_context.gs));
asm volatile ("movw %%ss, %0" : "=r" (saved_context.ss));
asm volatile ("movw %%es, %0" : "=m" (saved_context.es));
asm volatile ("movw %%fs, %0" : "=m" (saved_context.fs));
asm volatile ("movw %%gs, %0" : "=m" (saved_context.gs));
asm volatile ("movw %%ss, %0" : "=m" (saved_context.ss));
/*
* control registers
......@@ -95,7 +95,7 @@ static inline void save_processor_context (void)
/*
* eflags
*/
asm volatile ("pushfl ; popl (%0)" : "=m" (saved_context.eflags));
asm volatile ("pushfl ; popl %0" : "=m" (saved_context.eflags));
}
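The operand-constraint fixes above matter because an "=m" operand already expands to the memory slot itself; wrapping it in parentheses adds a bogus extra level of indirection. An illustrative fragment (hypothetical, not from the patch):
	unsigned long slot;
	asm volatile ("movl %%esp, %0" : "=m" (slot));	/* stores ESP into slot */
	/* "movl %%esp, (%0)" with "=m" would assemble to movl %esp,(slot),
	 * using slot's current value as a pointer and clobbering whatever
	 * it happens to point at */
The segment-register stores switch from "=r" to "=m" in the same spirit: movw can store straight to memory, so there is no need to bounce the value through a scratch register.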
static void
......@@ -125,9 +125,7 @@ static inline void restore_processor_context (void)
/*
* first restore %ds, so we can access our data properly
*/
asm volatile (".align 4");
asm volatile ("movw %0, %%ds" :: "r" ((u16)__KERNEL_DS));
asm volatile ("movw %0, %%ds" :: "r" (__KERNEL_DS));
/*
* control registers
......@@ -136,7 +134,7 @@ static inline void restore_processor_context (void)
asm volatile ("movl %0, %%cr3" :: "r" (saved_context.cr3));
asm volatile ("movl %0, %%cr2" :: "r" (saved_context.cr2));
asm volatile ("movl %0, %%cr0" :: "r" (saved_context.cr0));
/*
* segment registers
*/
......@@ -167,9 +165,9 @@ static inline void restore_processor_context (void)
* now restore the descriptor tables to their proper values
* ltr is done in fix_processor_context().
*/
asm volatile ("lgdt (%0)" :: "m" (saved_context.gdt_limit));
asm volatile ("lidt (%0)" :: "m" (saved_context.idt_limit));
asm volatile ("lldt (%0)" :: "m" (saved_context.ldt));
asm volatile ("lgdt %0" :: "m" (saved_context.gdt_limit));
asm volatile ("lidt %0" :: "m" (saved_context.idt_limit));
asm volatile ("lldt %0" :: "m" (saved_context.ldt));
fix_processor_context();
......
......@@ -30,17 +30,20 @@ extern void die(const char *,struct pt_regs *,long);
extern int console_loglevel;
#ifndef CONFIG_X86_WP_WORKS_OK
/*
* Ugly, ugly, but the goto's result in better assembly..
*/
int __verify_write(const void * addr, unsigned long size)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct * vma;
unsigned long start = (unsigned long) addr;
if (!size)
if (!size || segment_eq(get_fs(),KERNEL_DS))
return 1;
down_read(&mm->mmap_sem);
vma = find_vma(current->mm, start);
if (!vma)
goto bad_area;
......@@ -80,6 +83,13 @@ int __verify_write(const void * addr, unsigned long size)
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
}
/*
* We really need to hold mmap_sem over the whole access to
* userspace, else another thread could change permissions.
* This is unfixable, so don't use i386-class machines for
* critical servers.
*/
up_read(&mm->mmap_sem);
return 1;
check_stack:
......@@ -89,6 +99,7 @@ int __verify_write(const void * addr, unsigned long size)
goto good_area;
bad_area:
up_read(&mm->mmap_sem);
return 0;
out_of_memory:
......@@ -98,6 +109,7 @@ int __verify_write(const void * addr, unsigned long size)
}
goto bad_area;
}
#endif
/*
* Unlock any spinlocks which will prevent us from getting the
......
......@@ -378,15 +378,10 @@ void __init paging_init(void)
* This function cannot be __init, since exceptions don't work in that
* section.
*/
static int do_test_wp_bit(unsigned long vaddr);
static int do_test_wp_bit(void);
void __init test_wp_bit(void)
{
const unsigned long vaddr = PAGE_OFFSET;
pgd_t *pgd;
pmd_t *pmd;
pte_t *pte, old_pte;
if (cpu_has_pse) {
/* Ok, all PSE-capable CPUs are definitely handling the WP bit right. */
boot_cpu_data.wp_works_ok = 1;
......@@ -395,17 +390,10 @@ void __init test_wp_bit(void)
printk("Checking if this processor honours the WP bit even in supervisor mode... ");
pgd = swapper_pg_dir + __pgd_offset(vaddr);
pmd = pmd_offset(pgd, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
old_pte = *pte;
*pte = pfn_pte(0, PAGE_READONLY);
local_flush_tlb();
boot_cpu_data.wp_works_ok = do_test_wp_bit(vaddr);
*pte = old_pte;
local_flush_tlb();
/* Any page-aligned address will do, the test is non-destructive */
__set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
boot_cpu_data.wp_works_ok = do_test_wp_bit();
clear_fixmap(FIX_WP_TEST);
if (!boot_cpu_data.wp_works_ok) {
printk("No.\n");
......@@ -550,7 +538,7 @@ void __init pgtable_cache_init(void)
#endif
/* Put this after the callers, so that it cannot be inlined */
static int do_test_wp_bit(unsigned long vaddr)
static int do_test_wp_bit(void)
{
char tmp_reg;
int flag;
......@@ -564,7 +552,7 @@ static int do_test_wp_bit(unsigned long vaddr)
" .align 4 \n"
" .long 1b,2b \n"
".previous \n"
:"=m" (*(char *) vaddr),
:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
"=q" (tmp_reg),
"=r" (flag)
:"2" (1)
......
......@@ -289,7 +289,7 @@ void __init bt_iounmap(void *addr, unsigned long size)
idx = FIX_BTMAP_BEGIN;
while (nrpages > 0) {
__set_fixmap(idx, 0, __pgprot(0));
clear_fixmap(idx);
--idx;
--nrpages;
}
......
......@@ -29,58 +29,6 @@
extern void die(const char *,struct pt_regs *,long);
/*
* Ugly, ugly, but the goto's result in better assembly..
*/
int __verify_write(const void * addr, unsigned long size)
{
struct vm_area_struct * vma;
unsigned long start = (unsigned long) addr;
if (!size)
return 1;
vma = find_vma(current->mm, start);
if (!vma)
goto bad_area;
if (vma->vm_start > start)
goto check_stack;
good_area:
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
size--;
size += start & ~PAGE_MASK;
size >>= PAGE_SHIFT;
start &= PAGE_MASK;
for (;;) {
if (handle_mm_fault(current->mm, vma, start, 1) <= 0)
goto bad_area;
if (!size)
break;
size--;
start += PAGE_SIZE;
if (start < vma->vm_end)
continue;
vma = vma->vm_next;
if (!vma || vma->vm_start != start)
goto bad_area;
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
}
return 1;
check_stack:
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
if (expand_stack(vma, start) == 0)
goto good_area;
bad_area:
return 0;
}
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
......
......@@ -432,7 +432,7 @@ static int __init rd_init (void)
disk->first_minor = i;
disk->fops = &rd_bd_op;
disk->queue = &rd_queue;
sprintf(disk->disk_name, "rd%d", i);
sprintf(disk->disk_name, "ram%d", i);
set_capacity(disk, rd_size * 2);
}
devfs_handle = devfs_mk_dir (NULL, "rd", NULL);
......
......@@ -1497,7 +1497,7 @@ int __init cm206_init(void)
goto out_disk;
disk->major = MAJOR_NR;
disk->first_minor = 0;
sprintf(disk->disk_name, "cm206");
sprintf(disk->disk_name, "cm206cd");
disk->fops = &cm206_bdops;
disk->flags = GENHD_FL_CD;
cm206_gendisk = disk;
......
......@@ -3287,13 +3287,13 @@ static int __init md_setup(char *str)
return 1;
}
extern kdev_t name_to_kdev_t(char *line) __init;
extern dev_t name_to_dev_t(char *line) __init;
void __init md_setup_drive(void)
{
int minor, i;
kdev_t dev;
dev_t dev;
mddev_t*mddev;
kdev_t devices[MD_SB_DISKS+1];
dev_t devices[MD_SB_DISKS+1];
for (minor = 0; minor < MAX_MD_DEVS; minor++) {
int err = 0;
......@@ -3312,16 +3312,17 @@ void __init md_setup_drive(void)
if (p)
*p++ = 0;
dev = name_to_kdev_t(devname);
handle = devfs_get_handle(NULL, devname, major(dev), minor(dev),
DEVFS_SPECIAL_BLK, 1);
dev = name_to_dev_t(devname);
handle = devfs_get_handle(NULL, devname,
MAJOR(dev), MINOR(dev),
DEVFS_SPECIAL_BLK, 1);
if (handle != 0) {
unsigned major, minor;
devfs_get_maj_min(handle, &major, &minor);
dev = mk_kdev(major, minor);
dev = MKDEV(major, minor);
devfs_put(handle);
}
if (kdev_none(dev)) {
if (!dev) {
printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
break;
}
......@@ -3331,7 +3332,7 @@ void __init md_setup_drive(void)
devname = p;
}
devices[i] = to_kdev_t(0);
devices[i] = 0;
if (!md_setup_args.device_set[minor])
continue;
......@@ -3375,13 +3376,13 @@ void __init md_setup_drive(void)
err = set_array_info(mddev, &ainfo);
for (i = 0; !err && i <= MD_SB_DISKS; i++) {
dev = devices[i];
if (kdev_none(dev))
if (!dev)
break;
dinfo.number = i;
dinfo.raid_disk = i;
dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
dinfo.major = major(dev);
dinfo.minor = minor(dev);
dinfo.major = MAJOR(dev);
dinfo.minor = MINOR(dev);
mddev->raid_disks++;
err = add_new_disk (mddev, &dinfo);
}
......@@ -3389,10 +3390,10 @@ void __init md_setup_drive(void)
/* persistent */
for (i = 0; i <= MD_SB_DISKS; i++) {
dev = devices[i];
if (kdev_none(dev))
if (!dev)
break;
dinfo.major = major(dev);
dinfo.minor = minor(dev);
dinfo.major = MAJOR(dev);
dinfo.minor = MINOR(dev);
add_new_disk (mddev, &dinfo);
}
}
......
......@@ -1044,7 +1044,7 @@ static int __init calc_erase_regions(struct mtd_erase_region_info *info, size_t
}
extern kdev_t name_to_kdev_t(char *line) __init;
extern dev_t name_to_dev_t(char *line) __init;
/* Startup */
static int __init init_blkmtd(void)
......@@ -1059,7 +1059,7 @@ static int __init init_blkmtd(void)
loff_t size;
int readonly = 0;
int erase_size = CONFIG_MTD_BLKDEV_ERASESIZE;
kdev_t rdev;
dev_t rdev;
struct block_device *bdev;
int err;
int mode;
......@@ -1107,17 +1107,17 @@ static int __init init_blkmtd(void)
filp_close(file, NULL);
return 1;
}
rdev = inode->i_rdev;
rdev = inode->i_bdev->bd_dev;
filp_close(file, NULL);
#else
rdev = name_to_kdev_t(device);
rdev = name_to_dev_t(device);
#endif
maj = major(rdev);
min = minor(rdev);
maj = MAJOR(rdev);
min = MINOR(rdev);
DEBUG(1, "blkmtd: found a block device major = %d, minor = %d\n", maj, min);
if(kdev_none(rdev)) {
if(!rdev) {
printk("blkmtd: bad block device: `%s'\n", device);
return 1;
}
......
......@@ -1792,9 +1792,6 @@ xfs_alloc_buftarg(
case EVMS_MAJOR:
btp->pbr_flags = PBR_ALIGNED_ONLY;
break;
case LVM_BLK_MAJOR:
btp->pbr_flags = PBR_SECTOR_ONLY;
break;
}
return btp;
......
......@@ -27,7 +27,7 @@
* Here we define all the compile-time 'special' virtual
* addresses. The point is to have a constant address at
* compile time, but to set the physical address only
* in the boot process. We allocate these special addresses
* in the boot process. We allocate these special addresses
* from the end of virtual memory (0xfffff000) backwards.
* Also this lets us do fail-safe vmalloc(), we
* can guarantee that these special addresses and
......@@ -41,13 +41,6 @@
* TLB entries of such buffers will not be flushed across
* task switches.
*/
/*
* on UP currently we will have no trace of the fixmap mechanism,
* no page table allocations, etc. This might change in the
* future, say framebuffers for the console driver(s) could be
* fix-mapped?
*/
enum fixed_addresses {
#ifdef CONFIG_X86_LOCAL_APIC
FIX_APIC_BASE, /* local (CPU) APIC -- required for SMP or not */
......@@ -81,6 +74,7 @@ enum fixed_addresses {
#define NR_FIX_BTMAPS 16
FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
FIX_WP_TEST,
__end_of_fixed_addresses
};
......@@ -94,6 +88,10 @@ extern void __set_fixmap (enum fixed_addresses idx,
*/
#define set_fixmap_nocache(idx, phys) \
__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
#define clear_fixmap(idx) \
__set_fixmap(idx, 0, __pgprot(0))
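The reworked WP-bit test above is the first user of this pair; its calling pattern, as in test_wp_bit():
	__set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
	boot_cpu_data.wp_works_ok = do_test_wp_bit();
	clear_fixmap(FIX_WP_TEST);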
/*
* used by vmalloc.c.
*
......
......@@ -15,22 +15,22 @@ arch_prepare_suspend(void)
/* image of the saved processor state */
struct saved_context {
u32 eax, ebx, ecx, edx;
u32 esp, ebp, esi, edi;
u16 es, fs, gs, ss;
u32 cr0, cr2, cr3, cr4;
unsigned long eax, ebx, ecx, edx;
unsigned long esp, ebp, esi, edi;
u16 es, fs, gs, ss;
unsigned long cr0, cr2, cr3, cr4;
u16 gdt_pad;
u16 gdt_limit;
u32 gdt_base;
unsigned long gdt_base;
u16 idt_pad;
u16 idt_limit;
u32 idt_base;
unsigned long idt_base;
u16 ldt;
u16 tss;
u32 tr;
u32 safety;
u32 return_address;
u32 eflags;
unsigned long tr;
unsigned long safety;
unsigned long return_address;
unsigned long eflags;
} __attribute__((packed));
#define loaddebug(thread,register) \
......@@ -52,11 +52,11 @@ extern unsigned long saved_edi;
static inline void acpi_save_register_state(unsigned long return_point)
{
saved_eip = return_point;
asm volatile ("movl %%esp,(%0)" : "=m" (saved_esp));
asm volatile ("movl %%ebp,(%0)" : "=m" (saved_ebp));
asm volatile ("movl %%ebx,(%0)" : "=m" (saved_ebx));
asm volatile ("movl %%edi,(%0)" : "=m" (saved_edi));
asm volatile ("movl %%esi,(%0)" : "=m" (saved_esi));
asm volatile ("movl %%esp,%0" : "=m" (saved_esp));
asm volatile ("movl %%ebp,%0" : "=m" (saved_ebp));
asm volatile ("movl %%ebx,%0" : "=m" (saved_ebx));
asm volatile ("movl %%edi,%0" : "=m" (saved_edi));
asm volatile ("movl %%esi,%0" : "=m" (saved_esi));
}
#define acpi_restore_register_state() do {} while (0)
......
......@@ -64,7 +64,6 @@ int __verify_write(const void *, unsigned long);
#define access_ok(type,addr,size) ( (__range_ok(addr,size) == 0) && \
((type) == VERIFY_READ || boot_cpu_data.wp_works_ok || \
segment_eq(get_fs(),KERNEL_DS) || \
__verify_write((void *)(addr),(size))))
#endif
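With the segment_eq() test folded into __verify_write() itself (see the fault.c hunk above), the macro after this change reads:
	#define access_ok(type,addr,size) ( (__range_ok(addr,size) == 0) && \
		((type) == VERIFY_READ || boot_cpu_data.wp_works_ok || \
		 __verify_write((void *)(addr),(size))))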
......
......@@ -39,6 +39,7 @@ extern void _clear_page(void *page);
#define clear_page(X) _clear_page((void *)(X))
struct page;
extern void clear_user_page(void *addr, unsigned long vaddr, struct page *page);
#define copy_page(X,Y) __memcpy((void *)(X), (void *)(Y), PAGE_SIZE)
extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *topage);
/* GROSS, defining this makes gcc pass these types as aggregates,
......
......@@ -1239,6 +1239,7 @@ extern int sb_min_blocksize(struct super_block *, int);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *);
extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *);
extern ssize_t generic_file_aio_read(struct kiocb *, char *, size_t, loff_t);
......
......@@ -73,4 +73,15 @@ static inline void copy_user_highpage(struct page *to, struct page *from, unsign
kunmap_atomic(vto, KM_USER1);
}
static inline void copy_highpage(struct page *to, struct page *from)
{
char *vfrom, *vto;
vfrom = kmap_atomic(from, KM_USER0);
vto = kmap_atomic(to, KM_USER1);
copy_page(vto, vfrom);
kunmap_atomic(vfrom, KM_USER0);
kunmap_atomic(vto, KM_USER1);
}
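shmem_getpage() below uses the new helper to fill a pre-locked file page from a swap-cache page; the calling pattern there is:
	copy_highpage(filepage, swappage);	/* kmaps both pages, copies PAGE_SIZE */
	flush_dcache_page(filepage);
	SetPageUptodate(filepage);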
#endif /* _LINUX_HIGHMEM_H */
......@@ -27,6 +27,7 @@ extern int get_filesystem_list(char * buf);
extern asmlinkage long sys_mount(char *dev_name, char *dir_name, char *type,
unsigned long flags, void *data);
extern asmlinkage long sys_mkdir(const char *name, int mode);
extern asmlinkage long sys_rmdir(const char *name);
extern asmlinkage long sys_chdir(const char *name);
extern asmlinkage long sys_fchdir(int fd);
extern asmlinkage long sys_chroot(const char *name);
......@@ -55,6 +56,7 @@ int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */
int root_mountflags = MS_RDONLY | MS_VERBOSE;
static char root_device_name[64];
static char saved_root_name[64];
/* this is initialized in init/main.c */
dev_t ROOT_DEV;
......@@ -87,169 +89,135 @@ static int __init readwrite(char *str)
__setup("ro", readonly);
__setup("rw", readwrite);
static struct dev_name_struct {
const char *name;
const int num;
} root_dev_names[] __initdata = {
{ "nfs", 0x00ff },
{ "hda", 0x0300 },
{ "hdb", 0x0340 },
{ "loop", 0x0700 },
{ "hdc", 0x1600 },
{ "hdd", 0x1640 },
{ "hde", 0x2100 },
{ "hdf", 0x2140 },
{ "hdg", 0x2200 },
{ "hdh", 0x2240 },
{ "hdi", 0x3800 },
{ "hdj", 0x3840 },
{ "hdk", 0x3900 },
{ "hdl", 0x3940 },
{ "hdm", 0x5800 },
{ "hdn", 0x5840 },
{ "hdo", 0x5900 },
{ "hdp", 0x5940 },
{ "hdq", 0x5A00 },
{ "hdr", 0x5A40 },
{ "hds", 0x5B00 },
{ "hdt", 0x5B40 },
{ "sda", 0x0800 },
{ "sdb", 0x0810 },
{ "sdc", 0x0820 },
{ "sdd", 0x0830 },
{ "sde", 0x0840 },
{ "sdf", 0x0850 },
{ "sdg", 0x0860 },
{ "sdh", 0x0870 },
{ "sdi", 0x0880 },
{ "sdj", 0x0890 },
{ "sdk", 0x08a0 },
{ "sdl", 0x08b0 },
{ "sdm", 0x08c0 },
{ "sdn", 0x08d0 },
{ "sdo", 0x08e0 },
{ "sdp", 0x08f0 },
{ "ada", 0x1c00 },
{ "adb", 0x1c10 },
{ "adc", 0x1c20 },
{ "add", 0x1c30 },
{ "ade", 0x1c40 },
{ "fd", 0x0200 },
{ "md", 0x0900 },
{ "xda", 0x0d00 },
{ "xdb", 0x0d40 },
{ "ram", 0x0100 },
{ "scd", 0x0b00 },
{ "mcd", 0x1700 },
{ "cdu535", 0x1800 },
{ "sonycd", 0x1800 },
{ "aztcd", 0x1d00 },
{ "cm206cd", 0x2000 },
{ "gscd", 0x1000 },
{ "sbpcd", 0x1900 },
{ "eda", 0x2400 },
{ "edb", 0x2440 },
{ "pda", 0x2d00 },
{ "pdb", 0x2d10 },
{ "pdc", 0x2d20 },
{ "pdd", 0x2d30 },
{ "pcd", 0x2e00 },
{ "pf", 0x2f00 },
{ "apblock", APBLOCK_MAJOR << 8},
{ "ddv", DDV_MAJOR << 8},
{ "jsfd", JSFD_MAJOR << 8},
#if defined(CONFIG_ARCH_S390)
{ "dasda", (DASD_MAJOR << MINORBITS) },
{ "dasdb", (DASD_MAJOR << MINORBITS) + (1 << 2) },
{ "dasdc", (DASD_MAJOR << MINORBITS) + (2 << 2) },
{ "dasdd", (DASD_MAJOR << MINORBITS) + (3 << 2) },
{ "dasde", (DASD_MAJOR << MINORBITS) + (4 << 2) },
{ "dasdf", (DASD_MAJOR << MINORBITS) + (5 << 2) },
{ "dasdg", (DASD_MAJOR << MINORBITS) + (6 << 2) },
{ "dasdh", (DASD_MAJOR << MINORBITS) + (7 << 2) },
#endif
#if defined(CONFIG_BLK_CPQ_DA) || defined(CONFIG_BLK_CPQ_DA_MODULE)
{ "ida/c0d0p",0x4800 },
{ "ida/c0d1p",0x4810 },
{ "ida/c0d2p",0x4820 },
{ "ida/c0d3p",0x4830 },
{ "ida/c0d4p",0x4840 },
{ "ida/c0d5p",0x4850 },
{ "ida/c0d6p",0x4860 },
{ "ida/c0d7p",0x4870 },
{ "ida/c0d8p",0x4880 },
{ "ida/c0d9p",0x4890 },
{ "ida/c0d10p",0x48A0 },
{ "ida/c0d11p",0x48B0 },
{ "ida/c0d12p",0x48C0 },
{ "ida/c0d13p",0x48D0 },
{ "ida/c0d14p",0x48E0 },
{ "ida/c0d15p",0x48F0 },
#endif
#if defined(CONFIG_BLK_CPQ_CISS_DA) || defined(CONFIG_BLK_CPQ_CISS_DA_MODULE)
{ "cciss/c0d0p",0x6800 },
{ "cciss/c0d1p",0x6810 },
{ "cciss/c0d2p",0x6820 },
{ "cciss/c0d3p",0x6830 },
{ "cciss/c0d4p",0x6840 },
{ "cciss/c0d5p",0x6850 },
{ "cciss/c0d6p",0x6860 },
{ "cciss/c0d7p",0x6870 },
{ "cciss/c0d8p",0x6880 },
{ "cciss/c0d9p",0x6890 },
{ "cciss/c0d10p",0x68A0 },
{ "cciss/c0d11p",0x68B0 },
{ "cciss/c0d12p",0x68C0 },
{ "cciss/c0d13p",0x68D0 },
{ "cciss/c0d14p",0x68E0 },
{ "cciss/c0d15p",0x68F0 },
#endif
{ "nftla", 0x5d00 },
{ "nftlb", 0x5d10 },
{ "nftlc", 0x5d20 },
{ "nftld", 0x5d30 },
{ "ftla", 0x2c00 },
{ "ftlb", 0x2c08 },
{ "ftlc", 0x2c10 },
{ "ftld", 0x2c18 },
{ "mtdblock", 0x1f00 },
{ NULL, 0 }
};
kdev_t __init name_to_kdev_t(char *line)
{
int base = 0;
if (strncmp(line,"/dev/",5) == 0) {
struct dev_name_struct *dev = root_dev_names;
line += 5;
do {
int len = strlen(dev->name);
if (strncmp(line,dev->name,len) == 0) {
line += len;
base = dev->num;
break;
}
dev++;
} while (dev->name);
static __init dev_t try_name(char *name, int part)
{
char path[64];
char buf[32];
int range;
dev_t res;
char *s;
int len;
int fd;
/* read device number from .../dev */
sprintf(path, "/sys/bus/block/devices/%s/dev", name);
fd = open(path, 0, 0);
if (fd < 0)
goto fail;
len = read(fd, buf, 32);
close(fd);
if (len <= 0 || len == 32 || buf[len - 1] != '\n')
goto fail;
buf[len - 1] = '\0';
res = (dev_t) simple_strtoul(buf, &s, 16);
if (*s)
goto fail;
/* if it's there and we are not looking for a partition - that's it */
if (!part)
return res;
/* otherwise read range from .../range */
sprintf(path, "/sys/bus/block/devices/%s/range", name);
fd = open(path, 0, 0);
if (fd < 0)
goto fail;
len = read(fd, buf, 32);
close(fd);
if (len <= 0 || len == 32 || buf[len - 1] != '\n')
goto fail;
buf[len - 1] = '\0';
range = simple_strtoul(buf, &s, 10);
if (*s)
goto fail;
/* if partition is within range - we got it */
if (part < range)
return res + part;
fail:
return (dev_t) 0;
}
/*
* Convert a name into device number. We accept the following variants:
*
* 1) device number in hexadecimal represents itself
* 2) /dev/nfs represents Root_NFS (0xff)
* 3) /dev/<disk_name> represents the device number of disk
* 4) /dev/<disk_name><decimal> represents the device number
* of partition - device number of disk plus the partition number
* 5) /dev/<disk_name>p<decimal> - same as the above; this form is
* used when the name of a partitioned disk ends in a digit.
*
* If the name doesn't fall into the categories above, we return 0.
* Driverfs is used to check whether something is a disk name: it has
* all known disks under bus/block/devices. If the disk name
* contains slashes, the name of the driverfs node has them replaced
* with dots. try_name() does the actual checks, assuming that
* driverfs is mounted on rootfs /sys.
*/
__init dev_t name_to_dev_t(char *name)
{
char s[32];
char *p;
dev_t res = 0;
int part;
sys_mkdir("/sys", 0700);
if (sys_mount("driverfs", "/sys", "driverfs", 0, NULL) < 0)
goto out;
if (strncmp(name, "/dev/", 5) != 0) {
res = (dev_t) simple_strtoul(name, &p, 16);
if (*p)
goto fail;
goto done;
}
return to_kdev_t(base + simple_strtoul(line,NULL,base?10:16));
name += 5;
res = Root_NFS;
if (strcmp(name, "nfs") == 0)
goto done;
if (strlen(name) > 31)
goto fail;
strcpy(s, name);
for (p = s; *p; p++)
if (*p == '/')
*p = '.';
res = try_name(s, 0);
if (res)
goto done;
while (p > s && isdigit(p[-1]))
p--;
if (p == s || !*p || *p == '0')
goto fail;
part = simple_strtoul(p, NULL, 10);
*p = '\0';
res = try_name(s, part);
if (res)
goto done;
if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
goto fail;
p[-1] = '\0';
res = try_name(s, part);
done:
sys_umount("/sys", 0);
out:
sys_rmdir("/sys");
return res;
fail:
res = (dev_t) 0;
goto done;
}
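A few worked examples of the rules above (illustrative only, assuming a SCSI disk registered as sda whose driverfs range file reports 16):
	name_to_dev_t("0811");		/* rule 1: a hex device number stands for itself */
	name_to_dev_t("/dev/nfs");	/* rule 2: Root_NFS */
	name_to_dev_t("/dev/sda");	/* rule 3: dev read from /sys/bus/block/devices/sda/dev */
	name_to_dev_t("/dev/sda2");	/* rule 4: sda's dev + 2, accepted since 2 < 16 */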
static int __init root_dev_setup(char *line)
{
int i;
char ch;
ROOT_DEV = kdev_t_to_nr(name_to_kdev_t(line));
memset (root_device_name, 0, sizeof root_device_name);
if (strncmp (line, "/dev/", 5) == 0) line += 5;
for (i = 0; i < sizeof root_device_name - 1; ++i)
{
ch = line[i];
if ( isspace (ch) || (ch == ',') || (ch == '\0') ) break;
root_device_name[i] = ch;
}
strncpy(saved_root_name, line, 64);
saved_root_name[63] = '\0';
return 1;
}
......@@ -768,6 +736,13 @@ static int __init initrd_load(void)
void prepare_namespace(void)
{
int is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;
if (saved_root_name[0]) {
char *p = saved_root_name;
ROOT_DEV = name_to_dev_t(p);
if (strncmp(p, "/dev/", 5) == 0)
p += 5;
strcpy(root_device_name, p);
}
#ifdef CONFIG_BLK_DEV_INITRD
if (!initrd_start)
mount_initrd = 0;
......
......@@ -57,12 +57,13 @@
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/buffer_head.h>
#include <linux/swapops.h>
#include <linux/bootmem.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/io.h>
#include <linux/swapops.h>
extern void signal_wake_up(struct task_struct *t);
extern int sys_sync(void);
......@@ -225,7 +226,7 @@ int freeze_processes(void)
todo++;
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
yield();
yield(); /* Yield is okay here */
if (time_after(jiffies, start_time + TIMEOUT)) {
printk( "\n" );
printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
......@@ -309,6 +310,9 @@ static void mark_swapfiles(swp_entry_t prev, int mode)
union diskpage *cur;
struct page *page;
if (root_swap == 0xFFFF) /* ignored */
return;
page = alloc_page(GFP_ATOMIC);
if (!page)
panic("Out of memory in mark_swapfiles");
......@@ -474,9 +478,9 @@ static int count_and_copy_data_pages(struct pbe *pagedir_p)
#ifdef CONFIG_DISCONTIGMEM
panic("Discontingmem not supported");
#else
BUG_ON (max_mapnr != num_physpages);
BUG_ON (max_pfn != num_physpages);
#endif
for (pfn = 0; pfn < max_mapnr; pfn++) {
for (pfn = 0; pfn < max_pfn; pfn++) {
page = pfn_to_page(pfn);
if (PageHighMem(page))
panic("Swsusp not supported on highmem boxes. Send 1GB of RAM to <pavel@ucw.cz> and try again ;-).");
......@@ -686,6 +690,7 @@ static int suspend_save_image(void)
if(nr_free_pages() < nr_needed_pages) {
printk(KERN_CRIT "%sCouldn't get enough free pages, %d pages short\n",
name_suspend, nr_needed_pages-nr_free_pages());
root_swap = 0xFFFF;
spin_unlock_irq(&suspend_pagedir_lock);
return 1;
}
......@@ -1042,7 +1047,7 @@ static int bdev_write_page(struct block_device *bdev, long pos, void *buf)
return 0;
}
extern kdev_t __init name_to_kdev_t(const char *line);
extern dev_t __init name_to_dev_t(const char *line);
static int __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume)
{
......@@ -1138,7 +1143,7 @@ static int read_suspend_image(const char * specialfile, int noresume)
unsigned long scratch_page = 0;
int error;
resume_device = name_to_kdev_t(specialfile);
resume_device = to_kdev_t(name_to_dev_t(specialfile));
scratch_page = get_zeroed_page(GFP_ATOMIC);
cur = (void *) scratch_page;
if (cur) {
......
......@@ -916,7 +916,7 @@ generic_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
return ret;
}
static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
ssize_t written;
unsigned long count = desc->count;
......
......@@ -51,6 +51,16 @@
/* Keep swapped page count in private field of indirect struct page */
#define nr_swapped private
/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */
SGP_CACHE, /* don't exceed i_size, may allocate page */
SGP_WRITE, /* may exceed i_size, may allocate page */
};
static int shmem_getpage(struct inode *inode, unsigned long idx,
struct page **pagep, enum sgp_type sgp);
static inline struct page *shmem_dir_alloc(unsigned int gfp_mask)
{
/*
......@@ -132,8 +142,7 @@ static void shmem_free_block(struct inode *inode)
* @inode: inode to recalc
*
* We have to calculate the free blocks since the mm can drop
* undirtied hole pages behind our back. Later we should be
* able to use the releasepage method to handle this better.
* undirtied hole pages behind our back.
*
* But normally info->alloced == inode->i_mapping->nrpages + info->swapped
* So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
......@@ -200,8 +209,6 @@ static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long
struct page **dir;
struct page *subdir;
if (index >= info->next_index)
return NULL;
if (index < SHMEM_NR_DIRECT)
return info->i_direct+index;
if (!info->i_indirect) {
......@@ -274,20 +281,23 @@ static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, uns
*
* @info: info structure for the inode
* @index: index of the page to find
* @sgp: check and recheck i_size? skip allocation?
*/
static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index)
static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
{
struct inode *inode = &info->vfs_inode;
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
struct page *page = NULL;
swp_entry_t *entry;
static const swp_entry_t unswapped = {0};
while (!(entry = shmem_swp_entry(info, index, &page))) {
if (index >= info->next_index) {
entry = ERR_PTR(-EFAULT);
break;
}
if (sgp != SGP_WRITE &&
((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size)
return ERR_PTR(-EINVAL);
while (!(entry = shmem_swp_entry(info, index, &page))) {
if (sgp == SGP_READ)
return (swp_entry_t *) &unswapped;
/*
* Test free_blocks against 1 not 0, since we have 1 data
* page (and perhaps indirect index pages) yet to allocate:
......@@ -314,12 +324,21 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
shmem_free_block(inode);
return ERR_PTR(-ENOMEM);
}
if (sgp != SGP_WRITE &&
((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) {
entry = ERR_PTR(-EINVAL);
break;
}
if (info->next_index <= index)
info->next_index = index + 1;
}
if (page) {
/* another task gave its page, or truncated the file */
shmem_free_block(inode);
shmem_dir_free(page);
}
if (info->next_index <= index && !IS_ERR(entry))
info->next_index = index + 1;
return entry;
}
......@@ -470,7 +489,6 @@ static void shmem_truncate(struct inode *inode)
static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
{
static struct page *shmem_holdpage(struct inode *, unsigned long);
struct inode *inode = dentry->d_inode;
struct page *page = NULL;
long change = 0;
......@@ -495,8 +513,9 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
* it assigned to swap.
*/
if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
page = shmem_holdpage(inode,
attr->ia_size >> PAGE_CACHE_SHIFT);
(void) shmem_getpage(inode,
attr->ia_size>>PAGE_CACHE_SHIFT,
&page, SGP_READ);
}
}
}
......@@ -672,6 +691,7 @@ static int shmem_writepage(struct page *page)
spin_lock(&info->lock);
shmem_recalc_inode(inode);
BUG_ON(index >= info->next_index);
entry = shmem_swp_entry(info, index, NULL);
BUG_ON(!entry);
BUG_ON(entry->val);
......@@ -710,57 +730,55 @@ static int shmem_vm_writeback(struct page *page, struct writeback_control *wbc)
* vm. If we swap it in we mark it dirty since we also free the swap
* entry since a page cannot live in both the swap and page cache
*/
static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep)
static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp)
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo;
struct page *page;
struct page *filepage = *pagep;
struct page *swappage;
swp_entry_t *entry;
swp_entry_t swap;
int error = 0;
int error;
if (idx >= SHMEM_MAX_INDEX)
return -EFBIG;
/*
* When writing, i_sem is held against truncation and other
* writing, so next_index will remain as set here; but when
* reading, idx must always be checked against next_index
* after sleeping, lest truncation occurred meanwhile.
* Normally, filepage is NULL on entry, and either found
* uptodate immediately, or allocated and zeroed, or read
* in under swappage, which is then assigned to filepage.
* But shmem_readpage and shmem_prepare_write pass in a locked
* filepage, which may be found not uptodate by other callers
* too, and may need to be copied from the swappage read in.
*/
spin_lock(&info->lock);
if (info->next_index <= idx)
info->next_index = idx + 1;
spin_unlock(&info->lock);
repeat:
page = find_lock_page(mapping, idx);
if (page) {
*pagep = page;
return 0;
}
if (!filepage)
filepage = find_lock_page(mapping, idx);
if (filepage && PageUptodate(filepage))
goto done;
error = 0;
spin_lock(&info->lock);
shmem_recalc_inode(inode);
entry = shmem_swp_alloc(info, idx);
entry = shmem_swp_alloc(info, idx, sgp);
if (IS_ERR(entry)) {
spin_unlock(&info->lock);
return PTR_ERR(entry);
error = PTR_ERR(entry);
goto failed;
}
swap = *entry;
if (swap.val) {
/* Look it up and read it in.. */
page = lookup_swap_cache(swap);
if (!page) {
swappage = lookup_swap_cache(swap);
if (!swappage) {
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
swapin_readahead(swap);
page = read_swap_cache_async(swap);
if (!page) {
swappage = read_swap_cache_async(swap);
if (!swappage) {
spin_lock(&info->lock);
entry = shmem_swp_alloc(info, idx);
entry = shmem_swp_alloc(info, idx, sgp);
if (IS_ERR(entry))
error = PTR_ERR(entry);
else {
......@@ -770,125 +788,152 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **p
}
spin_unlock(&info->lock);
if (error)
return error;
goto failed;
goto repeat;
}
wait_on_page_locked(page);
page_cache_release(page);
wait_on_page_locked(swappage);
page_cache_release(swappage);
goto repeat;
}
/* We have to do this with page locked to prevent races */
if (TestSetPageLocked(page)) {
if (TestSetPageLocked(swappage)) {
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
wait_on_page_locked(page);
page_cache_release(page);
wait_on_page_locked(swappage);
page_cache_release(swappage);
goto repeat;
}
if (PageWriteback(page)) {
if (PageWriteback(swappage)) {
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
wait_on_page_writeback(page);
unlock_page(page);
page_cache_release(page);
wait_on_page_writeback(swappage);
unlock_page(swappage);
page_cache_release(swappage);
goto repeat;
}
error = PageUptodate(page)?
move_from_swap_cache(page, idx, mapping): -EIO;
if (error) {
if (!PageUptodate(swappage)) {
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
unlock_page(page);
page_cache_release(page);
return error;
unlock_page(swappage);
page_cache_release(swappage);
error = -EIO;
goto failed;
}
shmem_swp_set(info, entry, 0);
if (filepage) {
shmem_swp_set(info, entry, 0);
shmem_swp_unmap(entry);
delete_from_swap_cache(swappage);
spin_unlock(&info->lock);
flush_page_to_ram(swappage);
copy_highpage(filepage, swappage);
unlock_page(swappage);
page_cache_release(swappage);
flush_dcache_page(filepage);
SetPageUptodate(filepage);
set_page_dirty(filepage);
swap_free(swap);
} else if (!(error = move_from_swap_cache(
swappage, idx, mapping))) {
shmem_swp_set(info, entry, 0);
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
filepage = swappage;
swap_free(swap);
} else {
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
unlock_page(swappage);
page_cache_release(swappage);
if (error != -EEXIST)
goto failed;
goto repeat;
}
} else if (sgp == SGP_READ && !filepage) {
shmem_swp_unmap(entry);
filepage = find_get_page(mapping, idx);
if (filepage &&
(!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
spin_unlock(&info->lock);
wait_on_page_locked(filepage);
page_cache_release(filepage);
filepage = NULL;
goto repeat;
}
spin_unlock(&info->lock);
swap_free(swap);
} else {
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
sbinfo = SHMEM_SB(inode->i_sb);
spin_lock(&sbinfo->stat_lock);
if (sbinfo->free_blocks == 0) {
spin_unlock(&sbinfo->stat_lock);
return -ENOSPC;
spin_unlock(&info->lock);
error = -ENOSPC;
goto failed;
}
sbinfo->free_blocks--;
inode->i_blocks += BLOCKS_PER_PAGE;
spin_unlock(&sbinfo->stat_lock);
page = page_cache_alloc(mapping);
if (!page) {
shmem_free_block(inode);
return -ENOMEM;
}
spin_lock(&info->lock);
entry = shmem_swp_alloc(info, idx);
if (IS_ERR(entry))
error = PTR_ERR(entry);
else {
swap = *entry;
shmem_swp_unmap(entry);
}
if (error || swap.val ||
add_to_page_cache_lru(page, mapping, idx) < 0) {
if (!filepage) {
spin_unlock(&info->lock);
page_cache_release(page);
shmem_free_block(inode);
if (error)
return error;
goto repeat;
filepage = page_cache_alloc(mapping);
if (!filepage) {
shmem_free_block(inode);
error = -ENOMEM;
goto failed;
}
spin_lock(&info->lock);
entry = shmem_swp_alloc(info, idx, sgp);
if (IS_ERR(entry))
error = PTR_ERR(entry);
else {
swap = *entry;
shmem_swp_unmap(entry);
}
if (error || swap.val ||
(error = add_to_page_cache_lru(
filepage, mapping, idx))) {
spin_unlock(&info->lock);
page_cache_release(filepage);
shmem_free_block(inode);
filepage = NULL;
if (error != -EEXIST)
goto failed;
goto repeat;
}
}
info->alloced++;
spin_unlock(&info->lock);
clear_highpage(page);
SetPageUptodate(page);
clear_highpage(filepage);
flush_dcache_page(filepage);
SetPageUptodate(filepage);
}
done:
if (!*pagep) {
if (filepage) {
unlock_page(filepage);
*pagep = filepage;
} else
*pagep = ZERO_PAGE(0);
}
/* We have the page */
*pagep = page;
return 0;
}
static struct page *shmem_holdpage(struct inode *inode, unsigned long idx)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct page *page;
swp_entry_t *entry;
swp_entry_t swap = {0};
/*
* Somehow, it feels wrong for truncation down to cause any
* allocation: so instead of a blind shmem_getpage, check that
* the page has actually been instantiated before holding it.
*/
spin_lock(&info->lock);
page = find_get_page(inode->i_mapping, idx);
if (!page) {
entry = shmem_swp_entry(info, idx, NULL);
if (entry) {
swap = *entry;
shmem_swp_unmap(entry);
}
}
spin_unlock(&info->lock);
if (swap.val) {
if (shmem_getpage(inode, idx, &page) == 0)
unlock_page(page);
failed:
if (*pagep != filepage) {
unlock_page(filepage);
page_cache_release(filepage);
}
return page;
return error;
}
struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int unused)
{
struct inode *inode = vma->vm_file->f_dentry->d_inode;
struct page *page;
struct page *page = NULL;
unsigned long idx;
int error;
......@@ -896,14 +941,10 @@ struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int
idx += vma->vm_pgoff;
idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
if (((loff_t) idx << PAGE_CACHE_SHIFT) >= inode->i_size)
return NOPAGE_SIGBUS;
error = shmem_getpage(inode, idx, &page);
error = shmem_getpage(inode, idx, &page, SGP_CACHE);
if (error)
return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
unlock_page(page);
flush_page_to_ram(page);
return page;
}
......@@ -1017,13 +1058,33 @@ static int shmem_set_size(struct shmem_sb_info *info,
static struct inode_operations shmem_symlink_inode_operations;
static struct inode_operations shmem_symlink_inline_operations;
/*
* tmpfs itself makes no use of generic_file_read, generic_file_mmap
* or generic_file_write; but shmem_readpage, shmem_prepare_write and
* simple_commit_write let a tmpfs file be used below the loop driver.
*/
static int
shmem_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
int error = shmem_getpage(inode, page->index, &page, SGP_CACHE);
unlock_page(page);
return error;
}
static int
shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
struct inode *inode = page->mapping->host;
return shmem_getpage(inode, page->index, &page, SGP_WRITE);
}
static ssize_t
shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
{
struct inode *inode = file->f_dentry->d_inode;
unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
loff_t pos;
struct page *page;
unsigned long written;
long status;
int err;
......@@ -1073,12 +1134,46 @@ shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
send_sig(SIGXFSZ, current, 0);
goto out;
}
if (count > limit - pos) {
if (pos > 0xFFFFFFFFULL || count > limit - (u32)pos) {
/* send_sig(SIGXFSZ, current, 0); */
count = limit - (u32)pos;
}
}
/*
* LFS rule
*/
if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) {
if (pos >= MAX_NON_LFS) {
send_sig(SIGXFSZ, current, 0);
count = limit - pos;
goto out;
}
if (count > MAX_NON_LFS - (u32)pos) {
/* send_sig(SIGXFSZ, current, 0); */
count = MAX_NON_LFS - (u32)pos;
}
}
/*
* Are we about to exceed the fs block limit ?
*
* If we have written data it becomes a short write
* If we have exceeded without writing data we send
* a signal and give them an EFBIG.
*
* Linus' frestrict idea will clean these up nicely..
*/
if (pos >= SHMEM_MAX_BYTES) {
if (count || pos > SHMEM_MAX_BYTES) {
send_sig(SIGXFSZ, current, 0);
err = -EFBIG;
goto out;
}
/* zero-length writes at ->s_maxbytes are OK */
}
if (pos + count > SHMEM_MAX_BYTES)
count = SHMEM_MAX_BYTES - pos;
status = 0;
if (count) {
remove_suid(file->f_dentry);
......@@ -1086,61 +1181,65 @@ shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
}
while (count) {
struct page *page = NULL;
unsigned long bytes, index, offset;
char *kaddr;
int left;
/*
* Try to find the page in the cache. If it isn't there,
* allocate a free page.
*/
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
index = pos >> PAGE_CACHE_SHIFT;
bytes = PAGE_CACHE_SIZE - offset;
if (bytes > count) {
if (bytes > count)
bytes = count;
}
/*
* Bring in the user page that we will copy from _first_.
* Otherwise there's a nasty deadlock on copying from the
* same page as we're writing to, without it being marked
* up-to-date.
* We don't hold page lock across copy from user -
* what would it guard against? - so no deadlock here.
* But it still may be a good idea to prefault below.
*/
{ volatile unsigned char dummy;
__get_user(dummy, buf);
__get_user(dummy, buf+bytes-1);
}
status = shmem_getpage(inode, index, &page);
status = shmem_getpage(inode, index, &page, SGP_WRITE);
if (status)
break;
kaddr = kmap(page);
status = __copy_from_user(kaddr+offset, buf, bytes);
kunmap(page);
if (status)
goto fail_write;
left = bytes;
if (PageHighMem(page)) {
volatile unsigned char dummy;
__get_user(dummy, buf);
__get_user(dummy, buf + bytes - 1);
kaddr = kmap_atomic(page, KM_USER0);
left = __copy_from_user(kaddr + offset, buf, bytes);
kunmap_atomic(kaddr, KM_USER0);
}
if (left) {
kaddr = kmap(page);
left = __copy_from_user(kaddr + offset, buf, bytes);
kunmap(page);
}
flush_dcache_page(page);
if (bytes > 0) {
set_page_dirty(page);
written += bytes;
count -= bytes;
pos += bytes;
buf += bytes;
if (pos > inode->i_size)
inode->i_size = pos;
if (left) {
page_cache_release(page);
status = -EFAULT;
break;
}
unlock:
/* Mark it unlocked again and drop the page.. */
unlock_page(page);
set_page_dirty(page);
page_cache_release(page);
if (status < 0)
break;
/*
* Balance dirty pages??
*/
written += bytes;
count -= bytes;
pos += bytes;
buf += bytes;
if (pos > inode->i_size)
inode->i_size = pos;
}
*ppos = pos;
*ppos = pos;
err = written ? written : status;
out:
/* Short writes give back address space */
......@@ -1149,25 +1248,20 @@ shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
out_nc:
up(&inode->i_sem);
return err;
fail_write:
status = -EFAULT;
ClearPageUptodate(page);
goto unlock;
}
static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc)
static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
{
struct inode *inode = filp->f_dentry->d_inode;
struct address_space *mapping = inode->i_mapping;
unsigned long index, offset;
int nr = 1;
index = *ppos >> PAGE_CACHE_SHIFT;
offset = *ppos & ~PAGE_CACHE_MASK;
while (nr && desc->count) {
struct page *page;
unsigned long end_index, nr;
for (;;) {
struct page *page = NULL;
unsigned long end_index, nr, ret;
end_index = inode->i_size >> PAGE_CACHE_SHIFT;
if (index > end_index)
......@@ -1178,9 +1272,9 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
break;
}
desc->error = shmem_getpage(inode, index, &page);
desc->error = shmem_getpage(inode, index, &page, SGP_READ);
if (desc->error) {
if (desc->error == -EFAULT)
if (desc->error == -EINVAL)
desc->error = 0;
break;
}
......@@ -1194,15 +1288,18 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
if (index == end_index) {
nr = inode->i_size & ~PAGE_CACHE_MASK;
if (nr <= offset) {
unlock_page(page);
page_cache_release(page);
break;
}
}
unlock_page(page);
nr -= offset;
if (!list_empty(&mapping->i_mmap_shared))
/* If users can be writing to this page using arbitrary
* virtual addresses, take care about potential aliasing
* before reading the page on the kernel side.
*/
if (!list_empty(&mapping->i_mmap_shared) &&
page != ZERO_PAGE(0))
flush_dcache_page(page);
/*
......@@ -1215,12 +1312,14 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
* "pos" here (the actor routine has to update the user buffer
* pointers and the remaining count).
*/
nr = file_read_actor(desc, page, offset, nr);
offset += nr;
ret = actor(desc, page, offset, nr);
offset += ret;
index += offset >> PAGE_CACHE_SHIFT;
offset &= ~PAGE_CACHE_MASK;
page_cache_release(page);
if (ret != nr || !desc->count)
break;
}
*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
......@@ -1229,27 +1328,43 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
static ssize_t shmem_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
{
ssize_t retval;
read_descriptor_t desc;
retval = -EFAULT;
if (access_ok(VERIFY_WRITE, buf, count)) {
retval = 0;
if ((ssize_t) count < 0)
return -EINVAL;
if (!access_ok(VERIFY_WRITE, buf, count))
return -EFAULT;
if (!count)
return 0;
if (count) {
read_descriptor_t desc;
desc.written = 0;
desc.count = count;
desc.buf = buf;
desc.error = 0;
desc.written = 0;
desc.count = count;
desc.buf = buf;
desc.error = 0;
do_shmem_file_read(filp, ppos, &desc);
do_shmem_file_read(filp, ppos, &desc, file_read_actor);
if (desc.written)
return desc.written;
return desc.error;
}
retval = desc.written;
if (!retval)
retval = desc.error;
}
}
return retval;
static ssize_t shmem_file_sendfile(struct file *out_file,
struct file *in_file, loff_t *ppos, size_t count)
{
read_descriptor_t desc;
if (!count)
return 0;
desc.written = 0;
desc.count = count;
desc.buf = (char *)out_file;
desc.error = 0;
do_shmem_file_read(in_file, ppos, &desc, file_send_actor);
if (desc.written)
return desc.written;
return desc.error;
}
static int shmem_statfs(struct super_block *sb, struct statfs *buf)
......@@ -1317,39 +1432,6 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
return 0;
}
static inline int shmem_positive(struct dentry *dentry)
{
return dentry->d_inode && !d_unhashed(dentry);
}
/*
* Check that a directory is empty (this works
* for regular files too, they'll just always be
* considered empty..).
*
* Note that an empty directory can still have
* children, they just all have to be negative..
*/
static int shmem_empty(struct dentry *dentry)
{
struct list_head *list;
spin_lock(&dcache_lock);
list = dentry->d_subdirs.next;
while (list != &dentry->d_subdirs) {
struct dentry *de = list_entry(list, struct dentry, d_child);
if (shmem_positive(de)) {
spin_unlock(&dcache_lock);
return 0;
}
list = list->next;
}
spin_unlock(&dcache_lock);
return 1;
}
static int shmem_unlink(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
......@@ -1363,7 +1445,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{
if (!shmem_empty(dentry))
if (!simple_empty(dentry))
return -ENOTEMPTY;
dir->i_nlink--;
......@@ -1381,7 +1463,7 @@ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct
struct inode *inode = old_dentry->d_inode;
int they_are_dirs = S_ISDIR(inode->i_mode);
if (!shmem_empty(new_dentry))
if (!simple_empty(new_dentry))
return -ENOTEMPTY;
if (new_dentry->d_inode) {
......@@ -1406,7 +1488,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
int error;
int len;
struct inode *inode;
struct page *page;
struct page *page = NULL;
char *kaddr;
struct shmem_inode_info *info;
......@@ -1429,7 +1511,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
iput(inode);
return -ENOMEM;
}
error = shmem_getpage(inode, 0, &page);
error = shmem_getpage(inode, 0, &page, SGP_WRITE);
if (error) {
vm_unacct_memory(VM_ACCT(1));
iput(inode);
......@@ -1439,11 +1521,10 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
spin_lock(&shmem_ilock);
list_add_tail(&info->list, &shmem_inodes);
spin_unlock(&shmem_ilock);
kaddr = kmap(page);
kaddr = kmap_atomic(page, KM_USER0);
memcpy(kaddr, symname, len);
kunmap(page);
kunmap_atomic(kaddr, KM_USER0);
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
}
dir->i_size += BOGO_DIRENT_SIZE;
......@@ -1465,26 +1546,24 @@ static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
{
struct page *page;
int res = shmem_getpage(dentry->d_inode, 0, &page);
struct page *page = NULL;
int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
if (res)
return res;
res = vfs_readlink(dentry, buffer, buflen, kmap(page));
kunmap(page);
unlock_page(page);
page_cache_release(page);
return res;
}
static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct page *page;
int res = shmem_getpage(dentry->d_inode, 0, &page);
struct page *page = NULL;
int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
if (res)
return res;
res = vfs_follow_link(nd, kmap(page));
kunmap(page);
unlock_page(page);
page_cache_release(page);
return res;
}
......@@ -1569,15 +1648,10 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
unsigned long max_blocks = sbinfo->max_blocks;
unsigned long max_inodes = sbinfo->max_inodes;
if (shmem_parse_options (data, NULL, NULL, NULL, &max_blocks, &max_inodes))
if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
return -EINVAL;
return shmem_set_size(sbinfo, max_blocks, max_inodes);
}
int shmem_sync_file(struct file *file, struct dentry *dentry, int datasync)
{
return 0;
}
#endif
static int shmem_fill_super(struct super_block *sb, void *data, int silent)
......@@ -1590,7 +1664,7 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent)
gid_t gid = current->fsgid;
struct shmem_sb_info *sbinfo;
struct sysinfo si;
int err;
int err = -ENOMEM;
sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
if (!sbinfo)
......@@ -1606,7 +1680,7 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent)
blocks = inodes = si.totalram / 2;
#ifdef CONFIG_TMPFS
if (shmem_parse_options (data, &mode, &uid, &gid, &blocks, &inodes)) {
if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes)) {
err = -EINVAL;
goto failed;
}
......@@ -1620,28 +1694,29 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent)
sbinfo->max_inodes = inodes;
sbinfo->free_inodes = inodes;
sb->s_maxbytes = SHMEM_MAX_BYTES;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_bdev = bdget(sb->s_dev);
if (!sb->s_bdev)
goto failed;
if (!sb_set_blocksize(sb, PAGE_CACHE_SIZE))
BUG();
sb->s_magic = TMPFS_MAGIC;
sb->s_op = &shmem_ops;
inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
if (!inode) {
err = -ENOMEM;
goto failed;
}
if (!inode)
goto failed_bdput;
inode->i_uid = uid;
inode->i_gid = gid;
root = d_alloc_root(inode);
if (!root) {
err = -ENOMEM;
if (!root)
goto failed_iput;
}
sb->s_root = root;
return 0;
failed_iput:
iput(inode);
failed_bdput:
bdput(sb->s_bdev);
sb->s_bdev = NULL;
failed:
kfree(sbinfo);
sb->s_fs_info = NULL;
......@@ -1650,6 +1725,8 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent)
static void shmem_put_super(struct super_block *sb)
{
bdput(sb->s_bdev);
sb->s_bdev = NULL;
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
}
......@@ -1702,6 +1779,11 @@ static struct address_space_operations shmem_aops = {
.writepages = shmem_writepages,
.vm_writeback = shmem_vm_writeback,
.set_page_dirty = __set_page_dirty_nobuffers,
#ifdef CONFIG_TMPFS
.readpage = shmem_readpage,
.prepare_write = shmem_prepare_write,
.commit_write = simple_commit_write,
#endif
};
static struct file_operations shmem_file_operations = {
......@@ -1709,7 +1791,8 @@ static struct file_operations shmem_file_operations = {
#ifdef CONFIG_TMPFS
.read = shmem_file_read,
.write = shmem_file_write,
.fsync = shmem_sync_file,
.fsync = simple_sync_file,
.sendfile = shmem_file_sendfile,
#endif
};
......@@ -1754,15 +1837,6 @@ static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
}
#ifdef CONFIG_TMPFS
/* type "shm" will be tagged obsolete in 2.5 */
static struct file_system_type shmem_fs_type = {
.owner = THIS_MODULE,
.name = "shmem",
.get_sb = shmem_get_sb,
.kill_sb = kill_litter_super,
};
#endif
static struct file_system_type tmpfs_fs_type = {
.owner = THIS_MODULE,
.name = "tmpfs",
......@@ -1771,10 +1845,9 @@ static struct file_system_type tmpfs_fs_type = {
};
static struct vfsmount *shm_mnt;
static int __init init_shmem_fs(void)
static int __init init_tmpfs(void)
{
int error;
struct vfsmount *res;
error = init_inodecache();
if (error)
......@@ -1786,52 +1859,31 @@ static int __init init_shmem_fs(void)
goto out2;
}
#ifdef CONFIG_TMPFS
error = register_filesystem(&shmem_fs_type);
if (error) {
printk(KERN_ERR "Could not register shm fs\n");
goto out1;
}
devfs_mk_dir(NULL, "shm", NULL);
#endif
res = kern_mount(&tmpfs_fs_type);
if (IS_ERR (res)) {
error = PTR_ERR(res);
printk(KERN_ERR "could not kern_mount tmpfs\n");
goto out;
shm_mnt = kern_mount(&tmpfs_fs_type);
if (IS_ERR(shm_mnt)) {
error = PTR_ERR(shm_mnt);
printk(KERN_ERR "Could not kern_mount tmpfs\n");
goto out1;
}
shm_mnt = res;
/* The internal instance should not do size checking */
shmem_set_size(SHMEM_SB(res->mnt_sb), ULONG_MAX, ULONG_MAX);
shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
return 0;
out:
#ifdef CONFIG_TMPFS
unregister_filesystem(&shmem_fs_type);
out1:
#endif
unregister_filesystem(&tmpfs_fs_type);
out2:
destroy_inodecache();
out3:
shm_mnt = ERR_PTR(error);
return error;
}
static void __exit exit_shmem_fs(void)
{
#ifdef CONFIG_TMPFS
unregister_filesystem(&shmem_fs_type);
#endif
unregister_filesystem(&tmpfs_fs_type);
mntput(shm_mnt);
destroy_inodecache();
}
module_init(init_shmem_fs)
module_exit(exit_shmem_fs)
module_init(init_tmpfs)
/*
* shmem_file_setup - get an unlinked file living in shmem fs
* shmem_file_setup - get an unlinked file living in tmpfs
*
* @name: name for dentry (to be seen in /proc/<pid>/maps)
* @size: size to be set for the file
......@@ -1845,6 +1897,9 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
struct dentry *dentry, *root;
struct qstr this;
if (IS_ERR(shm_mnt))
return (void *)shm_mnt;
if (size > SHMEM_MAX_BYTES)
return ERR_PTR(-EINVAL);
......