Commit d7cc008e authored by Linus Torvalds

Linux 2.1.127pre1

I have an alternate patch for low memory circumstances that I'd like you
to test out.
The problem with the old kswapd setup was at least partly that kswapd was
woken up too late - by the time kswapd was woken up, it really had to work
fairly hard. Also, kswapd really shouldn't be real-time at all: normally
it should just be a fairly low-priority process, and the priority should
grow as there is more urgent need for memory.
This alternate approach seems to work for me, and is designed to avoid the
"spikes" of heavy real-time kswapd activity during which the machine is
fairly unusable in the old scheme.

                Linus
parent 76df47b0
IDE-CD driver documentation IDE-CD driver documentation
Originally by scott snyder <snyder@fnald0.fnal.gov> (19 May 1996) Originally by scott snyder <snyder@fnald0.fnal.gov> (19 May 1996)
Carrying on the torch is: Erik Andersen <andersee@debian.org> Carrying on the torch is: Erik Andersen <andersee@debian.org>
New maintainers (19 Oct 1998): Jens Axboe <axboe@image.dk>
Chris Zwilling <chris@cloudnet.com>
1. Introduction 1. Introduction
--------------- ---------------
......
...@@ -328,8 +328,10 @@ L: linux-kernel@vger.rutgers.edu ...@@ -328,8 +328,10 @@ L: linux-kernel@vger.rutgers.edu
S: Odd Fixes S: Odd Fixes
IDE/ATAPI CDROM DRIVER IDE/ATAPI CDROM DRIVER
P: Erik Andersen P: Jens Axboe
M: andersee@debian.org M: axboe@image.dk
P: Chris Zwilling
M: chris@cloudnet.com
L: linux-kernel@vger.rutgers.edu L: linux-kernel@vger.rutgers.edu
S: Maintained S: Maintained
...@@ -671,8 +673,10 @@ L: linux-kernel@vger.rutgers.edu ...@@ -671,8 +673,10 @@ L: linux-kernel@vger.rutgers.edu
S: Maintained S: Maintained
UNIFORM CDROM DRIVER UNIFORM CDROM DRIVER
P: Erik Andersen P: Jens Axboe
M: andersee@debian.org M: axboe@image.dk
P: Chris Zwilling
M: chris@cloudnet.com
L: linux-kernel@vger.rutgers.edu L: linux-kernel@vger.rutgers.edu
S: Maintained S: Maintained
......
VERSION = 2 VERSION = 2
PATCHLEVEL = 1 PATCHLEVEL = 1
SUBLEVEL = 126 SUBLEVEL = 127
ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
* linux/drivers/block/ide-cd.c * linux/drivers/block/ide-cd.c
* Copyright (C) 1994, 1995, 1996 scott snyder <snyder@fnald0.fnal.gov> * Copyright (C) 1994, 1995, 1996 scott snyder <snyder@fnald0.fnal.gov>
* Copyright (C) 1996-1998 Erik Andersen <andersee@debian.org> * Copyright (C) 1996-1998 Erik Andersen <andersee@debian.org>
* Copyright (C) 1998 Jens Axboe and Chris Zwilling
* *
* May be copied or modified under the terms of the GNU General Public * May be copied or modified under the terms of the GNU General Public
* License. See linux/COPYING for more information. * License. See linux/COPYING for more information.
...@@ -221,9 +222,13 @@ ...@@ -221,9 +222,13 @@
* 4.15 Aug 25, 1998 -- Updated ide-cd.h to respect mechine endianess, * 4.15 Aug 25, 1998 -- Updated ide-cd.h to respect mechine endianess,
* patch thanks to "Eddie C. Dost" <ecd@skynet.be> * patch thanks to "Eddie C. Dost" <ecd@skynet.be>
* *
* 4.50 Oct 19, 1998 -- New maintainers!
* Jens Axboe <axboe@image.dk>
* Chris Zwilling <chris@cloudnet.com>
*
*************************************************************************/ *************************************************************************/
#define IDECD_VERSION "4.15" #define IDECD_VERSION "4.50"
#include <linux/module.h> #include <linux/module.h>
#include <linux/types.h> #include <linux/types.h>
...@@ -2632,7 +2637,7 @@ int ide_cdrom_select_disc (struct cdrom_device_info *cdi, int slot) ...@@ -2632,7 +2637,7 @@ int ide_cdrom_select_disc (struct cdrom_device_info *cdi, int slot)
if (was_locked) if (was_locked)
(void) cdrom_lockdoor (drive, 1, NULL); (void) cdrom_lockdoor (drive, 1, NULL);
return stat; return slot;
} }
} }
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
* linux/drivers/block/ide_modes.h * linux/drivers/block/ide_modes.h
* *
* Copyright (C) 1996 Erik Andersen * Copyright (C) 1996 Erik Andersen
* Copyright (C) 1998 Jens Axboe and Chris Zwilling
*/ */
#include <asm/byteorder.h> #include <asm/byteorder.h>
......
/* linux/drivers/cdrom/cdrom.c. /* linux/drivers/cdrom/cdrom.c.
Copyright (c) 1996, 1997 David A. van Leeuwen. Copyright (c) 1996, 1997 David A. van Leeuwen.
Copyright (c) 1997, 1998 Erik Andersen <andersee@debian.org> Copyright (c) 1997, 1998 Erik Andersen <andersee@debian.org>
Copyright (c) 1998 Jens Axboe and Chris Zwilling
May be copied or modified under the terms of the GNU General Public May be copied or modified under the terms of the GNU General Public
License. See linux/COPYING for more information. License. See linux/COPYING for more information.
...@@ -86,10 +87,15 @@ ...@@ -86,10 +87,15 @@
Thanks to Grant R. Guenther <grant@torque.net> for spotting this bug. Thanks to Grant R. Guenther <grant@torque.net> for spotting this bug.
-- Made a few things more pedanticly correct. -- Made a few things more pedanticly correct.
2.50 Oct 19, 1998 - Jens Axboe <axboe@image.dk>
-- New maintainers! Erik was too busy to continue the work on the driver,
so now Chris Zwilling <chris@cloudnet.com> and Jens Axboe <axboe@image.dk>
will do their best to follow in his footsteps
-------------------------------------------------------------------------*/ -------------------------------------------------------------------------*/
#define REVISION "Revision: 2.14" #define REVISION "Revision: 2.50"
#define VERSION "Id: cdrom.c 2.14 1998/08/17 erik" #define VERSION "Id: cdrom.c 2.50 1998/10/19"
/* I use an error-log mask to give fine grain control over the type of /* I use an error-log mask to give fine grain control over the type of
messages dumped to the system logs. The available masks include: */ messages dumped to the system logs. The available masks include: */
......
...@@ -972,7 +972,7 @@ static int get_hw_addr(struct device *dev); ...@@ -972,7 +972,7 @@ static int get_hw_addr(struct device *dev);
static void srom_repair(struct device *dev, int card); static void srom_repair(struct device *dev, int card);
static int test_bad_enet(struct device *dev, int status); static int test_bad_enet(struct device *dev, int status);
static int an_exception(struct bus_type *lp); static int an_exception(struct bus_type *lp);
#if !defined(__sparc_v9__) && !defined(__powerpc__) #if !defined(__sparc_v9__) && !defined(__powerpc__) && !defined(__alpha__)
static void eisa_probe(struct device *dev, u_long iobase); static void eisa_probe(struct device *dev, u_long iobase);
#endif #endif
static void pci_probe(struct device *dev, u_long iobase); static void pci_probe(struct device *dev, u_long iobase);
...@@ -1021,7 +1021,7 @@ static int loading_module = 0; ...@@ -1021,7 +1021,7 @@ static int loading_module = 0;
#endif /* MODULE */ #endif /* MODULE */
static char name[DE4X5_NAME_LENGTH + 1]; static char name[DE4X5_NAME_LENGTH + 1];
#if !defined(__sparc_v9__) && !defined(__powerpc__) #if !defined(__sparc_v9__) && !defined(__powerpc__) && !defined(__alpha__)
static u_char de4x5_irq[] = EISA_ALLOWED_IRQ_LIST; static u_char de4x5_irq[] = EISA_ALLOWED_IRQ_LIST;
static int lastEISA = 0; static int lastEISA = 0;
#else #else
...@@ -1095,7 +1095,7 @@ de4x5_probe(struct device *dev)) ...@@ -1095,7 +1095,7 @@ de4x5_probe(struct device *dev))
{ {
u_long iobase = dev->base_addr; u_long iobase = dev->base_addr;
#if !defined(__sparc_v9__) && !defined(__powerpc__) #if !defined(__sparc_v9__) && !defined(__powerpc__) && !defined(__alpha__)
eisa_probe(dev, iobase); eisa_probe(dev, iobase);
#endif #endif
if (lastEISA == MAX_EISA_SLOTS) { if (lastEISA == MAX_EISA_SLOTS) {
...@@ -2028,7 +2028,7 @@ SetMulticastFilter(struct device *dev) ...@@ -2028,7 +2028,7 @@ SetMulticastFilter(struct device *dev)
return; return;
} }
#if !defined(__sparc_v9__) && !defined(__powerpc__) #if !defined(__sparc_v9__) && !defined(__powerpc__) && !defined(__alpha__)
/* /*
** EISA bus I/O device probe. Probe from slot 1 since slot 0 is usually ** EISA bus I/O device probe. Probe from slot 1 since slot 0 is usually
** the motherboard. Upto 15 EISA devices are supported. ** the motherboard. Upto 15 EISA devices are supported.
...@@ -2096,7 +2096,7 @@ eisa_probe(struct device *dev, u_long ioaddr)) ...@@ -2096,7 +2096,7 @@ eisa_probe(struct device *dev, u_long ioaddr))
return; return;
} }
#endif /* !(__sparc_v9__) && !(__powerpc__) */ #endif /* !(__sparc_v9__) && !(__powerpc__) && !defined(__alpha__) */
/* /*
** PCI bus I/O device probe ** PCI bus I/O device probe
...@@ -5826,7 +5826,7 @@ count_adapters(void) ...@@ -5826,7 +5826,7 @@ count_adapters(void)
u_int class = DE4X5_CLASS_CODE; u_int class = DE4X5_CLASS_CODE;
u_int device; u_int device;
#if !defined(__sparc_v9__) && !defined(__powerpc__) #if !defined(__sparc_v9__) && !defined(__powerpc__) && !defined(__alpha__)
char name[DE4X5_STRLEN]; char name[DE4X5_STRLEN];
u_long iobase = 0x1000; u_long iobase = 0x1000;
......
...@@ -1384,7 +1384,7 @@ static int speedo_ioctl(struct device *dev, struct ifreq *rq, int cmd) ...@@ -1384,7 +1384,7 @@ static int speedo_ioctl(struct device *dev, struct ifreq *rq, int cmd)
data[3] = mdio_read(ioaddr, data[0], data[1]); data[3] = mdio_read(ioaddr, data[0], data[1]);
return 0; return 0;
case SIOCDEVPRIVATE+2: /* Write the specified MII register */ case SIOCDEVPRIVATE+2: /* Write the specified MII register */
if (!suser()) if (!capable(CAP_NET_ADMIN))
return -EPERM; return -EPERM;
mdio_write(ioaddr, data[0], data[1], data[2]); mdio_write(ioaddr, data[0], data[1], data[2]);
return 0; return 0;
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
* 1994, 1995 Eberhard Moenkeberg, emoenke@gwdg.de * 1994, 1995 Eberhard Moenkeberg, emoenke@gwdg.de
* 1996 David van Leeuwen, david@tm.tno.nl * 1996 David van Leeuwen, david@tm.tno.nl
* 1997, 1998 Erik Andersen, andersee@debian.org * 1997, 1998 Erik Andersen, andersee@debian.org
* 1998 Jens Axboe, axboe@image.dk and
* Chris Zwilling, chris@cloudnet.com
*/ */
#ifndef _LINUX_CDROM_H #ifndef _LINUX_CDROM_H
......
...@@ -254,12 +254,6 @@ extern inline unsigned long get_free_page(int gfp_mask) ...@@ -254,12 +254,6 @@ extern inline unsigned long get_free_page(int gfp_mask)
/* memory.c & swap.c*/ /* memory.c & swap.c*/
/*
* Decide if we should try to do some swapout..
*/
extern int free_memory_available(void);
extern struct wait_queue * kswapd_wait;
#define free_page(addr) free_pages((addr),0) #define free_page(addr) free_pages((addr),0)
extern void FASTCALL(free_pages(unsigned long addr, unsigned long order)); extern void FASTCALL(free_pages(unsigned long addr, unsigned long order));
extern void FASTCALL(__free_page(struct page *)); extern void FASTCALL(__free_page(struct page *));
...@@ -330,6 +324,23 @@ extern void put_cached_page(unsigned long); ...@@ -330,6 +324,23 @@ extern void put_cached_page(unsigned long);
#define GFP_DMA __GFP_DMA #define GFP_DMA __GFP_DMA
/*
* Decide if we should try to do some swapout..
*/
extern int free_memory_available(void);
extern struct task_struct * kswapd_task;
/*
 * Wake the kswapd task, if one is currently registered in kswapd_task.
 *
 * gfp_mask: allocation flags of the caller. When __GFP_WAIT is set, the
 * caller also sets SCHED_YIELD in its own policy and calls schedule()
 * right after the wakeup. Without __GFP_WAIT only the wakeup is done.
 *
 * Does nothing at all while kswapd_task is NULL.
 */
extern inline void kswapd_notify(unsigned int gfp_mask)
{
if (kswapd_task) {
wake_up_process(kswapd_task);
if (gfp_mask & __GFP_WAIT) {
/* Caller is allowed to wait: give up the CPU after waking kswapd
 * (presumably so kswapd can run before the caller continues). */
current->policy |= SCHED_YIELD;
schedule();
}
}
}
/* vma is the first one with address < vma->vm_end, /* vma is the first one with address < vma->vm_end,
* and even address < vma->vm_start. Have to extend vma. */ * and even address < vma->vm_start. Have to extend vma. */
static inline int expand_stack(struct vm_area_struct * vma, unsigned long address) static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
...@@ -380,11 +391,6 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m ...@@ -380,11 +391,6 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m
return vma; return vma;
} }
extern __inline__ void kswapd_wakeup(void)
{
wake_up(&kswapd_wait);
}
#define buffer_under_min() ((buffermem >> PAGE_SHIFT) * 100 < \ #define buffer_under_min() ((buffermem >> PAGE_SHIFT) * 100 < \
buffer_mem.min_percent * num_physpages) buffer_mem.min_percent * num_physpages)
#define buffer_under_borrow() ((buffermem >> PAGE_SHIFT) * 100 < \ #define buffer_under_borrow() ((buffermem >> PAGE_SHIFT) * 100 < \
......
...@@ -771,6 +771,8 @@ typedef struct copr_msg { ...@@ -771,6 +771,8 @@ typedef struct copr_msg {
#define SOUND_MIXER_CAPS 0xfc #define SOUND_MIXER_CAPS 0xfc
# define SOUND_CAP_EXCL_INPUT 0x00000001 /* Only one recording source at a time */ # define SOUND_CAP_EXCL_INPUT 0x00000001 /* Only one recording source at a time */
#define SOUND_MIXER_STEREODEVS 0xfb /* Mixer channels supporting stereo */ #define SOUND_MIXER_STEREODEVS 0xfb /* Mixer channels supporting stereo */
#define SOUND_MIXER_OUTSRC 0xfa /* Arg contains a bit for each input source to output */
#define SOUND_MIXER_OUTMASK 0xf9 /* Arg contains a bit for each supported input source to output */
/* Device mask bits */ /* Device mask bits */
......
...@@ -87,7 +87,7 @@ static void check_free_space(void) ...@@ -87,7 +87,7 @@ static void check_free_space(void)
if (!acct_file || !acct_needcheck) if (!acct_file || !acct_needcheck)
return; return;
sb = acct_file->f_dentry->f_inode->i_sb; sb = acct_file->f_dentry->d_inode->i_sb;
if (!sb->s_op || !sb->s_op->statfs) if (!sb->s_op || !sb->s_op->statfs)
return; return;
......
...@@ -1488,7 +1488,8 @@ asmlinkage int sys_sched_yield(void) ...@@ -1488,7 +1488,8 @@ asmlinkage int sys_sched_yield(void)
{ {
spin_lock(&scheduler_lock); spin_lock(&scheduler_lock);
spin_lock_irq(&runqueue_lock); spin_lock_irq(&runqueue_lock);
current->policy |= SCHED_YIELD; if (current->policy == SCHED_OTHER)
current->policy |= SCHED_YIELD;
current->need_resched = 1; current->need_resched = 1;
move_last_runqueue(current); move_last_runqueue(current);
spin_unlock_irq(&runqueue_lock); spin_unlock_irq(&runqueue_lock);
......
...@@ -269,13 +269,11 @@ unsigned long __get_free_pages(int gfp_mask, unsigned long order) ...@@ -269,13 +269,11 @@ unsigned long __get_free_pages(int gfp_mask, unsigned long order)
/* /*
* If we failed to find anything, we'll return NULL, but we'll * If we failed to find anything, we'll return NULL, but we'll
* wake up kswapd _now_ ad even wait for it synchronously if * wake up kswapd _now_ and even wait for it synchronously if
* we can.. This way we'll at least make some forward progress * we can.. This way we'll at least make some forward progress
* over time. * over time.
*/ */
wake_up(&kswapd_wait); kswapd_notify(gfp_mask);
if (gfp_mask & __GFP_WAIT)
schedule();
nopage: nopage:
return 0; return 0;
} }
......
...@@ -1190,7 +1190,6 @@ kmem_cache_grow(kmem_cache_t * cachep, int flags) ...@@ -1190,7 +1190,6 @@ kmem_cache_grow(kmem_cache_t * cachep, int flags)
cachep->c_dflags = SLAB_CFLGS_GROWN; cachep->c_dflags = SLAB_CFLGS_GROWN;
cachep->c_growing++; cachep->c_growing++;
re_try:
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags); spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
/* A series of memory allocations for a new slab. /* A series of memory allocations for a new slab.
...@@ -1257,15 +1256,6 @@ kmem_cache_grow(kmem_cache_t * cachep, int flags) ...@@ -1257,15 +1256,6 @@ kmem_cache_grow(kmem_cache_t * cachep, int flags)
kmem_freepages(cachep, objp); kmem_freepages(cachep, objp);
failed: failed:
spin_lock_irq(&cachep->c_spinlock); spin_lock_irq(&cachep->c_spinlock);
if (local_flags != SLAB_ATOMIC && cachep->c_gfporder) {
/* For large order (>0) slabs, we try again.
* Needed because the gfp() functions are not good at giving
* out contiguous pages unless pushed (but do not push too hard).
*/
if (cachep->c_failures++ < 4 && cachep->c_freep == kmem_slab_end(cachep))
goto re_try;
cachep->c_failures = 1; /* Memory is low, don't try as hard next time. */
}
cachep->c_growing--; cachep->c_growing--;
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags); spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
return 0; return 0;
......
...@@ -28,11 +28,6 @@ ...@@ -28,11 +28,6 @@
#include <asm/bitops.h> #include <asm/bitops.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
/*
* When are we next due for a page scan?
*/
static unsigned long next_swap_jiffies = 0;
/* /*
* How often do we do a pageout scan during normal conditions? * How often do we do a pageout scan during normal conditions?
* Default is four times a second. * Default is four times a second.
...@@ -42,7 +37,7 @@ int swapout_interval = HZ / 4; ...@@ -42,7 +37,7 @@ int swapout_interval = HZ / 4;
/* /*
* The wait queue for waking up the pageout daemon: * The wait queue for waking up the pageout daemon:
*/ */
struct wait_queue * kswapd_wait = NULL; struct task_struct * kswapd_task = NULL;
static void init_swap_timer(void); static void init_swap_timer(void);
...@@ -509,8 +504,6 @@ void __init kswapd_setup(void) ...@@ -509,8 +504,6 @@ void __init kswapd_setup(void)
*/ */
int kswapd(void *unused) int kswapd(void *unused)
{ {
struct wait_queue wait = { current, NULL };
current->session = 1; current->session = 1;
current->pgrp = 1; current->pgrp = 1;
strcpy(current->comm, "kswapd"); strcpy(current->comm, "kswapd");
...@@ -523,11 +516,12 @@ int kswapd(void *unused) ...@@ -523,11 +516,12 @@ int kswapd(void *unused)
*/ */
lock_kernel(); lock_kernel();
/* Give kswapd a realtime priority. */ /*
current->policy = SCHED_FIFO; * Set the base priority to something smaller than a
current->rt_priority = 32; /* Fixme --- we need to standardise our * regular process. We will scale up the priority
namings for POSIX.4 realtime scheduling * dynamically depending on how much memory we need.
priorities. */ */
current->priority = (DEF_PRIORITY * 2) / 3;
/* /*
* Tell the memory management that we're a "memory allocator", * Tell the memory management that we're a "memory allocator",
...@@ -544,7 +538,7 @@ int kswapd(void *unused) ...@@ -544,7 +538,7 @@ int kswapd(void *unused)
current->flags |= PF_MEMALLOC; current->flags |= PF_MEMALLOC;
init_swap_timer(); init_swap_timer();
add_wait_queue(&kswapd_wait, &wait); kswapd_task = current;
while (1) { while (1) {
int tries; int tries;
...@@ -586,7 +580,7 @@ int kswapd(void *unused) ...@@ -586,7 +580,7 @@ int kswapd(void *unused)
} while (--tries > 0); } while (--tries > 0);
} }
/* As if we could ever get here - maybe we want to make this killable */ /* As if we could ever get here - maybe we want to make this killable */
remove_wait_queue(&kswapd_wait, &wait); kswapd_task = NULL;
unlock_kernel(); unlock_kernel();
return 0; return 0;
} }
...@@ -620,41 +614,58 @@ int try_to_free_pages(unsigned int gfp_mask, int count) ...@@ -620,41 +614,58 @@ int try_to_free_pages(unsigned int gfp_mask, int count)
return retval; return retval;
} }
/*
 * Wake up kswapd according to the priority
 * 0 - no wakeup
 * 1 - wake up as a low-priority process
 * 2 - wake up as a normal process
 * 3 - wake up as an almost real-time process
 *
 * This plays mind-games with the "goodness()"
 * function in kernel/sched.c.
 *
 * Note that the "priority" argument is the wakeup level listed above,
 * not the task's own "priority" field: the task's counter is set to
 * its priority field shifted left by this level before the wakeup.
 */
static inline void kswapd_wakeup(int priority)
{
if (priority) {
/* kswapd_task is NULL while no kswapd task is registered. */
struct task_struct *p = kswapd_task;
if (p) {
/* Base priority times 2, 4 or 8 for the documented levels 1..3. */
p->counter = p->priority << priority;
wake_up_process(p);
}
}
}
/* /*
* The swap_tick function gets called on every clock tick. * The swap_tick function gets called on every clock tick.
*/ */
void swap_tick(void) void swap_tick(void)
{ {
unsigned long now, want; unsigned int pages;
int want_wakeup = 0; int want_wakeup;
want = next_swap_jiffies;
now = jiffies;
/* /*
* Examine the memory queues. Mark memory low
* if there is nothing available in the three
* highest queues.
*
* Schedule for wakeup if there isn't lots * Schedule for wakeup if there isn't lots
* of free memory. * of free memory or if there is too much
* of it used for buffers or pgcache.
*
* "want_wakeup" is our priority: 0 means
* not to wake anything up, while 3 means
* that we'd better give kswapd a realtime
* priority.
*/ */
switch (free_memory_available()) { want_wakeup = 0;
case 0: if (buffer_over_max() || pgcache_over_max())
want = now;
/* Fall through */
case 1:
want_wakeup = 1; want_wakeup = 1;
default: pages = nr_free_pages;
} if (pages < freepages.high)
want_wakeup = 1;
if ((long) (now - want) >= 0) { if (pages < freepages.low)
if (want_wakeup || buffer_over_max() || pgcache_over_max()) { want_wakeup = 2;
/* Set the next wake-up time */ if (pages < freepages.min)
next_swap_jiffies = now + swapout_interval; want_wakeup = 3;
kswapd_wakeup();
} kswapd_wakeup(want_wakeup);
}
timer_active |= (1<<SWAP_TIMER); timer_active |= (1<<SWAP_TIMER);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment