Commit 7b88e5e0 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] "io wait" process accounting

Patch from Rik adds "I/O wait" statistics to /proc/stat.

This allows us to determine how much system time is being spent
awaiting IO completion.  This is an important statistic, as it tends to
directly subtract from job completion time.

procps-2.0.9 is OK with this, but doesn't report it.
parent 7e96bae1
...@@ -58,6 +58,7 @@ static int queue_nr_requests; ...@@ -58,6 +58,7 @@ static int queue_nr_requests;
static int batch_requests; static int batch_requests;
unsigned long blk_max_low_pfn, blk_max_pfn; unsigned long blk_max_low_pfn, blk_max_pfn;
atomic_t nr_iowait_tasks = ATOMIC_INIT(0);
int blk_nohighio = 0; int blk_nohighio = 0;
static struct congestion_state { static struct congestion_state {
...@@ -116,6 +117,27 @@ static void set_queue_congested(request_queue_t *q, int rw) ...@@ -116,6 +117,27 @@ static void set_queue_congested(request_queue_t *q, int rw)
atomic_inc(&congestion_states[rw].nr_congested_queues); atomic_inc(&congestion_states[rw].nr_congested_queues);
} }
/*
 * This task is about to go to sleep on IO.  Increment nr_iowait_tasks so
 * that process accounting knows that this is a task in IO wait state.
 * The scheduler tick samples nr_iowait_tasks to decide whether an idle
 * tick should be charged to "iowait" rather than "idle" in /proc/stat.
 *
 * But don't do that if it is a deliberate, throttling IO wait (this task
 * has set its backing_dev_info: the queue against which it should
 * throttle).  Such callers invoke schedule() directly instead of this
 * wrapper, so their sleep is not counted as IO wait.
 */
void io_schedule(void)
{
	/* Bracket the sleep exactly: the counter must be elevated only
	 * while this task is actually blocked inside schedule(). */
	atomic_inc(&nr_iowait_tasks);
	schedule();
	atomic_dec(&nr_iowait_tasks);
}
/*
 * Timed variant of io_schedule(): sleep for at most @timeout jiffies
 * while counted as an IO-waiting task for /proc/stat accounting.
 *
 * NOTE(review): the return value of schedule_timeout() (jiffies
 * remaining if woken early) is discarded here; callers that need it
 * must currently call schedule_timeout() themselves.  Changing the
 * return type would require updating the extern prototype as well.
 */
void io_schedule_timeout(long timeout)
{
	atomic_inc(&nr_iowait_tasks);
	schedule_timeout(timeout);
	atomic_dec(&nr_iowait_tasks);
}
/** /**
* bdev_get_queue: - return the queue that matches the given device * bdev_get_queue: - return the queue that matches the given device
* @bdev: device * @bdev: device
...@@ -1274,7 +1296,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw) ...@@ -1274,7 +1296,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
prepare_to_wait_exclusive(&rl->wait, &wait, prepare_to_wait_exclusive(&rl->wait, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
if (!rl->count) if (!rl->count)
schedule(); io_schedule();
finish_wait(&rl->wait, &wait); finish_wait(&rl->wait, &wait);
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
rq = get_request(q, rw); rq = get_request(q, rw);
...@@ -1497,7 +1519,7 @@ void blk_congestion_wait(int rw, long timeout) ...@@ -1497,7 +1519,7 @@ void blk_congestion_wait(int rw, long timeout)
blk_run_queues(); blk_run_queues();
prepare_to_wait(&cs->wqh, &wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(&cs->wqh, &wait, TASK_UNINTERRUPTIBLE);
if (atomic_read(&cs->nr_congested_queues) != 0) if (atomic_read(&cs->nr_congested_queues) != 0)
schedule_timeout(timeout); io_schedule_timeout(timeout);
finish_wait(&cs->wqh, &wait); finish_wait(&cs->wqh, &wait);
} }
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/bio.h> #include <linux/bio.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include <linux/rwsem.h> #include <linux/rwsem.h>
#include <asm/atomic.h> #include <asm/atomic.h>
...@@ -230,7 +231,7 @@ static struct bio *dio_await_one(struct dio *dio) ...@@ -230,7 +231,7 @@ static struct bio *dio_await_one(struct dio *dio)
dio->waiter = current; dio->waiter = current;
spin_unlock_irqrestore(&dio->bio_list_lock, flags); spin_unlock_irqrestore(&dio->bio_list_lock, flags);
blk_run_queues(); blk_run_queues();
schedule(); io_schedule();
spin_lock_irqsave(&dio->bio_list_lock, flags); spin_lock_irqsave(&dio->bio_list_lock, flags);
dio->waiter = NULL; dio->waiter = NULL;
} }
......
...@@ -339,7 +339,7 @@ static int kstat_read_proc(char *page, char **start, off_t off, ...@@ -339,7 +339,7 @@ static int kstat_read_proc(char *page, char **start, off_t off,
int i, len; int i, len;
extern unsigned long total_forks; extern unsigned long total_forks;
unsigned long jif = jiffies; unsigned long jif = jiffies;
unsigned int sum = 0, user = 0, nice = 0, system = 0; unsigned int sum = 0, user = 0, nice = 0, system = 0, idle = 0, iowait = 0;
int major, disk; int major, disk;
for (i = 0 ; i < NR_CPUS; i++) { for (i = 0 ; i < NR_CPUS; i++) {
...@@ -349,27 +349,29 @@ static int kstat_read_proc(char *page, char **start, off_t off, ...@@ -349,27 +349,29 @@ static int kstat_read_proc(char *page, char **start, off_t off,
user += kstat.per_cpu_user[i]; user += kstat.per_cpu_user[i];
nice += kstat.per_cpu_nice[i]; nice += kstat.per_cpu_nice[i];
system += kstat.per_cpu_system[i]; system += kstat.per_cpu_system[i];
idle += kstat.per_cpu_idle[i];
iowait += kstat.per_cpu_iowait[i];
#if !defined(CONFIG_ARCH_S390) #if !defined(CONFIG_ARCH_S390)
for (j = 0 ; j < NR_IRQS ; j++) for (j = 0 ; j < NR_IRQS ; j++)
sum += kstat.irqs[i][j]; sum += kstat.irqs[i][j];
#endif #endif
} }
len = sprintf(page, "cpu %u %u %u %lu\n", len = sprintf(page, "cpu %u %u %u %u %u\n",
jiffies_to_clock_t(user), jiffies_to_clock_t(user),
jiffies_to_clock_t(nice), jiffies_to_clock_t(nice),
jiffies_to_clock_t(system), jiffies_to_clock_t(system),
jiffies_to_clock_t(jif * num_online_cpus() - (user + nice + system))); jiffies_to_clock_t(idle),
jiffies_to_clock_t(iowait));
for (i = 0 ; i < NR_CPUS; i++){ for (i = 0 ; i < NR_CPUS; i++){
if (!cpu_online(i)) continue; if (!cpu_online(i)) continue;
len += sprintf(page + len, "cpu%d %u %u %u %lu\n", len += sprintf(page + len, "cpu%d %u %u %u %u %u\n",
i, i,
jiffies_to_clock_t(kstat.per_cpu_user[i]), jiffies_to_clock_t(kstat.per_cpu_user[i]),
jiffies_to_clock_t(kstat.per_cpu_nice[i]), jiffies_to_clock_t(kstat.per_cpu_nice[i]),
jiffies_to_clock_t(kstat.per_cpu_system[i]), jiffies_to_clock_t(kstat.per_cpu_system[i]),
jiffies_to_clock_t(jif - ( kstat.per_cpu_user[i] \ jiffies_to_clock_t(kstat.per_cpu_idle[i]),
+ kstat.per_cpu_nice[i] \ jiffies_to_clock_t(kstat.per_cpu_iowait[i]));
+ kstat.per_cpu_system[i])));
} }
len += sprintf(page + len, "intr %u", sum); len += sprintf(page + len, "intr %u", sum);
......
...@@ -401,4 +401,8 @@ static inline void put_dev_sector(Sector p) ...@@ -401,4 +401,8 @@ static inline void put_dev_sector(Sector p)
page_cache_release(p.v); page_cache_release(p.v);
} }
extern atomic_t nr_iowait_tasks;
void io_schedule(void);
void io_schedule_timeout(long timeout);
#endif #endif
...@@ -18,7 +18,9 @@ ...@@ -18,7 +18,9 @@
struct kernel_stat { struct kernel_stat {
unsigned int per_cpu_user[NR_CPUS], unsigned int per_cpu_user[NR_CPUS],
per_cpu_nice[NR_CPUS], per_cpu_nice[NR_CPUS],
per_cpu_system[NR_CPUS]; per_cpu_system[NR_CPUS],
per_cpu_idle[NR_CPUS],
per_cpu_iowait[NR_CPUS];
unsigned int dk_drive[DK_MAX_MAJOR][DK_MAX_DISK]; unsigned int dk_drive[DK_MAX_MAJOR][DK_MAX_DISK];
unsigned int dk_drive_rio[DK_MAX_MAJOR][DK_MAX_DISK]; unsigned int dk_drive_rio[DK_MAX_MAJOR][DK_MAX_DISK];
unsigned int dk_drive_wio[DK_MAX_MAJOR][DK_MAX_DISK]; unsigned int dk_drive_wio[DK_MAX_MAJOR][DK_MAX_DISK];
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/blkdev.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/timer.h> #include <linux/timer.h>
...@@ -866,6 +867,10 @@ void scheduler_tick(int user_ticks, int sys_ticks) ...@@ -866,6 +867,10 @@ void scheduler_tick(int user_ticks, int sys_ticks)
/* note: this timer irq context must be accounted for as well */ /* note: this timer irq context must be accounted for as well */
if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET) if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET)
kstat.per_cpu_system[cpu] += sys_ticks; kstat.per_cpu_system[cpu] += sys_ticks;
else if (atomic_read(&nr_iowait_tasks) > 0)
kstat.per_cpu_iowait[cpu] += sys_ticks;
else
kstat.per_cpu_idle[cpu] += sys_ticks;
#if CONFIG_SMP #if CONFIG_SMP
idle_tick(rq); idle_tick(rq);
#endif #endif
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <linux/hash.h> #include <linux/hash.h>
#include <linux/writeback.h> #include <linux/writeback.h>
#include <linux/pagevec.h> #include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/security.h> #include <linux/security.h>
/* /*
* This is needed for the following functions: * This is needed for the following functions:
...@@ -51,7 +52,6 @@ ...@@ -51,7 +52,6 @@
* SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de> * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de>
*/ */
/* /*
* Lock ordering: * Lock ordering:
* *
...@@ -302,7 +302,7 @@ void wait_on_page_bit(struct page *page, int bit_nr) ...@@ -302,7 +302,7 @@ void wait_on_page_bit(struct page *page, int bit_nr)
prepare_to_wait(waitqueue, &wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(waitqueue, &wait, TASK_UNINTERRUPTIBLE);
sync_page(page); sync_page(page);
if (test_bit(bit_nr, &page->flags)) if (test_bit(bit_nr, &page->flags))
schedule(); io_schedule();
} while (test_bit(bit_nr, &page->flags)); } while (test_bit(bit_nr, &page->flags));
finish_wait(waitqueue, &wait); finish_wait(waitqueue, &wait);
} }
...@@ -366,7 +366,7 @@ void __lock_page(struct page *page) ...@@ -366,7 +366,7 @@ void __lock_page(struct page *page)
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
sync_page(page); sync_page(page);
if (PageLocked(page)) if (PageLocked(page))
schedule(); io_schedule();
} }
finish_wait(wqh, &wait); finish_wait(wqh, &wait);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment