/*
 * mm/pdflush.c - worker threads for writing back filesystem data
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * 09Apr2002	akpm@zip.com.au
 *		Initial version
 */

#include <linux/sched.h>
#include <linux/list.h>
#include <linux/signal.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/suspend.h>


/*
 * Minimum and maximum number of pdflush instances.
 *
 * pdflush_init() starts MIN_PDFLUSH_THREADS workers.  A worker only exits
 * while more than MIN_PDFLUSH_THREADS are running, and only spawns another
 * worker while fewer than MAX_PDFLUSH_THREADS are running.
 */
#define MIN_PDFLUSH_THREADS	2
#define MAX_PDFLUSH_THREADS	8

/* Forward declaration: __pdflush() uses this before it is defined below. */
static void start_one_pdflush_thread(void);


/*
 * The pdflush threads are worker threads for writing back dirty data.
 * Ideally, we'd like one thread per active disk spindle.  But the disk
 * topology is very hard to divine at this level.   Instead, we take
 * care in various places to prevent more than one pdflush thread from
 * performing writeback against a single filesystem.  pdflush threads
 * have the PF_FLUSHER flag set in current->flags to aid in this.
 */

/*
 * All the idle pdflush threads.  Protected by pdflush_lock.
 *
 * A thread links its pdflush_work at the head of this list before it goes
 * to sleep; pdflush_operation() takes the entry at the head to hand out
 * work, and the thread-destruction test looks at the entry at the tail.
 */
static LIST_HEAD(pdflush_list);
static spinlock_t pdflush_lock = SPIN_LOCK_UNLOCKED;

/*
 * The count of currently-running pdflush threads.  Modified only under
 * pdflush_lock; the thread-creation test in __pdflush() reads it without
 * the lock.
 */
static int nr_pdflush_threads = 0;

/*
 * The time (in jiffies) at which the pdflush thread pool last went empty,
 * i.e. when pdflush_operation() took the last idle thread off pdflush_list.
 * Written under pdflush_lock, read without it in __pdflush().
 */
static unsigned long last_empty_jifs;

/*
 * The pdflush thread.
 *
 * Thread pool management algorithm:
 * 
 * - The minimum and maximum number of pdflush instances are bounded
 *   by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS.
 * 
 * - If there have been no idle pdflush instances for 1 second, create
 *   a new one.
 * 
 * - If the pdflush thread which has been idle the longest has been asleep
 *   for more than one second, terminate a thread.
 */

/*
 * A structure for passing work to a pdflush thread.  Also for passing
 * state information between pdflush threads.  Protected by pdflush_lock.
 */
struct pdflush_work {
	struct task_struct *who;	/* The thread */
	void (*fn)(unsigned long);	/* A callback function for pdflush to work on */
	unsigned long arg0;		/* An argument to the callback function */
	struct list_head list;		/* On pdflush_list, when the thread is idle */
	/*
	 * jiffies value recorded when the thread last queued itself on
	 * pdflush_list.  Also refreshed by an exiting thread on the entry at
	 * the tail of the list, to limit the rate at which threads exit.
	 */
	unsigned long when_i_went_to_sleep;
};

/*
 * __pdflush - main loop of one pdflush worker thread.
 * @my_work: this thread's work slot (a local of pdflush(), so it lives on
 *           this thread's stack).
 *
 * Detaches the thread, blocks all signals, sets PF_FLUSHER | PF_KERNTHREAD
 * and then loops forever:
 *   - queue my_work on pdflush_list and sleep;
 *   - on wakeup, run the callback that was stored in my_work->fn (if any);
 *   - start another pdflush thread if the pool has had no idle thread for
 *     more than a second and is below MAX_PDFLUSH_THREADS;
 *   - exit if the pool is above MIN_PDFLUSH_THREADS and the thread at the
 *     tail of pdflush_list has been asleep for more than a second.
 *
 * Returns 0 when the thread decides to exit.
 *
 * preemption is disabled in pdflush.  There was a bug in preempt
 * which was causing pdflush to get flipped into state TASK_RUNNING
 * when it performed a spin_unlock.  That bug is probably fixed,
 * but play it safe.  The preempt-off paths are very short.
 */
static int __pdflush(struct pdflush_work *my_work)
{
	daemonize();
	reparent_to_init();
	strcpy(current->comm, "pdflush");

	/* interruptible sleep, so block all signals */
	spin_lock_irq(&current->sigmask_lock);
	siginitsetinv(&current->blocked, 0);
	recalc_sigpending();
	spin_unlock_irq(&current->sigmask_lock);

	current->flags |= PF_FLUSHER | PF_KERNTHREAD;
	my_work->fn = NULL;
	my_work->who = current;

	preempt_disable();
	spin_lock_irq(&pdflush_lock);
	nr_pdflush_threads++;
	for ( ; ; ) {
		struct pdflush_work *pdf;
		void (*fn)(unsigned long);

		/*
		 * Become idle: queue at the head of pdflush_list and sleep.
		 *
		 * NOTE(review): if schedule() below ever returns while this
		 * thread is still linked on pdflush_list (a wakeup which did
		 * not come from pdflush_operation()), this list_add() runs on
		 * an already-linked node.  Not addressed here -- confirm
		 * whether such wakeups can occur.
		 */
		list_add(&my_work->list, &pdflush_list);
		my_work->when_i_went_to_sleep = jiffies;
		set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock_irq(&pdflush_lock);

#ifdef CONFIG_SOFTWARE_SUSPEND
		run_task_queue(&tq_bdflush);
		if (current->flags & PF_FREEZE)
			refrigerator(PF_IOTHREAD);
#endif
		schedule();

		preempt_enable();
		/*
		 * Consume the callback before running it, so that each
		 * hand-off is executed exactly once.  Previously ->fn was
		 * only cleared at the bottom of the loop, which the
		 * `continue' paths below skip; a later wakeup without a
		 * fresh hand-off would then re-run the stale callback.
		 */
		fn = my_work->fn;
		if (fn) {
			my_work->fn = NULL;
			(*fn)(my_work->arg0);
		}
		preempt_disable();

		/*
		 * Thread creation: For how long have there been zero
		 * available threads?
		 */
		if (jiffies - last_empty_jifs > 1 * HZ) {
			/* unlocked list_empty() test is OK here */
			if (list_empty(&pdflush_list)) {
				/* unlocked test is OK here */
				if (nr_pdflush_threads < MAX_PDFLUSH_THREADS)
					start_one_pdflush_thread();
			}
		}

		spin_lock_irq(&pdflush_lock);

		/*
		 * Thread destruction: For how long has the sleepiest
		 * thread slept?
		 */
		if (list_empty(&pdflush_list))
			continue;
		if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
			continue;
		pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
		if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) {
			pdf->when_i_went_to_sleep = jiffies;	/* Limit exit rate */
			break;					/* exeunt */
		}
	}
	/* Still holding pdflush_lock here: we left the loop via `break'. */
	nr_pdflush_threads--;
	spin_unlock_irq(&pdflush_lock);
	preempt_enable();
	return 0;
}

/*
 * Of course, my_work wants to be just a local in __pdflush().  It is
 * separated out in this manner to hopefully prevent the compiler from
 * performing unfortunate optimisations against the auto variables.  Because
 * these are visible to other tasks and CPUs.  (No problem has actually
 * been observed.  This is just paranoia).
 */
static int pdflush(void *dummy)
{
	struct pdflush_work my_work;
	return __pdflush(&my_work);
}

/*
 * Attempt to wake up a pdflush thread, and get it to do some work for you.
 * @fn:   callback for the worker to run; must not be NULL (BUG() if it is).
 * @arg0: argument passed to @fn.
 *
 * Returns zero if it indeed managed to find a worker thread, and passed your
 * payload to it.  Returns -1 if there was no idle worker on pdflush_list; in
 * that case nothing has been queued.
 *
 * The whole hand-off (unlinking the worker, storing fn/arg0, waking it) is
 * done under pdflush_lock.  The worker's pdflush_work lives on the worker's
 * own stack, and the worker takes pdflush_lock before it re-queues itself or
 * exits, so holding the lock keeps `pdf' valid and prevents the worker from
 * running through its loop with a half-written hand-off.
 */
int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
{
	unsigned long flags;
	int ret = 0;

	if (fn == NULL)
		BUG();		/* Hard to diagnose if it's deferred */

	spin_lock_irqsave(&pdflush_lock, flags);
	if (list_empty(&pdflush_list)) {
		ret = -1;
	} else {
		struct pdflush_work *pdf;

		/* Take the worker at the head of the idle list. */
		pdf = list_entry(pdflush_list.next, struct pdflush_work, list);
		list_del_init(&pdf->list);
		if (list_empty(&pdflush_list))
			last_empty_jifs = jiffies;
		pdf->fn = fn;
		pdf->arg0 = arg0;
		/* The worker reads fn/arg0 without the lock: order the stores
		 * ahead of the wakeup. */
		wmb();
		wake_up_process(pdf->who);
	}
	spin_unlock_irqrestore(&pdflush_lock, flags);
	return ret;
}

/*
 * Spawn one more pdflush worker running pdflush().  The result of
 * kernel_thread() is not examined.
 */
static void start_one_pdflush_thread(void)
{
	const unsigned long clone_flags = CLONE_FS | CLONE_FILES | CLONE_SIGNAL;

	kernel_thread(pdflush, NULL, clone_flags);
}

/*
 * Boot-time setup: bring the pool up to its minimum size of
 * MIN_PDFLUSH_THREADS workers.  Always returns 0.
 */
static int __init pdflush_init(void)
{
	int remaining = MIN_PDFLUSH_THREADS;

	while (remaining-- > 0)
		start_one_pdflush_thread();

	return 0;
}

module_init(pdflush_init);