Commit 80044607 authored by Linus Torvalds

v2.5.0.9 -> v2.5.0.10

- Jens Axboe: more bio stuff
- Ingo Molnar: mempool for bio
- Niibe Yutaka: Super-H update
parent b1507c9a
The multiple machine support relies on redirecting all functions which will
need to be machine specific through a table of function pointers, the
machvec. These functions fall into a number of categories:
- I/O functions to IO memory (inb etc) and PCI/main memory (readb etc).
- I/O remapping functions (ioremap etc)
- some initialisation functions
- a 'heartbeat' function
- some miscellaneous flags
The tree can be built in two ways:
- as a fully generic build. All drivers are linked in, and all functions
go through the machvec
- as a machine specific build. In this case only the required drivers
will be linked in, and some macros may be redefined to not go through
the machvec where performance is important (in particular IO functions).
There are three ways in which IO can be performed:
- none at all. This is really only useful for the 'unknown' machine type,
which is designed to run on a machine about which we know nothing, so
all IO instructions do nothing.
- fully custom. In this case all IO functions go to a machine specific
set of functions which can do what they like
- a generic set of functions. These will cope with most situations, and
rely on a single function, mv_port2addr, which is called through the
machine vector and converts an IO address into a memory address that
can be read from or written to directly (a sketch follows this list).
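To illustrate (this is only a sketch, assuming the global machine vector is
reachable as sh_mv; the real generic helpers may differ in names and types),
a generic byte read built on mv_port2addr is essentially:

unsigned char generic_inb(unsigned long port)
{
	/* ask the machvec to turn the port number into a memory address,
	   then perform an ordinary byte load from that address */
	return *(volatile unsigned char *)sh_mv.mv_port2addr(port);
}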
Thus adding a new machine involves the following steps (I will assume I am
adding a machine called fred):
- add a new file include/asm-sh/io_fred.h which contains prototypes for
any machine specific IO functions prefixed with the machine name, for
example fred_inb. These will be needed when filling out the machine
vector. In addition, a section is required which defines what to do when
building a machine specific version. For example:
#ifdef __WANT_IO_DEF
#define inb fred_inb
...
#endif
This is the minimum that is required; however, there are ample
opportunities to optimise this. In particular, by making the prototypes
inline function definitions, it is possible to inline the function when
building machine specific versions. Note that the machine vector
functions will still be needed, so that a module built for a generic
setup can be loaded.
- add a new file arch/sh/kernel/mach_fred.c. This contains the definition
of the machine vector. When building the machine specific version, this
will be the real machine vector (via an alias), while in the generic
version it is used to initialise the machine vector and is then freed, by
marking it initdata. It should be defined as:
struct sh_machine_vector mv_fred __initmv = {
	mv_name: "Fred",
};
ALIAS_MV(fred)
- finally add a file arch/sh/kernel/io_fred.c, which contains definitions
of the machine specific IO functions; a brief sketch follows below.
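As a purely illustrative sketch of such a file (fred_port2addr and the base
address below are invented for the example and are not real kernel code):

/* arch/sh/kernel/io_fred.c - sketch only */
static volatile unsigned char *fred_port2addr(unsigned long port)
{
	/* assume the board decodes IO ports into a fixed uncached window */
	return (volatile unsigned char *)(0xb0000000 + port);
}

unsigned char fred_inb(unsigned long port)
{
	return *fred_port2addr(port);
}

void fred_outb(unsigned char value, unsigned long port)
{
	*fred_port2addr(port) = value;
}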
A note about initialisation functions. Three initialisation functions are
provided in the machine vector:
- mv_arch_init - called very early on from setup_arch
- mv_init_irq - called from init_IRQ, after the generic SH interrupt
initialisation
- mv_init_pci - currently not used
Any other remaining functions which need to be called at start up can be
added to the list using the __initcall macro (or module_init if the code
can be built as a module). Many generic drivers probe to see if the device
they are targeting is present; however, this may not always be appropriate,
so a flag can be added to the machine vector which is set on those
machines which have the hardware in question, reducing the probe to a
single conditional.
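As a sketch of that pattern (the mv_has_fredcard flag, the sh_mv access and
fredcard_probe are hypothetical, used only to show the shape of the test):

static int __init fredcard_init(void)
{
	/* machines whose vector does not set the flag skip the probe
	   entirely instead of poking at hardware that is not there */
	if (!sh_mv.mv_has_fredcard)
		return -ENODEV;
	return fredcard_probe();
}
__initcall(fredcard_init);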
VERSION = 2
PATCHLEVEL = 5
SUBLEVEL = 1
EXTRAVERSION =-pre9
EXTRAVERSION =-pre10
KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
......
......@@ -189,7 +189,7 @@ if [ "$CONFIG_PCI" = "y" ]; then
if [ "$CONFIG_PCI_GODIRECT" = "y" -o "$CONFIG_PCI_GOANY" = "y" ]; then
define_bool CONFIG_PCI_DIRECT y
fi
define_bool CONFIG_SH_PCIDMA_NONCOHERENT n
bool 'Cache and PCI noncoherent' CONFIG_SH_PCIDMA_NONCOHERENT n
fi
source drivers/pci/Config.in
......
......@@ -17,7 +17,7 @@
#include <asm/hitachi_7751se.h>
#include <asm/addrspace.h>
#include <asm/pci.h>
#include <linux/pci.h>
#include <asm/pci-sh7751.h>
#if 0
......@@ -70,7 +70,7 @@ port2adr(unsigned int port)
else
return (volatile __u16 *) (PA_SUPERIO + (port << 1));
#endif
maybebadio(name,port);
maybebadio(name,(unsigned long)port);
return (volatile __u16*)port;
}
......@@ -276,6 +276,7 @@ void sh7751se_writel(unsigned int b, unsigned long addr)
/* ISA page descriptor. */
static __u32 sh_isa_memmap[256];
#if 0
static int
sh_isa_mmap(__u32 start, __u32 length, __u32 offset)
{
......@@ -286,12 +287,11 @@ sh_isa_mmap(__u32 start, __u32 length, __u32 offset)
idx = start >> 12;
sh_isa_memmap[idx] = 0xb8000000 + (offset &~ 0xfff);
#if 0
printk("sh_isa_mmap: start %x len %x offset %x (idx %x paddr %x)\n",
start, length, offset, idx, sh_isa_memmap[idx]);
#endif
return 0;
}
#endif
unsigned long
sh7751se_isa_port2addr(unsigned long offset)
......
......@@ -37,7 +37,6 @@
*/
int __init pcibios_init_platform(void)
{
unsigned long data;
unsigned long bcr1, wcr1, wcr2, wcr3, mcr;
unsigned short bcr2;
......
......@@ -560,3 +560,8 @@ void dump_stack(void)
}
}
}
void show_trace_task(struct task_struct *tsk)
{
printk("Backtrace not yet implemented for SH.\n");
}
......@@ -358,6 +358,8 @@ inline int blk_contig_segment(request_queue_t *q, struct bio *bio,
if (!BIO_CONTIG(bio, nxt))
return 0;
if (bio->bi_size + nxt->bi_size > q->max_segment_size)
return 0;
/*
* bio and nxt are contigous in memory, check if the queue allows
......@@ -429,8 +431,10 @@ int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg
* specific ones if so desired
*/
static inline int ll_new_segment(request_queue_t *q, struct request *req,
struct bio *bio, int nr_segs)
struct bio *bio)
{
int nr_segs = bio_hw_segments(q, bio);
if (req->nr_segments + nr_segs <= q->max_segments) {
req->nr_segments += nr_segs;
return 1;
......@@ -443,41 +447,23 @@ static inline int ll_new_segment(request_queue_t *q, struct request *req,
static int ll_back_merge_fn(request_queue_t *q, struct request *req,
struct bio *bio)
{
int bio_segs;
if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
req->flags |= REQ_NOMERGE;
return 0;
}
bio_segs = bio_hw_segments(q, bio);
if (blk_contig_segment(q, req->biotail, bio))
bio_segs--;
if (!bio_segs)
return 1;
return ll_new_segment(q, req, bio, bio_segs);
return ll_new_segment(q, req, bio);
}
static int ll_front_merge_fn(request_queue_t *q, struct request *req,
struct bio *bio)
{
int bio_segs;
if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
req->flags |= REQ_NOMERGE;
return 0;
}
bio_segs = bio_hw_segments(q, bio);
if (blk_contig_segment(q, bio, req->bio))
bio_segs--;
if (!bio_segs)
return 1;
return ll_new_segment(q, req, bio, bio_segs);
return ll_new_segment(q, req, bio);
}
static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
......@@ -1235,11 +1221,6 @@ void generic_make_request(struct bio *bio)
break;
}
/*
* this needs to be handled by q->make_request_fn, to just
* setup a part of the bio in the request to enable easy
* multiple passing
*/
BUG_ON(bio_sectors(bio) > q->max_sectors);
/*
......@@ -1497,6 +1478,7 @@ int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
while ((bio = req->bio)) {
nsect = bio_iovec(bio)->bv_len >> 9;
BIO_BUG_ON(bio_iovec(bio)->bv_len > bio->bi_size);
/*
* not a complete bvec done
......@@ -1515,11 +1497,12 @@ int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
* account transfer
*/
bio->bi_size -= bio_iovec(bio)->bv_len;
bio->bi_idx++;
nr_sectors -= nsect;
total_nsect += nsect;
if (++bio->bi_idx >= bio->bi_vcnt) {
if (!bio->bi_size) {
req->bio = bio->bi_next;
if (unlikely(bio_endio(bio, uptodate, total_nsect)))
......@@ -1619,7 +1602,9 @@ EXPORT_SYMBOL(blk_queue_max_sectors);
EXPORT_SYMBOL(blk_queue_max_segments);
EXPORT_SYMBOL(blk_queue_max_segment_size);
EXPORT_SYMBOL(blk_queue_hardsect_size);
EXPORT_SYMBOL(blk_queue_segment_boundary);
EXPORT_SYMBOL(blk_rq_map_sg);
EXPORT_SYMBOL(blk_nohighio);
EXPORT_SYMBOL(blk_dump_rq_flags);
EXPORT_SYMBOL(submit_bio);
EXPORT_SYMBOL(blk_contig_segment);
......@@ -10,7 +10,6 @@
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/init.h>
......@@ -177,7 +176,7 @@ static int sh_wdt_close(struct inode *inode, struct file *file)
* sh_wdt_read - Read from Device
*
* @file: file handle of device
* @char: buffer to write to
* @buf: buffer to write to
* @count: length of buffer
* @ppos: offset
*
......@@ -193,7 +192,7 @@ static ssize_t sh_wdt_read(struct file *file, char *buf,
* sh_wdt_write - Write to Device
*
* @file: file handle of device
* @char: buffer to write
* @buf: buffer to write
* @count: length of buffer
* @ppos: offset
*
......@@ -269,7 +268,7 @@ static int sh_wdt_ioctl(struct inode *inode, struct file *file,
static int sh_wdt_notify_sys(struct notifier_block *this,
unsigned long code, void *unused)
{
if (code == SYS_DOWN || SYS_HALT) {
if (code == SYS_DOWN || code == SYS_HALT) {
sh_wdt_stop();
}
......
......@@ -44,11 +44,10 @@ obj-$(CONFIG_VIDEO_SAA5249) += saa5249.o i2c-old.o
obj-$(CONFIG_VIDEO_CQCAM) += c-qcam.o
obj-$(CONFIG_VIDEO_BWQCAM) += bw-qcam.o
obj-$(CONFIG_VIDEO_W9966) += w9966.o
obj-$(CONFIG_VIDEO_ZORAN) += zr36067.o i2c-old.o
obj-$(CONFIG_VIDEO_ZORAN_BUZ) += saa7111.o saa7185.o
obj-$(CONFIG_VIDEO_ZORAN_DC10) += saa7110.o adv7175.o
obj-$(CONFIG_VIDEO_ZORAN_LML33) += bt819.o bt856.o
obj-$(CONFIG_VIDEO_LML33) += bt856.o bt819.o
obj-$(CONFIG_VIDEO_ZORAN) += zr36067.o i2c-old.o
obj-$(CONFIG_VIDEO_PMS) += pms.o
obj-$(CONFIG_VIDEO_PLANB) += planb.o
obj-$(CONFIG_VIDEO_VINO) += vino.o
......
......@@ -261,7 +261,7 @@ static void idescsi_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
ide_drive_t *drive = hwgroup->drive;
idescsi_scsi_t *scsi = drive->driver_data;
struct request *rq = hwgroup->rq;
idescsi_pc_t *pc = (idescsi_pc_t *) rq->buffer;
idescsi_pc_t *pc = (idescsi_pc_t *) rq->special;
int log = test_bit(IDESCSI_LOG_CMD, &scsi->log);
struct Scsi_Host *host;
u8 *scsi_buf;
......@@ -464,7 +464,7 @@ static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *r
#endif /* IDESCSI_DEBUG_LOG */
if (rq->flags & REQ_SPECIAL) {
return idescsi_issue_pc (drive, (idescsi_pc_t *) rq->buffer);
return idescsi_issue_pc (drive, (idescsi_pc_t *) rq->special);
}
blk_dump_rq_flags(rq, "ide-scsi: unsup command");
idescsi_end_request (0,HWGROUP (drive));
......@@ -662,6 +662,7 @@ static inline struct bio *idescsi_kmalloc_bio (int count)
if ((first_bh = bhp = bh = bio_alloc(GFP_ATOMIC, 1)) == NULL)
goto abort;
bio_init(bh);
bh->bi_vcnt = 1;
while (--count) {
if ((bh = bio_alloc(GFP_ATOMIC, 1)) == NULL)
goto abort;
......@@ -802,7 +803,7 @@ int idescsi_queue (Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
}
ide_init_drive_cmd (rq);
rq->buffer = (char *) pc;
rq->special = (char *) pc;
rq->bio = idescsi_dma_bio (drive, pc);
rq->flags = REQ_SPECIAL;
spin_unlock(&cmd->host->host_lock);
......
......@@ -205,8 +205,10 @@ recount_segments(Scsi_Cmnd * SCpnt)
static inline int scsi_new_mergeable(request_queue_t * q,
struct request * req,
int nr_segs)
struct bio *bio)
{
int nr_segs = bio_hw_segments(q, bio);
/*
* pci_map_sg will be able to merge these two
* into a single hardware sg entry, check if
......@@ -223,8 +225,9 @@ static inline int scsi_new_mergeable(request_queue_t * q,
static inline int scsi_new_segment(request_queue_t * q,
struct request * req,
struct bio *bio, int nr_segs)
struct bio *bio)
{
int nr_segs = bio_hw_segments(q, bio);
/*
* pci_map_sg won't be able to map these two
* into a single hardware sg entry, so we have to
......@@ -244,8 +247,10 @@ static inline int scsi_new_segment(request_queue_t * q,
static inline int scsi_new_segment(request_queue_t * q,
struct request * req,
struct bio *bio, int nr_segs)
struct bio *bio)
{
int nr_segs = bio_hw_segments(q, bio);
if (req->nr_segments + nr_segs > q->max_segments) {
req->flags |= REQ_NOMERGE;
return 0;
......@@ -296,45 +301,33 @@ __inline static int __scsi_back_merge_fn(request_queue_t * q,
struct request *req,
struct bio *bio)
{
int bio_segs;
if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
req->flags |= REQ_NOMERGE;
return 0;
}
bio_segs = bio_hw_segments(q, bio);
if (blk_contig_segment(q, req->biotail, bio))
bio_segs--;
#ifdef DMA_CHUNK_SIZE
if (MERGEABLE_BUFFERS(bio, req->bio))
return scsi_new_mergeable(q, req, bio_segs);
return scsi_new_mergeable(q, req, bio);
#endif
return scsi_new_segment(q, req, bio, bio_segs);
return scsi_new_segment(q, req, bio);
}
__inline static int __scsi_front_merge_fn(request_queue_t * q,
struct request *req,
struct bio *bio)
{
int bio_segs;
if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
req->flags |= REQ_NOMERGE;
return 0;
}
bio_segs = bio_hw_segments(q, bio);
if (blk_contig_segment(q, req->biotail, bio))
bio_segs--;
#ifdef DMA_CHUNK_SIZE
if (MERGEABLE_BUFFERS(bio, req->bio))
return scsi_new_mergeable(q, req, bio_segs);
return scsi_new_mergeable(q, req, bio);
#endif
return scsi_new_segment(q, req, bio, bio_segs);
return scsi_new_segment(q, req, bio);
}
/*
......@@ -370,32 +363,23 @@ MERGEFCT(scsi_back_merge_fn, back)
MERGEFCT(scsi_front_merge_fn, front)
/*
* Function: __scsi_merge_requests_fn()
* Function: scsi_merge_requests_fn_()
*
* Purpose: Prototype for queue merge function.
* Purpose: queue merge function.
*
* Arguments: q - Queue for which we are merging request.
* req - request into which we wish to merge.
* next - 2nd request that we might want to combine with req
* dma_host - 1 if this host has ISA DMA issues (bus doesn't
* expose all of the address lines, so that DMA cannot
* be done from an arbitrary address).
* next - Block which we may wish to merge into request
*
* Returns: 1 if it is OK to merge the two requests. 0
* Returns: 1 if it is OK to merge the block into the request. 0
* if it is not OK.
*
* Lock status: queue lock is assumed to be held here.
*
* Notes: Some drivers have limited scatter-gather table sizes, and
* thus they cannot queue an infinitely large command. This
* function is called from ll_rw_blk before it attempts to merge
* a new block into a request to make sure that the request will
* not become too large.
*/
__inline static int __scsi_merge_requests_fn(request_queue_t * q,
struct request *req,
struct request *next,
int dma_host)
inline static int scsi_merge_requests_fn(request_queue_t * q,
struct request *req,
struct request *next)
{
int bio_segs;
......@@ -445,35 +429,6 @@ __inline static int __scsi_merge_requests_fn(request_queue_t * q,
return 1;
}
/*
* Function: scsi_merge_requests_fn_()
*
* Purpose: queue merge function.
*
* Arguments: q - Queue for which we are merging request.
* req - request into which we wish to merge.
* bio - Block which we may wish to merge into request
*
* Returns: 1 if it is OK to merge the block into the request. 0
* if it is not OK.
*
* Lock status: queue lock is assumed to be held here.
*
* Notes: Optimized for different cases depending upon whether
* ISA DMA is in use and whether clustering should be used.
*/
#define MERGEREQFCT(_FUNCTION, _DMA) \
static int _FUNCTION(request_queue_t * q, \
struct request * req, \
struct request * next) \
{ \
int ret; \
ret = __scsi_merge_requests_fn(q, req, next, _DMA); \
return ret; \
}
MERGEREQFCT(scsi_merge_requests_fn_, 0)
MERGEREQFCT(scsi_merge_requests_fn_d, 1)
/*
* Function: __init_io()
*
......@@ -811,15 +766,13 @@ void initialize_merge_fn(Scsi_Device * SDpnt)
* is simply easier to do it ourselves with our own functions
* rather than rely upon the default behavior of ll_rw_blk.
*/
q->back_merge_fn = scsi_back_merge_fn;
q->front_merge_fn = scsi_front_merge_fn;
q->merge_requests_fn = scsi_merge_requests_fn;
if (SHpnt->unchecked_isa_dma == 0) {
q->back_merge_fn = scsi_back_merge_fn;
q->front_merge_fn = scsi_front_merge_fn;
q->merge_requests_fn = scsi_merge_requests_fn_;
SDpnt->scsi_init_io_fn = scsi_init_io_v;
} else {
q->back_merge_fn = scsi_back_merge_fn;
q->front_merge_fn = scsi_front_merge_fn;
q->merge_requests_fn = scsi_merge_requests_fn_d;
SDpnt->scsi_init_io_fn = scsi_init_io_vd;
}
......
......@@ -162,13 +162,13 @@ adfspart_check_CUMANA(struct gendisk *hd, struct block_device *bdev,
struct adfs_discrecord *dr;
unsigned int nr_sects;
if (!(minor & mask))
break;
data = read_dev_sector(bdev, start_blk * 2 + 6, &sect);
if (!data)
return -1;
if (!(minor & mask))
break;
dr = adfs_partition(hd, name, data, first_sector, minor++);
if (!dr)
break;
......
#include <linux/pagemap.h>
/*
* add_gd_partition adds a partitions details to the devices partition
* description.
......
......@@ -196,6 +196,11 @@ static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
return 1;
}
/* Not supporting more than 32-bit PCI bus addresses now, but
* must satisfy references to this function. Change if needed.
*/
#define pci_dac_dma_supported(pci_dev, mask) (0)
/* Return the index of the PCI controller for device PDEV. */
#define pci_controller_num(PDEV) (0)
......
......@@ -42,8 +42,16 @@ struct stat {
* insane amounts of padding around dev_t's.
*/
struct stat64 {
#if defined(__BIG_ENDIAN__)
unsigned char __pad0b[6];
unsigned short st_dev;
unsigned char __pad0[10];
#elif defined(__LITTLE_ENDIAN__)
unsigned short st_dev;
unsigned char __pad0b[6];
#else
#error Must know endian to build stat64 structure!
#endif
unsigned char __pad0[4];
unsigned long st_ino;
unsigned int st_mode;
......@@ -52,14 +60,25 @@ struct stat64 {
unsigned long st_uid;
unsigned long st_gid;
#if defined(__BIG_ENDIAN__)
unsigned char __pad3b[6];
unsigned short st_rdev;
#else /* Must be little */
unsigned short st_rdev;
unsigned char __pad3[10];
unsigned char __pad3b[6];
#endif
unsigned char __pad3[4];
long long st_size;
unsigned long st_blksize;
#if defined(__BIG_ENDIAN__)
unsigned long __pad4; /* Future possible st_blocks hi bits */
unsigned long st_blocks; /* Number 512-byte blocks allocated. */
#else /* Must be little */
unsigned long st_blocks; /* Number 512-byte blocks allocated. */
unsigned long __pad4; /* future possible st_blocks high bits */
unsigned long __pad4; /* Future possible st_blocks hi bits */
#endif
unsigned long st_atime;
unsigned long __pad5;
......
......@@ -216,6 +216,7 @@ __asm__ __volatile__( \
: "r" (val), "m" (__m(addr)), "i" (-EFAULT) \
: "memory"); })
#else
#define __put_user_u64(val,addr,retval) \
({ \
__asm__ __volatile__( \
"1:\n\t" \
......
......@@ -28,6 +28,8 @@
#define BIO_BUG_ON
#endif
#define BIO_MAX_SECTORS 128
/*
* was unsigned short, but we might as well be ready for > 64kB I/O pages
*/
......@@ -60,7 +62,7 @@ struct bio {
unsigned short bi_vcnt; /* how many bio_vec's */
unsigned short bi_idx; /* current index into bvl_vec */
unsigned short bi_hw_seg; /* actual mapped segments */
unsigned int bi_size; /* total size in bytes */
unsigned int bi_size; /* residual I/O count */
unsigned int bi_max; /* max bvl_vecs we can hold,
used as index into pool */
......
/*
* memory buffer pool support
*/
#ifndef _LINUX_MEMPOOL_H
#define _LINUX_MEMPOOL_H
#include <linux/list.h>
#include <linux/wait.h>
struct mempool_s;
typedef struct mempool_s mempool_t;
typedef void * (mempool_alloc_t)(int gfp_mask, void *pool_data);
typedef void (mempool_free_t)(void *element, void *pool_data);
struct mempool_s {
spinlock_t lock;
int min_nr, curr_nr;
struct list_head elements;
void *pool_data;
mempool_alloc_t *alloc;
mempool_free_t *free;
wait_queue_head_t wait;
};
extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn, void *pool_data);
extern void mempool_destroy(mempool_t *pool);
extern void * mempool_alloc(mempool_t *pool, int gfp_mask);
extern void mempool_free(void *element, mempool_t *pool);
#endif /* _LINUX_MEMPOOL_H */
......@@ -9,12 +9,12 @@
O_TARGET := mm.o
export-objs := shmem.o filemap.o
export-objs := shmem.o filemap.o mempool.o
obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \
shmem.o
shmem.o mempool.o
obj-$(CONFIG_HIGHMEM) += highmem.o
......
......@@ -1485,8 +1485,8 @@ static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, si
ssize_t retval;
int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
struct kiobuf * iobuf;
struct inode * inode = filp->f_dentry->d_inode;
struct address_space * mapping = inode->i_mapping;
struct address_space * mapping = filp->f_dentry->d_inode->i_mapping;
struct inode * inode = mapping->host;
new_iobuf = 0;
iobuf = filp->f_iobuf;
......
......@@ -18,12 +18,7 @@
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/compiler.h>
#include <linux/kernel_stat.h>
#include <linux/mempool.h>
/*
* Virtual_count is not a pure "count".
......@@ -191,16 +186,36 @@ void kunmap_high(struct page *page)
#define POOL_SIZE 64
/*
* This lock gets no contention at all, normally.
*/
static spinlock_t emergency_lock = SPIN_LOCK_UNLOCKED;
static mempool_t *page_pool;
static void * page_pool_alloc(int gfp_mask, void *data)
{
return alloc_page(gfp_mask & ~ __GFP_HIGHIO);
}
int nr_emergency_pages;
static LIST_HEAD(emergency_pages);
static void page_pool_free(void *page, void *data)
{
__free_page(page);
}
int nr_emergency_bhs;
static LIST_HEAD(emergency_bhs);
static __init int init_emergency_pool(void)
{
struct sysinfo i;
si_meminfo(&i);
si_swapinfo(&i);
if (!i.totalhigh)
return 0;
page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL);
if (!page_pool)
BUG();
printk("highmem bounce pool size: %d pages and bhs.\n", POOL_SIZE);
return 0;
}
__initcall(init_emergency_pool);
/*
* Simple bounce buffer support for highmem pages. Depending on the
......@@ -233,37 +248,10 @@ static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from)
}
}
static __init int init_emergency_pool(void)
{
struct sysinfo i;
si_meminfo(&i);
si_swapinfo(&i);
if (!i.totalhigh)
return 0;
spin_lock_irq(&emergency_lock);
while (nr_emergency_pages < POOL_SIZE) {
struct page * page = alloc_page(GFP_ATOMIC);
if (!page) {
printk("couldn't refill highmem emergency pages");
break;
}
list_add(&page->list, &emergency_pages);
nr_emergency_pages++;
}
spin_unlock_irq(&emergency_lock);
printk("allocated %d pages reserved for the highmem bounces\n", nr_emergency_pages);
return 0;
}
__initcall(init_emergency_pool);
static inline int bounce_end_io (struct bio *bio, int nr_sectors)
{
struct bio *bio_orig = bio->bi_private;
struct bio_vec *bvec, *org_vec;
unsigned long flags;
int ret, i;
if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
......@@ -274,24 +262,13 @@ static inline int bounce_end_io (struct bio *bio, int nr_sectors)
/*
* free up bounce indirect pages used
*/
spin_lock_irqsave(&emergency_lock, flags);
__bio_for_each_segment(bvec, bio, i, 0) {
org_vec = &bio_orig->bi_io_vec[i];
if (bvec->bv_page == org_vec->bv_page)
continue;
if (nr_emergency_pages >= POOL_SIZE)
__free_page(bvec->bv_page);
else {
/*
* We are abusing page->list to manage
* the highmem emergency pool:
*/
list_add(&bvec->bv_page->list, &emergency_pages);
nr_emergency_pages++;
}
mempool_free(bvec->bv_page, page_pool);
}
spin_unlock_irqrestore(&emergency_lock, flags);
out_eio:
ret = bio_orig->bi_end_io(bio_orig, nr_sectors);
......@@ -315,44 +292,6 @@ static int bounce_end_io_read (struct bio *bio, int nr_sectors)
return bounce_end_io(bio, nr_sectors);
}
struct page *alloc_bounce_page(int gfp_mask)
{
struct list_head *tmp;
struct page *page;
page = alloc_page(gfp_mask);
if (page)
return page;
/*
* No luck. First, kick the VM so it doesnt idle around while
* we are using up our emergency rations.
*/
wakeup_bdflush();
repeat_alloc:
/*
* Try to allocate from the emergency pool.
*/
tmp = &emergency_pages;
spin_lock_irq(&emergency_lock);
if (!list_empty(tmp)) {
page = list_entry(tmp->next, struct page, list);
list_del(tmp->next);
nr_emergency_pages--;
}
spin_unlock_irq(&emergency_lock);
if (page)
return page;
/* we need to wait I/O completion */
run_task_queue(&tq_disk);
current->policy |= SCHED_YIELD;
__set_current_state(TASK_RUNNING);
schedule();
goto repeat_alloc;
}
void create_bounce(unsigned long pfn, struct bio **bio_orig)
{
struct page *page;
......@@ -379,7 +318,7 @@ void create_bounce(unsigned long pfn, struct bio **bio_orig)
to = &bio->bi_io_vec[i];
to->bv_page = alloc_bounce_page(GFP_NOHIGHIO);
to->bv_page = mempool_alloc(page_pool, GFP_NOHIGHIO);
to->bv_len = from->bv_len;
to->bv_offset = from->bv_offset;
......
/*
* linux/mm/mempool.c
*
* memory buffer pool support. Such pools are mostly used to
* guarantee deadlock-free IO operations even during extreme
* VM load.
*
* started by Ingo Molnar, Copyright (C) 2001
*/
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mempool.h>
/**
* mempool_create - create a memory pool
* @min_nr: the minimum number of elements guaranteed to be
* allocated for this pool.
* @alloc_fn: user-defined element-allocation function.
* @free_fn: user-defined element-freeing function.
* @pool_data: optional private data available to the user-defined functions.
*
* this function creates and allocates a guaranteed size, preallocated
* memory pool. The pool can be used from the mempool_alloc and mempool_free
* functions. This function might sleep. Both the alloc_fn() and the free_fn()
* functions might sleep - as long as the mempool_alloc function is not called
* from IRQ contexts. The element allocated by alloc_fn() must be able to
* hold a struct list_head. (8 bytes on x86.)
*/
mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn, void *pool_data)
{
mempool_t *pool;
int i;
pool = kmalloc(sizeof(*pool), GFP_KERNEL);
if (!pool)
return NULL;
memset(pool, 0, sizeof(*pool));
spin_lock_init(&pool->lock);
pool->min_nr = min_nr;
pool->pool_data = pool_data;
INIT_LIST_HEAD(&pool->elements);
init_waitqueue_head(&pool->wait);
pool->alloc = alloc_fn;
pool->free = free_fn;
/*
* First pre-allocate the guaranteed number of buffers.
*/
for (i = 0; i < min_nr; i++) {
void *element;
struct list_head *tmp;
element = pool->alloc(GFP_KERNEL, pool->pool_data);
if (unlikely(!element)) {
/*
* Not enough memory - free the allocated ones
* and return:
*/
list_for_each(tmp, &pool->elements) {
element = tmp;
pool->free(element, pool->pool_data);
}
kfree(pool);
return NULL;
}
tmp = element;
list_add(tmp, &pool->elements);
pool->curr_nr++;
}
return pool;
}
/**
* mempool_destroy - deallocate a memory pool
* @pool: pointer to the memory pool which was allocated via
* mempool_create().
*
* this function only sleeps if the free_fn() function sleeps. The caller
* has to guarantee that no mempool_alloc() nor mempool_free() happens in
* this pool when calling this function.
*/
void mempool_destroy(mempool_t *pool)
{
void *element;
struct list_head *head, *tmp;
if (!pool)
return;
head = &pool->elements;
for (tmp = head->next; tmp != head; ) {
element = tmp;
tmp = tmp->next;
pool->free(element, pool->pool_data);
pool->curr_nr--;
}
if (pool->curr_nr)
BUG();
kfree(pool);
}
/**
* mempool_alloc - allocate an element from a specific memory pool
* @pool: pointer to the memory pool which was allocated via
* mempool_create().
* @gfp_mask: the usual allocation bitmask.
*
* this function only sleeps if the alloc_fn function sleeps or
* returns NULL. Note that due to preallocation guarantees this function
* *never* fails.
*/
void * mempool_alloc(mempool_t *pool, int gfp_mask)
{
void *element;
unsigned long flags;
struct list_head *tmp;
int curr_nr;
DECLARE_WAITQUEUE(wait, current);
int gfp_nowait = gfp_mask & ~__GFP_WAIT;
repeat_alloc:
element = pool->alloc(gfp_nowait, pool->pool_data);
if (likely(element != NULL))
return element;
/*
* If the pool is less than 50% full then try harder
* to allocate an element:
*/
if (gfp_mask != gfp_nowait) {
if (pool->curr_nr <= pool->min_nr/2) {
element = pool->alloc(gfp_mask, pool->pool_data);
if (likely(element != NULL))
return element;
}
} else
/* we must not sleep */
return NULL;
/*
* Kick the VM at this point.
*/
wakeup_bdflush();
spin_lock_irqsave(&pool->lock, flags);
if (likely(pool->curr_nr)) {
tmp = pool->elements.next;
list_del(tmp);
element = tmp;
pool->curr_nr--;
spin_unlock_irqrestore(&pool->lock, flags);
return element;
}
add_wait_queue_exclusive(&pool->wait, &wait);
set_task_state(current, TASK_UNINTERRUPTIBLE);
curr_nr = pool->curr_nr;
spin_unlock_irqrestore(&pool->lock, flags);
if (!curr_nr) {
run_task_queue(&tq_disk);
schedule();
}
current->state = TASK_RUNNING;
remove_wait_queue(&pool->wait, &wait);
goto repeat_alloc;
}
/**
* mempool_free - return an element to the pool.
* @element: pool element pointer.
* @pool: pointer to the memory pool which was allocated via
* mempool_create().
*
* this function only sleeps if the free_fn() function sleeps.
*/
void mempool_free(void *element, mempool_t *pool)
{
unsigned long flags;
if (pool->curr_nr < pool->min_nr) {
spin_lock_irqsave(&pool->lock, flags);
if (pool->curr_nr < pool->min_nr) {
list_add(element, &pool->elements);
pool->curr_nr++;
spin_unlock_irqrestore(&pool->lock, flags);
wake_up(&pool->wait);
return;
}
spin_unlock_irqrestore(&pool->lock, flags);
}
pool->free(element, pool->pool_data);
}
EXPORT_SYMBOL(mempool_create);
EXPORT_SYMBOL(mempool_destroy);
EXPORT_SYMBOL(mempool_alloc);
EXPORT_SYMBOL(mempool_free);
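For reference, a minimal usage sketch of the new API (the names here are
illustrative and not part of this commit; the highmem bounce pool above is
the real in-tree user). Note that this implementation chains free elements
through a list_head stored in the element itself, so pool elements must be
at least sizeof(struct list_head) bytes:

#include <linux/slab.h>
#include <linux/mempool.h>

struct foo {
	struct list_head list;	/* elements must be able to hold a list_head */
	int data;
};

static kmem_cache_t *foo_cache;
static mempool_t *foo_pool;

static void *foo_pool_alloc(int gfp_mask, void *pool_data)
{
	/* back the pool with the slab cache passed in as pool_data */
	return kmem_cache_alloc((kmem_cache_t *)pool_data, gfp_mask);
}

static void foo_pool_free(void *element, void *pool_data)
{
	kmem_cache_free((kmem_cache_t *)pool_data, element);
}

static int __init foo_setup(void)
{
	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0, 0, NULL, NULL);
	if (!foo_cache)
		return -ENOMEM;
	/* guarantee that 16 objects are always available */
	foo_pool = mempool_create(16, foo_pool_alloc, foo_pool_free, foo_cache);
	if (!foo_pool)
		return -ENOMEM;
	return 0;
}

/* later, in the IO path: mempool_alloc(foo_pool, GFP_NOIO) never fails
   (though it may sleep), and the object is returned with mempool_free() */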