Merge

fdba8dd2 · Christoph Hellwig · 463b2b8d · 919dc9e2 · fdba8dd2 · fdba8dd2
Commit fdba8dd2 authored Sep 25, 2002 by Christoph Hellwig
18 changed files
--- a/fs/xfs/linux/xfs_aops.c
+++ b/fs/xfs/linux/xfs_aops.c
@@ -37,10 +37,38 @@
 #include <linux/iobuf.h>


-STATIC int linvfs_pb_bmap(struct inode *, loff_t, ssize_t,
-			  struct page_buf_bmap_s *, int);
 STATIC int delalloc_convert(struct inode *, struct page *, int, int);

+STATIC int
+map_blocks(
+	struct inode		*inode,
+	loff_t			offset,
+	ssize_t			count,
+	page_buf_bmap_t		*pbmapp,
+	int			flags)
+{
+	vnode_t			*vp = LINVFS_GET_VP(inode);
+	int			error, nmaps = 1;
+
+retry:
+	if (flags & PBF_FILE_ALLOCATE) {
+		VOP_STRATEGY(vp, offset, count, flags, NULL,
+				pbmapp, &nmaps, error);
+	} else {
+		VOP_BMAP(vp, offset, count, flags, NULL,
+				pbmapp, &nmaps, error);
+	}
+	if (flags & PBF_WRITE) {
+		if (unlikely((flags & PBF_DIRECT) && nmaps &&
+		    (pbmapp->pbm_flags & PBMF_DELAY))) {
+			flags = PBF_WRITE | PBF_FILE_ALLOCATE;
+			goto retry;
+		}
+		VMODIFY(vp);
+	}
+	return -error;
+}
+
 /*
 * match_offset_to_mapping
 * Finds the corresponding mapping in block @map array of the
@@ -195,7 +223,7 @@ probe_unmapped_page(
 	page = find_get_page(mapping, index);
 	if (!page)
 		return 0;
-	if (TestSetPageLocked(page)) {
+	if (PageWriteback(page) || TestSetPageLocked(page)) {
 		page_cache_release(page);
 		return 0;
 	}
@@ -274,7 +302,7 @@ probe_page(
 	page = find_get_page(inode->i_mapping, index);
 	if (!page)
 		return NULL;
-	if (TestSetPageLocked(page)) {
+	if (PageWriteback(page) || TestSetPageLocked(page)) {
 		page_cache_release(page);
 		return NULL;
 	}
@@ -303,6 +331,7 @@ submit_page(

 	BUG_ON(PageWriteback(page));
 	SetPageWriteback(page);
+	clear_page_dirty(page);
 	unlock_page(page);

 	if (cnt) {
@@ -461,7 +490,7 @@ delalloc_convert(

 		if (buffer_delay(bh)) {
 			if (!mp) {
-				err = linvfs_pb_bmap(inode, offset, len, &map,
+				err = map_blocks(inode, offset, len, &map,
 						PBF_WRITE|PBF_FILE_ALLOCATE);
 				if (err)
 					goto error;
@@ -487,7 +516,7 @@ delalloc_convert(
 			if (!mp) {
 				size = probe_unmapped_cluster(inode, page,
 								bh, head);
-				err = linvfs_pb_bmap(inode, offset, size, &map,
+				err = map_blocks(inode, offset, size, &map,
 						PBF_WRITE|PBF_DIRECT);
 				if (err)
 					goto error;
@@ -661,36 +690,6 @@ linvfs_direct_IO(
 					linvfs_get_blocks_direct);
 }

-STATIC int
-linvfs_pb_bmap(
-	struct inode		*inode,
-	loff_t			offset,
-	ssize_t			count,
-	page_buf_bmap_t		*pbmapp,
-	int			flags)
-{
-	vnode_t			*vp = LINVFS_GET_VP(inode);
-	int			error, nmaps = 1;
-
-retry:
-	if (flags & PBF_FILE_ALLOCATE) {
-		VOP_STRATEGY(vp, offset, count, flags, NULL,
-				pbmapp, &nmaps, error);
-	} else {
-		VOP_BMAP(vp, offset, count, flags, NULL,
-				pbmapp, &nmaps, error);
-	}
-	if (flags & PBF_WRITE) {
-		if (unlikely((flags & PBF_DIRECT) && nmaps &&
-		    (pbmapp->pbm_flags & PBMF_DELAY))) {
-			flags = PBF_WRITE | PBF_FILE_ALLOCATE;
-			goto retry;
-		}
-		VMODIFY(vp);
-	}
-	return -error;
-}
-
 STATIC int
 linvfs_bmap(
 	struct address_space	*mapping,
@@ -714,11 +713,11 @@ linvfs_bmap(
 }

 STATIC int
-linvfs_read_full_page(
+linvfs_readpage(
 	struct file		*unused,
 	struct page		*page)
 {
-	return block_read_full_page(page, linvfs_get_block);
+	return mpage_readpage(page, linvfs_get_block);
 }

 STATIC int
@@ -759,7 +758,7 @@ count_page_state(
 }

 STATIC int
-linvfs_write_full_page(
+linvfs_writepage(
 	struct page		*page)
 {
 	int			error;
@@ -801,118 +800,6 @@ linvfs_prepare_write(
 	}
 }

-#if 0
-/* Keeping this for now as an example of a better way of
- * doing O_DIRECT for XFS - the generic path has more
- * overhead than we want.
- */
-
-/*
- * Initiate I/O on a kiobuf of user memory
- */
-STATIC int
-linvfs_direct_IO(
-	int			rw,
-	struct inode		*inode,
-	struct kiobuf		*iobuf,
-	unsigned long		blocknr,
-	int			blocksize)
-{
-	struct page		**maplist;
-	size_t			page_offset;
-	page_buf_t		*pb;
-	page_buf_bmap_t		map;
-	int			error = 0;
-	int			pb_flags, map_flags, pg_index = 0;
-	size_t			length, total;
-	loff_t			offset;
-	size_t			map_size, size;
-
-	total = length = iobuf->length;
-	offset = blocknr;
-	offset <<= inode->i_blkbits;
-
-	maplist = iobuf->maplist;
-	page_offset = iobuf->offset;
-
-	map_flags = (rw ? PBF_WRITE : PBF_READ) | PBF_DIRECT;
-	pb_flags = (rw ? PBF_WRITE : PBF_READ) | PBF_FORCEIO | _PBF_LOCKABLE;
-	while (length) {
-		error = linvfs_pb_bmap(inode, offset, length, &map, map_flags);
-		if (error)
-			break;
-
-		map_size = map.pbm_bsize - map.pbm_delta;
-		size = min(map_size, length);
-		if (map.pbm_flags & PBMF_HOLE) {
-			size_t	zero_len = size;
-
-			if (rw == WRITE)
-				break;
-
-			/* Need to zero it all */
-			while (zero_len) {
-				struct page	*page;
-				size_t		pg_len;
-
-				pg_len = min((size_t)
-						(PAGE_CACHE_SIZE - page_offset),
-						zero_len);
-
-				page = maplist[pg_index];
-
-				memset(kmap(page) + page_offset, 0, pg_len);
-				flush_dcache_page(page);
-				kunmap(page);
-
-				zero_len -= pg_len;
-				if ((pg_len + page_offset) == PAGE_CACHE_SIZE) {
-					pg_index++;
-					page_offset = 0;
-				} else {
-					page_offset = (page_offset + pg_len) &
-							~PAGE_CACHE_MASK;
-				}
-			}
-		} else {
-			int	pg_count;
-
-			pg_count = (size + page_offset + PAGE_CACHE_SIZE - 1)
-					>> PAGE_CACHE_SHIFT;
-			if ((pb = pagebuf_lookup(map.pbm_target, inode, offset,
-						size, pb_flags)) == NULL) {
-				error = -ENOMEM;
-				break;
-			}
-			/* Need to hook up pagebuf to kiobuf pages */
-			pb->pb_pages = &maplist[pg_index];
-			pb->pb_offset = page_offset;
-			pb->pb_page_count = pg_count;
-
-			pb->pb_bn = map.pbm_bn + (map.pbm_delta >> 9);
-			error = pagebuf_iostart(pb, pb_flags);
-			pb->pb_flags &= ~_PBF_LOCKABLE;
-			pagebuf_rele(pb);
-			if (error != 0) {
-				if (error > 0)
-					error = -error;
-				break;
-			}
-
-			page_offset = (page_offset + size) & ~PAGE_CACHE_MASK;
-			if (page_offset)
-				pg_count--;
-			pg_index += pg_count;
-		}
-
-		offset += size;
-		length -= size;
-	}
-
-	return (error ? error : (int)(total - length));
-}
-#endif
-
 /*
 * This gets a page into cleanable state - page locked on entry
 * kept locked on exit. If the page is marked dirty we should
@@ -945,9 +832,9 @@ linvfs_release_page(


 struct address_space_operations linvfs_aops = {
-	.readpage		= linvfs_read_full_page,
+	.readpage		= linvfs_readpage,
 	.readpages		= linvfs_readpages,
-	.writepage		= linvfs_write_full_page,
+	.writepage		= linvfs_writepage,
 	.sync_page		= block_sync_page,
 	.releasepage		= linvfs_release_page,
 	.prepare_write		= linvfs_prepare_write,

--- a/fs/xfs/linux/xfs_file.c
+++ b/fs/xfs/linux/xfs_file.c
@@ -40,70 +40,76 @@ static struct vm_operations_struct linvfs_file_vm_ops;


 STATIC ssize_t
-linvfs_read(
-	struct file	*filp,
-	char		*buf,
-	size_t		size,
-	loff_t		*offset)
+linvfs_readv(
+	struct file		*filp,
+	const struct iovec	*iovp,
+	unsigned long		nr_segs,
+	loff_t			*ppos)
 {
-	vnode_t		*vp;
-	int		error;
+	vnode_t			*vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
+	ssize_t			error;	/* bytes read or -errno; int would truncate */

-	vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
-	ASSERT(vp);
+	VOP_READ(vp, filp, iovp, nr_segs, ppos, NULL, error);

-	VOP_READ(vp, filp, buf, size, offset, NULL, error);
-
-	return(error);
+	return error;
 }


 STATIC ssize_t
-linvfs_write(
-	struct file	*file,
-	const char	*buf,
-	size_t		count,
-	loff_t		*ppos)
+linvfs_writev(
+	struct file		*filp,
+	const struct iovec	*iovp,
+	unsigned long		nr_segs,
+	loff_t			*ppos)
 {
-	struct inode	*inode = file->f_dentry->d_inode;
-	loff_t		pos;
-	vnode_t		*vp;
-	int		err;	/* Use negative errors in this f'n */
-
-	if ((ssize_t) count < 0)
-		return -EINVAL;
-
-	if (!access_ok(VERIFY_READ, buf, count))
-		return -EFAULT;
+	struct inode		*inode = filp->f_dentry->d_inode;
+	vnode_t			*vp = LINVFS_GET_VP(inode);
+	ssize_t			error = filp->f_error;	/* ssize_t: also holds VOP_WRITE result */

-	pos = *ppos;
-	err = -EINVAL;
-	if (pos < 0)
-		goto out;
-
-	err = file->f_error;
-	if (err) {
-		file->f_error = 0;
-		goto out;
+	if (unlikely(error)) {
+		filp->f_error = 0;
+		return error;
 	}

-	vp = LINVFS_GET_VP(inode);
-	ASSERT(vp);
-
-	/* We allow multiple direct writers in, there is no
+	/*
+	 * We allow multiple direct writers in, there is no
 	 * potential call to vmtruncate in that path.
 	 */
-	if (!(file->f_flags & O_DIRECT))
+	if (filp->f_flags & O_DIRECT) {
+		VOP_WRITE(vp, filp, iovp, nr_segs, ppos, NULL, error);
+	} else {
 		down(&inode->i_sem);
+		VOP_WRITE(vp, filp, iovp, nr_segs, ppos, NULL, error);
+		up(&inode->i_sem);
+	}

-	VOP_WRITE(vp, file, buf, count, &pos, NULL, err);
-	*ppos = pos;
+	return error;
+}

-	if (!(file->f_flags & O_DIRECT))
-		up(&inode->i_sem);
-out:

-	return(err);
+STATIC ssize_t
+linvfs_read(
+	struct file		*filp,
+	char			*buf,
+	size_t			count,
+	loff_t			*ppos)
+{
+	struct iovec		iov = {buf, count};
+
+	return linvfs_readv(filp, &iov, 1, ppos);
+}
+
+
+STATIC ssize_t
+linvfs_write(
+	struct file		*file,
+	const char		*buf,
+	size_t			count,
+	loff_t			*ppos)
+{
+	struct iovec		iov = {(void *)buf, count};
+
+	return linvfs_writev(file, &iov, 1, ppos);
 }


@@ -312,6 +318,8 @@ struct file_operations linvfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= linvfs_read,
 	.write		= linvfs_write,
+	.readv		= linvfs_readv,
+	.writev		= linvfs_writev,
 	.ioctl		= linvfs_ioctl,
 	.mmap		= linvfs_file_mmap,
 	.open		= linvfs_open,

--- a/fs/xfs/linux/xfs_lrw.c
+++ b/fs/xfs/linux/xfs_lrw.c
@@ -124,36 +124,64 @@ xfs_iozero(

 ssize_t			/* bytes read, or (-)  error */
 xfs_read(
-	bhv_desc_t	*bdp,
-	struct file	*file,
-	char		*buf,
-	size_t		size,
-	loff_t		*offset,
-	cred_t		*credp)
+	bhv_desc_t		*bdp,
+	struct file		*filp,
+	const struct iovec	*iovp,
+	unsigned long		segs,
+	loff_t			*offp,
+	cred_t			*credp)
 {
-	ssize_t		ret;
-	xfs_fsize_t	n;
-	xfs_inode_t	*ip;
-	xfs_mount_t	*mp;
+	size_t			size = 0;
+	ssize_t			ret;
+	xfs_fsize_t		n;
+	xfs_inode_t		*ip;
+	xfs_mount_t		*mp;
+	unsigned long		seg;
+	int			direct = filp->f_flags & O_DIRECT;

 	ip = XFS_BHVTOI(bdp);
 	mp = ip->i_mount;

 	XFS_STATS_INC(xfsstats.xs_read_calls);

-	if (file->f_flags & O_DIRECT) {
-		if (((__psint_t)buf & BBMASK) ||
-		    (*offset & mp->m_blockmask) ||
+	/* START copy & waste from filemap.c */
+	for (seg = 0; seg < segs; seg++) {
+		const struct iovec *iv = &iovp[seg];
+
+		/*
+		 * If any segment has a negative length, or the cumulative
+		 * length ever wraps negative then return -EINVAL.
+		 */
+		size += iv->iov_len;
+		if (unlikely((ssize_t)(size|iv->iov_len) < 0))
+			return XFS_ERROR(-EINVAL);
+		if (direct) {	/* XFS specific check */
+			if ((__psint_t)iv->iov_base & BBMASK) {
+				if (*offp == ip->i_d.di_size)
+					return 0;
+				return XFS_ERROR(-EINVAL);
+			}
+		}
+		if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
+			continue;
+		if (seg == 0)
+			return XFS_ERROR(-EFAULT);
+		segs = seg;
+		break;
+	}
+	/* END copy & waste from filemap.c */
+
+	if (direct) {
+		if ((*offp & mp->m_blockmask) ||
 		    (size & mp->m_blockmask)) {
-			if (*offset == ip->i_d.di_size) {
+			if (*offp == ip->i_d.di_size) {
 				return (0);
 			}
 			return -XFS_ERROR(EINVAL);
 		}
 	}

-
-	n = XFS_MAX_FILE_OFFSET - *offset;
+	n = XFS_MAX_FILE_OFFSET - *offp;
 	if ((n <= 0) || (size == 0))
 		return 0;

@@ -167,26 +195,24 @@ xfs_read(
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);

 	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
-	    !(file->f_mode & FINVIS)) {
+	    !(filp->f_mode & FINVIS)) {
 		int error;
 		vrwlock_t locktype = VRWLOCK_READ;

-		error = xfs_dm_send_data_event(DM_EVENT_READ, bdp,
-					     *offset, size,
-					     FILP_DELAY_FLAG(file),
-					     &locktype);
+		error = xfs_dm_send_data_event(DM_EVENT_READ, bdp, *offp,
+				size, FILP_DELAY_FLAG(filp), &locktype);
 		if (error) {
 			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 			return -error;
 		}
 	}

-	ret = generic_file_read(file, buf, size, offset);
+	ret = generic_file_readv(filp, iovp, segs, offp);
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

 	XFS_STATS_ADD(xfsstats.xs_read_bytes, ret);

-	if (!(file->f_mode & FINVIS))
+	if (!(filp->f_mode & FINVIS))
 		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);

 	return ret;
@@ -418,32 +444,57 @@ xfs_zero_eof(

 ssize_t				/* bytes written, or (-) error */
 xfs_write(
-	bhv_desc_t	*bdp,
-	struct file	*file,
-	const char	*buf,
-	size_t		size,
-	loff_t		*offset,
-	cred_t		*credp)
+	bhv_desc_t		*bdp,
+	struct file		*file,
+	const struct iovec	*iovp,
+	unsigned long		segs,
+	loff_t			*offset,
+	cred_t			*credp)
 {
-	xfs_inode_t	*xip;
-	xfs_mount_t	*mp;
-	ssize_t		ret;
-	int		error = 0;
-	xfs_fsize_t	isize, new_size;
-	xfs_fsize_t	n, limit = XFS_MAX_FILE_OFFSET;
-	xfs_iocore_t	*io;
-	vnode_t		*vp;
-	struct iovec	iov;
-	int		iolock;
-	int		direct = file->f_flags & O_DIRECT;
-	int		eventsent = 0;
-	vrwlock_t	locktype;
+	size_t			size = 0;
+	xfs_inode_t		*xip;
+	xfs_mount_t		*mp;
+	ssize_t			ret;
+	int			error = 0;
+	xfs_fsize_t		isize, new_size;
+	xfs_fsize_t		n, limit = XFS_MAX_FILE_OFFSET;
+	xfs_iocore_t		*io;
+	vnode_t			*vp;
+	unsigned long		seg;
+	int			iolock;
+	int			direct = file->f_flags & O_DIRECT;
+	int			eventsent = 0;
+	vrwlock_t		locktype;

 	XFS_STATS_INC(xfsstats.xs_write_calls);

 	vp = BHV_TO_VNODE(bdp);
 	xip = XFS_BHVTOI(bdp);

+	/* START copy & waste from filemap.c */
+	for (seg = 0; seg < segs; seg++) {
+		const struct iovec *iv = &iovp[seg];
+
+		/*
+		 * If any segment has a negative length, or the cumulative
+		 * length ever wraps negative then return -EINVAL.
+		 */
+		size += iv->iov_len;
+		if (unlikely((ssize_t)(size|iv->iov_len) < 0))
+			return XFS_ERROR(-EINVAL);
+		if (direct) {	/* XFS specific check */
+			if ((__psint_t)iv->iov_base & BBMASK)
+				return XFS_ERROR(-EINVAL);
+		}
+		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
+			continue;
+		if (seg == 0)
+			return XFS_ERROR(-EFAULT);
+		segs = seg;
+		break;
+	}
+	/* END copy & waste from filemap.c */
+
 	if (size == 0)
 		return 0;

@@ -457,9 +508,8 @@ xfs_write(
 	}

 	if (direct) {
-		if (((__psint_t)buf & BBMASK) ||
-		    (*offset & mp->m_blockmask) ||
-		    (size  & mp->m_blockmask)) {
+		if ((*offset & mp->m_blockmask) ||
+		    (size & mp->m_blockmask)) {
 			return XFS_ERROR(-EINVAL);
 		}
 		iolock = XFS_IOLOCK_SHARED;
@@ -481,6 +531,7 @@ xfs_write(
 		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
 		return -EFBIG;
 	}
+
 	if (n < size)
 		size = n;

@@ -572,10 +623,7 @@ xfs_write(
 		xfs_inval_cached_pages(vp, &xip->i_iocore, *offset, 1, 1);
 	}

-	iov.iov_base = (void *)buf;
-	iov.iov_len = size;
-
-	ret = generic_file_write_nolock(file, &iov, 1, offset);
+	ret = generic_file_write_nolock(file, iovp, segs, offset);

 	if ((ret == -ENOSPC) &&
 	    DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&

--- a/fs/xfs/linux/xfs_lrw.h
+++ b/fs/xfs/linux/xfs_lrw.h
@@ -47,19 +47,19 @@ extern int xfs_bdstrat_cb (struct xfs_buf *);
 extern int xfs_zero_eof (vnode_t *, struct xfs_iocore *, xfs_off_t,
 				xfs_fsize_t, xfs_fsize_t, struct pm *);
 extern ssize_t xfs_read (
-	struct bhv_desc	       *bdp,
-	struct file		*file,
-	char			*buf,
-	size_t			size,
-	loff_t			*offset,
-	struct cred	       *credp);
+	struct bhv_desc		*bdp,
+	struct file		*filp,
+	const struct iovec	*iovp,
+	unsigned long		segs,
+	loff_t			*offp,
+	struct cred		*credp);

 extern ssize_t xfs_write (
 	struct bhv_desc		*bdp,
-	struct file		*file,
-	const char		*buf,
-	size_t			size,
-	loff_t			*offset,
+	struct file		*filp,
+	const struct iovec	*iovp,
+	unsigned long		segs,
+	loff_t			*offp,
 	struct cred		*credp);

 extern int xfs_recover_read_only (xlog_t *);

--- a/fs/xfs/linux/xfs_vnode.h
+++ b/fs/xfs/linux/xfs_vnode.h
@@ -56,10 +56,8 @@ typedef __u64	vnumber_t;

 /*
 * MP locking protocols:
- *	v_flag, v_count				VN_LOCK/VN_UNLOCK
- *	v_vfsp					VN_LOCK/VN_UNLOCK
+ *	v_flag, v_vfsp				VN_LOCK/VN_UNLOCK
 *	v_type					read-only or fs-dependent
- *	v_list, v_hashp, v_hashn		freelist lock
 */
 typedef struct vnode {
 	__u32		v_flag;			/* vnode flags (see below) */
@@ -70,9 +68,9 @@ typedef struct vnode {

 	spinlock_t	v_lock;			/* don't use VLOCK on Linux */
 	struct inode	v_inode;		/* linux inode */
-#ifdef	CONFIG_XFS_VNODE_TRACING
+#ifdef CONFIG_XFS_VNODE_TRACING
 	struct ktrace	*v_trace;		/* trace header structure    */
-#endif	/* CONFIG_XFS_VNODE_TRACING */
+#endif
 } vnode_t;

 /*
@@ -170,7 +168,6 @@ typedef enum vchange {
 #define v_fops		v_bh.bh_first->bd_ops  /* ops for first behavior */


-union rval;
 struct uio;
 struct file;
 struct vattr;
@@ -178,9 +175,11 @@ struct page_buf_bmap_s;
 struct attrlist_cursor_kern;

 typedef int	(*vop_open_t)(bhv_desc_t *, struct cred *);
-typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct file *, char *, size_t,
+typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct file *,
+				const struct iovec *, unsigned long,
 				loff_t *, struct cred *);
-typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct file *, const char *, size_t,
+typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct file *,
+				const struct iovec *, unsigned long,
 				loff_t *, struct cred *);
 typedef int	(*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, unsigned int, unsigned long);
 typedef int	(*vop_getattr_t)(bhv_desc_t *, struct vattr *, int,
@@ -275,21 +274,16 @@ typedef struct vnodeops {
 */
 #define _VOP_(op, vp)	(*((vnodeops_t *)(vp)->v_fops)->op)

-/*
- * Be careful with VOP_OPEN, since we're holding the chain lock on the
- * original vnode and VOP_OPEN semantic allows the new vnode to be returned
- * in vpp. The practice of passing &vp for vpp just doesn't work.
- */
-#define VOP_READ(vp,file,buf,size,offset,cr,rv)				\
+#define VOP_READ(vp,file,iov,segs,offset,cr,rv)				\
 {									\
 	VN_BHV_READ_LOCK(&(vp)->v_bh);					\
-	rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,buf,size,offset,cr); \
+	rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,cr); \
 	VN_BHV_READ_UNLOCK(&(vp)->v_bh);				\
 }
-#define VOP_WRITE(vp,file,buf,size,offset,cr,rv)			\
+#define VOP_WRITE(vp,file,iov,segs,offset,cr,rv)			\
 {									\
 	VN_BHV_READ_LOCK(&(vp)->v_bh);					\
-	rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,buf,size,offset,cr);\
+	rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,cr);\
 	VN_BHV_READ_UNLOCK(&(vp)->v_bh);				\
 }
 #define VOP_BMAP(vp,of,sz,rw,cr,b,n,rv)					\

--- a/fs/xfs/pagebuf/page_buf.c
+++ b/fs/xfs/pagebuf/page_buf.c
@@ -55,11 +55,12 @@
 #include <linux/vmalloc.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
-#include <asm/softirq.h>
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>

+#include <support/debug.h>
 #include <support/kmem.h>
+
 #include "page_buf_internal.h"

 #define SECTOR_SHIFT	9
@@ -548,8 +549,8 @@ _pagebuf_lookup_pages(
 			} else if (!PagePrivate(page)) {
 				unsigned long i, range = (offset + nbytes) >> SECTOR_SHIFT;

-				assert(blocksize < PAGE_CACHE_SIZE);
-				assert(!(pb->pb_flags & _PBF_PRIVATE_BH));
+				ASSERT(blocksize < PAGE_CACHE_SIZE);
+				ASSERT(!(pb->pb_flags & _PBF_PRIVATE_BH));
 				/*
 				 * In this case page->private holds a bitmap
 				 * of uptodate sectors (512) within the page
@@ -1317,8 +1318,8 @@ bio_end_io_pagebuf(
 		} else if (!PagePrivate(page)) {
 			unsigned int	j, range;

-			assert(blocksize < PAGE_CACHE_SIZE);
-			assert(!(pb->pb_flags & _PBF_PRIVATE_BH));
+			ASSERT(blocksize < PAGE_CACHE_SIZE);
+			ASSERT(!(pb->pb_flags & _PBF_PRIVATE_BH));

 			range = (bvec->bv_offset + bvec->bv_len)>>SECTOR_SHIFT;
 			for (j = bvec->bv_offset>>SECTOR_SHIFT; j < range; j++)
@@ -1607,7 +1608,7 @@ pagebuf_iomove(

 	while (cboff < boff) {
 		pagebuf_segment(pb, &cboff, &page, &cpoff, &csize);
-		assert(((csize + cpoff) <= PAGE_CACHE_SIZE));
+		ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));

 		switch (mode) {
 		case PBRW_ZERO:

--- a/fs/xfs/pagebuf/page_buf.h
+++ b/fs/xfs/pagebuf/page_buf.h
@@ -378,13 +378,6 @@ extern int pagebuf_ispin( page_buf_t *); /* check if pagebuf is pinned	*/

 /* Reading and writing pages */

-extern int pagebuf_write_full_page(	/* write a page via pagebuf	*/
-		struct page *,		/* page to write		*/
-		int delalloc);		/* delalloc bh present		*/
-
-extern int pagebuf_release_page(	/* Attempt to convert a delalloc page */
-		struct page *);		/* page to release		*/
-
 extern void pagebuf_delwri_queue(page_buf_t *, int);
 extern void pagebuf_delwri_dequeue(page_buf_t *);


--- a/fs/xfs/pagebuf/page_buf_internal.h
+++ b/fs/xfs/pagebuf/page_buf_internal.h
@@ -151,18 +151,6 @@ extern struct pbstats pbstats;

 #define PB_STATS_INC(count)	( count ++ )

-#undef assert
-#ifdef PAGEBUF_DEBUG
-# define assert(expr) \
-	if (!(expr)) {						\
-		printk("Assertion failed: %s\n%s::%s line %d\n",\
-		#expr,__FILE__,__FUNCTION__,__LINE__);		\
-		BUG();						\
-	}
-#else
-# define assert(x)	do { } while (0)
-#endif
-
 #ifndef STATIC
 # define STATIC static
 #endif

--- a/fs/xfs/pagebuf/page_buf_locking.c
+++ b/fs/xfs/pagebuf/page_buf_locking.c
@@ -54,6 +54,8 @@
 #include <linux/init.h>
 #include <linux/major.h>

+#include <support/debug.h>
+
 #include "page_buf_internal.h"

 #ifndef EVMS_MAJOR
@@ -76,7 +78,7 @@ pagebuf_cond_lock(			/* lock buffer, if not locked	*/
 {
 	int			locked;

-	assert(pb->pb_flags & _PBF_LOCKABLE);
+	ASSERT(pb->pb_flags & _PBF_LOCKABLE);

 	locked = down_trylock(&PBP(pb)->pb_sema) == 0;
 	if (locked) {
@@ -97,7 +99,7 @@ int
 pagebuf_lock_value(
 	page_buf_t		*pb)
 {
-	assert(pb->pb_flags & _PBF_LOCKABLE);
+	ASSERT(pb->pb_flags & _PBF_LOCKABLE);
 	return(atomic_read(&PBP(pb)->pb_sema.count));
 }

@@ -113,7 +115,7 @@ int
 pagebuf_lock(
 	page_buf_t		*pb)
 {
-	assert(pb->pb_flags & _PBF_LOCKABLE);
+	ASSERT(pb->pb_flags & _PBF_LOCKABLE);

 	PB_TRACE(pb, PB_TRACE_REC(lock), 0);
 	if (atomic_read(&PBP(pb)->pb_io_remaining))
@@ -219,7 +221,7 @@ void
 pagebuf_unlock(				/* unlock buffer		*/
 	page_buf_t		*pb)	/* buffer to unlock		*/
 {
-	assert(pb->pb_flags & _PBF_LOCKABLE);
+	ASSERT(pb->pb_flags & _PBF_LOCKABLE);
 	PB_CLEAR_OWNER(pb);
 	up(&PBP(pb)->pb_sema);
 	PB_TRACE(pb, PB_TRACE_REC(unlock), 0);

--- a/fs/xfs/support/time.h
+++ b/fs/xfs/support/time.h
@@ -33,6 +33,7 @@
 #define __XFS_SUPPORT_TIME_H__

 #include <linux/sched.h>
+#include <linux/time.h>

 static inline void delay(long ticks)
 {
@@ -42,8 +43,11 @@ static inline void delay(long ticks)

 static inline void nanotime(struct timespec *tvp)
 {
-	tvp->tv_sec = xtime.tv_sec;
-	tvp->tv_nsec = xtime.tv_nsec;
+	struct timeval tv;
+
+	do_gettimeofday(&tv);
+	tvp->tv_sec = tv.tv_sec;
+	tvp->tv_nsec = tv.tv_usec * 1000;
 }

 #endif /* __XFS_SUPPORT_TIME_H__ */
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -507,26 +507,6 @@ xfs_iget(
 }


-/*
- * A 'special' interface to xfs_iget, where the
- * vnode is already allocated.
- */
-int
-xfs_vn_iget(
-	vfs_t		*vfsp,
-	struct vnode	*vp,
-	xfs_ino_t	ino)
-{
-	xfs_inode_t	*ip;
-	xfs_mount_t	*mp = XFS_BHVTOM(vfsp->vfs_fbhv);
-	int error;
-
-	error = xfs_iget_core(vp, mp, NULL, ino, 0, &ip, 0);
-
-	return error;
-}
-
-
 /*
 * Do the setup for the various locks within the incore inode.
 */

--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -490,7 +490,6 @@ xfs_inode_t	*xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
 void		xfs_inode_lock_init(xfs_inode_t *, struct vnode *);
 int		xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
 			 uint, xfs_inode_t **, xfs_daddr_t);
-int		xfs_vn_iget(vfs_t *, struct vnode *, xfs_ino_t);
 void		xfs_iput(xfs_inode_t *, uint);
 void		xfs_iput_new(xfs_inode_t *, uint);
 void		xfs_ilock(xfs_inode_t *, uint);

--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -788,16 +788,6 @@ xfs_mountfs(
 		}
 	}

-	/*
-	 * Disallow mount attempts with (IRIX) project quota enabled
-	 */
-	if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
-	    (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT)) {
-		cmn_err(CE_WARN, "XFS: IRIX project quota are enabled");
-		error = XFS_ERROR(ENOSYS);
-		goto error1;
-	}
-
 	/*
 	 * Initialize realtime fields in the mount structure
 	 */

--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -91,7 +91,6 @@ xfs_lock_for_rename(
 	int			error;
 	int			i, j;
 	uint			lock_mode;
-	uint			lookup_flags;
 	int			diff_dirs = (dp1 != dp2);

 	ip2 = NULL;
@@ -123,11 +122,7 @@ xfs_lock_for_rename(
 		lock_mode = xfs_ilock_map_shared(dp2);
 	}

-	lookup_flags = DLF_IGET;
-	if (lock_mode == XFS_ILOCK_SHARED) {
-		lookup_flags |= DLF_LOCK_SHARED;
-	}
-	error = xfs_dir_lookup_int(XFS_ITOBHV(dp2), lookup_flags,
+	error = xfs_dir_lookup_int(XFS_ITOBHV(dp2), lock_mode,
 				   dentry2, &inum2, &ip2);
 	if (error == ENOENT) {		/* target does not need to exist. */
 		inum2 = 0;

--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -64,52 +64,27 @@ xfs_get_dir_entry(
 	return 0;
 }

-/*
- * Wrapper around xfs_dir_lookup.
- *
- * If DLF_IGET is set, then this routine will also return the inode.
- * Note that the inode will not be locked. Note, however, that the
- * vnode will have an additional reference in this case.
- */
 int
 xfs_dir_lookup_int(
 	bhv_desc_t		*dir_bdp,
-	int			flags,
+	uint			lock_mode,
 	struct dentry		*dentry,
 	xfs_ino_t		*inum,
 	xfs_inode_t		**ipp)
 {
 	vnode_t		*dir_vp;
 	xfs_inode_t	*dp;
-	char		*name = (char *) dentry->d_name.name;
-	int		name_len = dentry->d_name.len;
 	int		error;
-	int		do_iget;
-	uint		lock_mode;
-	bhv_desc_t	*bdp;

 	dir_vp = BHV_TO_VNODE(dir_bdp);
 	vn_trace_entry(dir_vp, "xfs_dir_lookup_int",
 		       (inst_t *)__return_address);

-	do_iget = flags & DLF_IGET;
-	error = 0;
-
-	if (flags & DLF_LOCK_SHARED) {
-		lock_mode = XFS_ILOCK_SHARED;
-	} else {
-		lock_mode = XFS_ILOCK_EXCL;
-	}
-
 	dp = XFS_BHVTOI(dir_bdp);
-	bdp = NULL;
-
-	/*
-	 * If all else fails, call the directory code.
-	 */

-	error = XFS_DIR_LOOKUP(dp->i_mount, NULL, dp, name, name_len, inum);
-	if (!error && do_iget) {
+	error = XFS_DIR_LOOKUP(dp->i_mount, NULL, dp,
+			(char *)dentry->d_name.name, dentry->d_name.len, inum);
+	if (!error) {
 		/*
 		 * Unlock the directory. We do this because we can't
 		 * hold the directory lock while doing the vn_get()
@@ -119,22 +94,12 @@ xfs_dir_lookup_int(
 		 * reservation in the inactive routine.
 		 */
 		xfs_iunlock(dp, lock_mode);
-
-		if (bdp) {
-			VN_RELE(BHV_TO_VNODE(bdp));
-			bdp = NULL;
-		}
-
 		error = xfs_iget(dp->i_mount, NULL, *inum, 0, ipp, 0);
-
 		xfs_ilock(dp, lock_mode);

 		if (error) {
 			*ipp = NULL;
-			return error;
-		}
-
-		if ((*ipp)->i_d.di_mode == 0) {
+		} else if ((*ipp)->i_d.di_mode == 0) {
 			/*
 			 * The inode has been freed.  Something is
 			 * wrong so just get out of here.
@@ -144,20 +109,8 @@ xfs_dir_lookup_int(
 			*ipp = NULL;
 			xfs_ilock(dp, lock_mode);
 			error = XFS_ERROR(ENOENT);
-		} else {
-			bdp = XFS_ITOBHV(*ipp);
-			bdp = NULL;
 		}
 	}
-	if (bdp) {
-		/* The only time we should get here is if the dir_lookup
-		 * failed.
-		 */
-		ASSERT(error);
-		xfs_iunlock(dp, lock_mode);
-		VN_RELE(BHV_TO_VNODE(bdp));
-		xfs_ilock(dp, lock_mode);
-	}
 	return error;
 }


--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -37,9 +37,6 @@
 #define ITRACE(ip)	vn_trace_ref(XFS_ITOV(ip), __FILE__, __LINE__, \
 				(inst_t *)__return_address)

-#define DLF_IGET	0x01	/* get entry inode if name lookup succeeds */
-#define DLF_LOCK_SHARED 0x02	/* directory locked shared */
-
 struct bhv_desc;
 struct cred;
 struct vnode;
@@ -63,7 +60,7 @@ xfs_get_dir_entry(
 extern int
 xfs_dir_lookup_int(
 	struct bhv_desc		*dir_bdp,
-	int			flags,
+	uint			lock_mode,
 	struct dentry		*dentry,
 	xfs_ino_t		*inum,
 	struct xfs_inode	**ipp);

--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -198,61 +198,17 @@ xfs_cleanup(void)
 }

 /*
- * xfs_cmountfs
- *
- * This function is the common mount file system function for XFS.
+ * xfs_start_flags
+ * 
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
 */
 STATIC int
-xfs_cmountfs(
-	vfs_t		*vfsp,
-	dev_t		ddev,
-	dev_t		logdev,
-	dev_t		rtdev,
-	struct xfs_mount_args *ap,
-	struct cred	*cr)
+xfs_start_flags(
+	struct xfs_mount_args	*ap,
+	struct xfs_mount	*mp,
+	int			ronly)
 {
-	xfs_mount_t	*mp;
-	int		error = 0;
-
-
-	/*
-	 * Allocate VFS private data (xfs mount structure).
-	 */
-	mp = xfs_mount_init();
-
-	vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
-
-	/*
-	 * Open data, real time, and log devices now - order is important.
-	 */
-	mp->m_ddev_targp = pagebuf_lock_enable(ddev, 0);
-	if (IS_ERR(mp->m_ddev_targp)) {
-		error = PTR_ERR(mp->m_ddev_targp);
-		goto error2;
-	}
-
-	if (rtdev != 0) {
-		mp->m_rtdev_targp = 
-				pagebuf_lock_enable(rtdev, 1);
-		if (IS_ERR(mp->m_rtdev_targp)) {
-			error = PTR_ERR(mp->m_rtdev_targp);
-			pagebuf_lock_disable(mp->m_ddev_targp, 0);
-			goto error2;
-		}
-	}
-
-	if (logdev != ddev) {
-		mp->m_logdev_targp = 
-				pagebuf_lock_enable(logdev, 1);
-		if (IS_ERR(mp->m_logdev_targp)) {
-			error = PTR_ERR(mp->m_logdev_targp);
-			pagebuf_lock_disable(mp->m_ddev_targp, 1);
-			if (mp->m_rtdev_targp)
-				pagebuf_lock_disable(mp->m_rtdev_targp, 1);
-			goto error2;
-		}
-	}
-
 	/* Values are in BBs */
 	if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
 		/*
@@ -263,187 +219,155 @@ xfs_cmountfs(
 		 */
 		mp->m_dalign = ap->sunit;
 		mp->m_swidth = ap->swidth;
-	} else {
-		mp->m_dalign = 0;
-		mp->m_swidth = 0;
 	}

-	if (logdev != 0) {
-		if (logdev == ddev) {
-			mp->m_logdev_targp = mp->m_ddev_targp;
-		} else {
-			/* Set the log device's block size */
-			set_blocksize(mp->m_logdev_targp->pbr_bdev, 512);
-		}
-
-		if (ap->logbufs != 0 && ap->logbufs != -1 &&
-		    (ap->logbufs < XLOG_NUM_ICLOGS ||
-		     ap->logbufs > XLOG_MAX_ICLOGS)) {
-			cmn_err(CE_WARN, 
-				"XFS: invalid logbufs value: %d [not %d-%d]\n",
-				ap->logbufs, XLOG_NUM_ICLOGS, XLOG_MAX_ICLOGS);
-			error = XFS_ERROR(EINVAL);
-			goto error3;
-		}
-		mp->m_logbufs = ap->logbufs;
-		if (ap->logbufsize != -1 &&
-		    ap->logbufsize != 16 * 1024 &&
-		    ap->logbufsize != 32 * 1024 &&
-		    ap->logbufsize != 64 * 1024 &&
-		    ap->logbufsize != 128 * 1024 &&
-		    ap->logbufsize != 256 * 1024) {
-			cmn_err(CE_WARN,
-		"XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]\n",
-				ap->logbufsize);
-			error = XFS_ERROR(EINVAL);
-			goto error3;
-		}
-		mp->m_logbsize = ap->logbufsize;
-		mp->m_fsname_len = strlen(ap->fsname) + 1;
-		mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
-		strcpy(mp->m_fsname, ap->fsname);
+	if (ap->logbufs != 0 && ap->logbufs != -1 &&
+	    (ap->logbufs < XLOG_NUM_ICLOGS ||
+	     ap->logbufs > XLOG_MAX_ICLOGS)) {
+		cmn_err(CE_WARN, 
+			"XFS: invalid logbufs value: %d [not %d-%d]\n",
+			ap->logbufs, XLOG_NUM_ICLOGS, XLOG_MAX_ICLOGS);
+		return XFS_ERROR(EINVAL);
 	}
-	if (rtdev != 0) {
-		if (rtdev == ddev || rtdev == logdev) {
-			cmn_err(CE_WARN,
-	"XFS: Cannot mount filesystem with identical rtdev and logdev.");
-			error = XFS_ERROR(EINVAL);
-			goto error3;
-		} else {
-			/* Set the realtime device's block size */
-			set_blocksize(mp->m_rtdev_targp->pbr_bdev, 512);
-		}
+	mp->m_logbufs = ap->logbufs;
+	if (ap->logbufsize != -1 &&
+	    ap->logbufsize != 16 * 1024 &&
+	    ap->logbufsize != 32 * 1024 &&
+	    ap->logbufsize != 64 * 1024 &&
+	    ap->logbufsize != 128 * 1024 &&
+	    ap->logbufsize != 256 * 1024) {
+		cmn_err(CE_WARN,
+	"XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]\n",
+			ap->logbufsize);
+		return XFS_ERROR(EINVAL);
 	}
+	mp->m_logbsize = ap->logbufsize;
+	mp->m_fsname_len = strlen(ap->fsname) + 1;
+	mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
+	strcpy(mp->m_fsname, ap->fsname);

 	/*
 	 * Pull in the 'wsync' and 'ino64' mount options before we do the real
 	 * work of mounting and recovery.  The arg pointer will
 	 * be NULL when we are being called from the root mount code.
 	 */
+	if (ap->flags & XFSMNT_WSYNC)
+		mp->m_flags |= XFS_MOUNT_WSYNC;
 #if XFS_BIG_FILESYSTEMS
-	mp->m_inoadd = 0;
-#endif
-	if (ap != NULL) {
-		if (ap->flags & XFSMNT_WSYNC)
-			mp->m_flags |= XFS_MOUNT_WSYNC;
-#if XFS_BIG_FILESYSTEMS
-		if (ap->flags & XFSMNT_INO64) {
-			mp->m_flags |= XFS_MOUNT_INO64;
-			mp->m_inoadd = XFS_INO64_OFFSET;
-		}
+	if (ap->flags & XFSMNT_INO64) {
+		mp->m_flags |= XFS_MOUNT_INO64;
+		mp->m_inoadd = XFS_INO64_OFFSET;
+	}
 #endif
-		if (ap->flags & XFSMNT_NOATIME)
-			mp->m_flags |= XFS_MOUNT_NOATIME;
+	if (ap->flags & XFSMNT_NOATIME)
+		mp->m_flags |= XFS_MOUNT_NOATIME;

-		if (ap->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA))
-			xfs_qm_mount_quotainit(mp, ap->flags);
+	if (ap->flags & (XFSMNT_UQUOTA | XFSMNT_GQUOTA))
+		xfs_qm_mount_quotainit(mp, ap->flags);

-		if (ap->flags & XFSMNT_RETERR)
-			mp->m_flags |= XFS_MOUNT_RETERR;
+	if (ap->flags & XFSMNT_RETERR)
+		mp->m_flags |= XFS_MOUNT_RETERR;

-		if (ap->flags & XFSMNT_NOALIGN)
-			mp->m_flags |= XFS_MOUNT_NOALIGN;
+	if (ap->flags & XFSMNT_NOALIGN)
+		mp->m_flags |= XFS_MOUNT_NOALIGN;

-		if (ap->flags & XFSMNT_OSYNCISOSYNC)
-			mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
+	if (ap->flags & XFSMNT_OSYNCISOSYNC)
+		mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;

-		/* Default on Linux */
-		if ( 1 || ap->flags & XFSMNT_32BITINODES)
-			mp->m_flags |= XFS_MOUNT_32BITINODES;
+	/* Default on Linux */
+	if (1 || ap->flags & XFSMNT_32BITINODES)
+		mp->m_flags |= XFS_MOUNT_32BITINODES;

-		if (ap->flags & XFSMNT_IRIXSGID)
-			mp->m_flags |= XFS_MOUNT_IRIXSGID;
+	if (ap->flags & XFSMNT_IRIXSGID)
+		mp->m_flags |= XFS_MOUNT_IRIXSGID;

-		if (ap->flags & XFSMNT_IOSIZE) {
-			if (ap->iosizelog > XFS_MAX_IO_LOG ||
-			    ap->iosizelog < XFS_MIN_IO_LOG) {
-				cmn_err(CE_WARN,
-			"XFS: invalid log iosize: %d [not %d-%d]",
-					ap->iosizelog, XFS_MIN_IO_LOG,
-					XFS_MAX_IO_LOG);
-				error = XFS_ERROR(EINVAL);
-				goto error3;
-			}
-
-			mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
-			mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
-		}
-
-		/*
-		 * no recovery flag requires a read-only mount
-		 */
-		if (ap->flags & XFSMNT_NORECOVERY) {
-			if (!(vfsp->vfs_flag & VFS_RDONLY)) {
-				cmn_err(CE_WARN,
-		"XFS: tried to mount a FS read-write without recovery!");
-				error = XFS_ERROR(EINVAL);
-				goto error3;
-			}
-			mp->m_flags |= XFS_MOUNT_NORECOVERY;
+	if (ap->flags & XFSMNT_IOSIZE) {
+		if (ap->iosizelog > XFS_MAX_IO_LOG ||
+		    ap->iosizelog < XFS_MIN_IO_LOG) {
+			cmn_err(CE_WARN,
+		"XFS: invalid log iosize: %d [not %d-%d]",
+				ap->iosizelog, XFS_MIN_IO_LOG,
+				XFS_MAX_IO_LOG);
+			return XFS_ERROR(EINVAL);
 		}

-		if (ap->flags & XFSMNT_NOUUID)
-			mp->m_flags |= XFS_MOUNT_NOUUID;
-		if (ap->flags & XFSMNT_NOLOGFLUSH)
-			mp->m_flags |= XFS_MOUNT_NOLOGFLUSH;
+		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+		mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
 	}

 	/*
-	 * read in superblock to check read-only flags and shared
-	 * mount status
+	 * no recovery flag requires a read-only mount
 	 */
-	if ((error = xfs_readsb(mp)))
-		goto error3;
+	if (ap->flags & XFSMNT_NORECOVERY) {
+		if (!ronly) {
+			cmn_err(CE_WARN,
+	"XFS: tried to mount a FS read-write without recovery!");
+			return XFS_ERROR(EINVAL);
+		}
+		mp->m_flags |= XFS_MOUNT_NORECOVERY;
+	}

+	if (ap->flags & XFSMNT_NOUUID)
+		mp->m_flags |= XFS_MOUNT_NOUUID;
+	if (ap->flags & XFSMNT_NOLOGFLUSH)
+		mp->m_flags |= XFS_MOUNT_NOLOGFLUSH;
+
+	return 0;
+}
+
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock _has_ now been read in.
+ */
+STATIC int
+xfs_finish_flags(
+	struct xfs_mount_args	*ap,
+	struct xfs_mount	*mp,
+	int			ronly)
+{
 	/* Fail a mount where the logbuf is smaller then the log stripe */
 	if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
 		if (((ap->logbufsize == -1) &&
 		     (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) ||
 		    (ap->logbufsize < mp->m_sb.sb_logsunit)) {
-			cmn_err(CE_WARN, "XFS: "
-				"logbuf size must be greater than or equal to log stripe size");
-			xfs_freesb(mp);
-			error = XFS_ERROR(EINVAL);
-			goto error3;
+			cmn_err(CE_WARN,
+	"XFS: logbuf size must be greater than or equal to log stripe size");
+			return XFS_ERROR(EINVAL);
 		}
 	} else {
 		/* Fail a mount if the logbuf is larger than 32K */
 		if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
-			cmn_err(CE_WARN, "XFS: "
-		"XFS: logbuf size for version 1 logs must be 16K or 32K");
-			xfs_freesb(mp);
-			error = XFS_ERROR(EINVAL);
-			goto error3;
+			cmn_err(CE_WARN,
+	"XFS: logbuf size for version 1 logs must be 16K or 32K");
+			return XFS_ERROR(EINVAL);
 		}
 	}

-	pagebuf_target_blocksize(mp->m_ddev_targp, mp->m_sb.sb_blocksize);
-	if (logdev != 0 && logdev != ddev)
-		pagebuf_target_blocksize(mp->m_logdev_targp, mp->m_sb.sb_blocksize);
-	if (rtdev != 0)
-		pagebuf_target_blocksize(mp->m_rtdev_targp, mp->m_sb.sb_blocksize);
-
 	/*
 	 * prohibit r/w mounts of read-only filesystems
 	 */
-	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) &&
-	    !(vfsp->vfs_flag & VFS_RDONLY)) {
-		cmn_err(CE_WARN, "XFS: "
-			"cannot mount a read-only filesystem as read-write");
-		error = XFS_ERROR(EROFS);
-		xfs_freesb(mp);
-		goto error3;
+	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+		cmn_err(CE_WARN,
+	"XFS: cannot mount a read-only filesystem as read-write");
+		return XFS_ERROR(EROFS);
+	}
+
+	/*
+	 * disallow mount attempts with (IRIX) project quota enabled
+	 */
+	if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
+	    (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT)) {
+		cmn_err(CE_WARN,
+	"XFS: cannot mount a filesystem with IRIX project quota enabled");
+		return XFS_ERROR(ENOSYS);
 	}

 	/*
 	 * check for shared mount.
 	 */
-	if (ap && ap->flags & XFSMNT_SHARED) {
-		if (!XFS_SB_VERSION_HASSHARED(&mp->m_sb)) {
-			error = XFS_ERROR(EINVAL);
-			xfs_freesb(mp);
-			goto error3;
-		}
+	if (ap->flags & XFSMNT_SHARED) {
+		if (!XFS_SB_VERSION_HASSHARED(&mp->m_sb))
+			return XFS_ERROR(EINVAL);

 		/*
 		 * For IRIX 6.5, shared mounts must have the shared
@@ -451,32 +375,117 @@ xfs_cmountfs(
 		 * field set, must be version 0 and can only be mounted
 		 * read-only.
 		 */
-		if (!(vfsp->vfs_flag & VFS_RDONLY) ||
-		    !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
-		    mp->m_sb.sb_shared_vn != 0) {
-			error = XFS_ERROR(EINVAL);
-			xfs_freesb(mp);
-			goto error3;
-		}
+		if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
+		     (mp->m_sb.sb_shared_vn != 0))
+			return XFS_ERROR(EINVAL);

 		mp->m_flags |= XFS_MOUNT_SHARED;

 		/*
 		 * Shared XFS V0 can't deal with DMI.  Return EINVAL.
 		 */
-		if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI)) {
-			error = XFS_ERROR(EINVAL);
-			xfs_freesb(mp);
-			goto error3;
-		}
+		if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
+			return XFS_ERROR(EINVAL);
 	}

-	if ((error = xfs_mountfs(vfsp, mp, ddev, 0)) == 0)
-		return 0;
+	return 0;
+}
+
+/*
+ * xfs_cmountfs
+ *
+ * This function is the common mount file system function for XFS.
+ */
+STATIC int
+xfs_cmountfs(
+	vfs_t			*vfsp,
+	dev_t			ddev,
+	dev_t			logdev,
+	dev_t			rtdev,
+	struct xfs_mount_args	*ap,
+	struct cred		*cr)
+{
+	xfs_mount_t		*mp;
+	int			ronly = (vfsp->vfs_flag & VFS_RDONLY);
+	int			error = 0;
+
+	/*
+	 * Allocate VFS private data (xfs mount structure).
+	 */
+	mp = xfs_mount_init();
+
+	vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);

 	/*
-	 * Be careful not to clobber the value of 'error' here.
+	 * Open data, real time, and log devices now - order is important.
 	 */
+	mp->m_ddev_targp = pagebuf_lock_enable(ddev, 0);
+	if (IS_ERR(mp->m_ddev_targp)) {
+		error = PTR_ERR(mp->m_ddev_targp);
+		goto error2;
+	}
+
+	if (rtdev != 0) {
+		mp->m_rtdev_targp = pagebuf_lock_enable(rtdev, 1);
+		if (IS_ERR(mp->m_rtdev_targp)) {
+			error = PTR_ERR(mp->m_rtdev_targp);
+			pagebuf_lock_disable(mp->m_ddev_targp, 0);
+			goto error2;
+		}
+
+		if (rtdev == ddev || rtdev == logdev) {
+			cmn_err(CE_WARN,
+	"XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
+			error = EINVAL;
+			pagebuf_lock_disable(mp->m_ddev_targp, 0);
+			goto error2;
+		}
+		
+		/* Set the realtime device's block size */
+		set_blocksize(mp->m_rtdev_targp->pbr_bdev, 512);
+	}
+
+	if (logdev != ddev) {
+		mp->m_logdev_targp = pagebuf_lock_enable(logdev, 1);
+		if (IS_ERR(mp->m_logdev_targp)) {
+			error = PTR_ERR(mp->m_logdev_targp);
+			pagebuf_lock_disable(mp->m_ddev_targp, 1);
+			if (mp->m_rtdev_targp)
+				pagebuf_lock_disable(mp->m_rtdev_targp, 1);
+			goto error2;
+		}
+
+		/* Set the log device's block size */
+		set_blocksize(mp->m_logdev_targp->pbr_bdev, 512);
+	} else {
+		mp->m_logdev_targp = mp->m_ddev_targp;
+	}
+	
+	if ((error = xfs_start_flags(ap, mp, ronly)))
+		goto error3;
+
+	if ((error = xfs_readsb(mp)))
+		goto error3;
+
+	if ((error = xfs_finish_flags(ap, mp, ronly))) {
+		xfs_freesb(mp);
+		goto error3;
+	}
+
+	pagebuf_target_blocksize(mp->m_ddev_targp, mp->m_sb.sb_blocksize);
+	if (logdev != 0 && logdev != ddev)
+		pagebuf_target_blocksize(mp->m_logdev_targp,
+					mp->m_sb.sb_blocksize);
+	if (rtdev != 0)
+		pagebuf_target_blocksize(mp->m_rtdev_targp,
+					mp->m_sb.sb_blocksize);
+
+	mp->m_cxfstype = XFS_CXFS_NOT;
+	error = xfs_mountfs(vfsp, mp, ddev, 0);
+	if (error)
+		goto error3;
+	return 0;
+
 error3:
 	/* It's impossible to get here before buftargs are filled */
 	xfs_binval(mp->m_ddev_targp);

--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1144,138 +1144,70 @@ xfs_fsync(
 	 * (Note that xfs_inode_item_format() called at commit clears
 	 * the update_* fields.)
 	 */
-	if (!(flag & FSYNC_DATA)) {
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
+	xfs_ilock(ip, XFS_ILOCK_SHARED);

-		if (ip->i_update_core == 0)  {
-			/*
-			 * Timestamps/size haven't changed since last inode
-			 * flush or inode transaction commit.  That means
-			 * either nothing got written or a transaction
-			 * committed which caught the updates.	If the
-			 * latter happened and the transaction hasn't
-			 * hit the disk yet, the inode will be still
-			 * be pinned.  If it is, force the log.
-			 */
-			if (xfs_ipincount(ip) == 0)  {
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL |
-						XFS_ILOCK_SHARED);
-			} else	{
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL |
-						XFS_ILOCK_SHARED);
-				xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
-					      XFS_LOG_FORCE |
-					      ((flag & FSYNC_WAIT)
-					       ? XFS_LOG_SYNC : 0));
-			}
-			error = 0;
-		} else	{
-			/*
-			 * Kick off a transaction to log the inode
-			 * core to get the updates.  Make it
-			 * sync if FSYNC_WAIT is passed in (which
-			 * is done by everybody but specfs).  The
-			 * sync transaction will also force the log.
-			 */
-			xfs_iunlock(ip, XFS_ILOCK_SHARED);
-			tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
-			if ((error = xfs_trans_reserve(tp, 0,
-					XFS_FSYNC_TS_LOG_RES(ip->i_mount),
-					0, 0, 0)))  {
-				xfs_trans_cancel(tp, 0);
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-				return error;
-			}
-			xfs_ilock(ip, XFS_ILOCK_EXCL);
+	/* If we are flushing data then we care about update_size
+	 * being set, otherwise we care about update_core
+	 */
+	if ((flag & FSYNC_DATA) ?
+			(ip->i_update_size == 0) :
+			(ip->i_update_core == 0)) {
+		/*
+		 * Timestamps/size haven't changed since last inode
+		 * flush or inode transaction commit.  That means
+		 * either nothing got written or a transaction
+		 * committed which caught the updates.	If the
+		 * latter happened and the transaction hasn't
+		 * hit the disk yet, the inode will still
+		 * be pinned.  If it is, force the log.
+		 */

-			/*
-			 * Note - it's possible that we might have pushed
-			 * ourselves out of the way during trans_reserve
-			 * which would flush the inode.	 But there's no
-			 * guarantee that the inode buffer has actually
-			 * gone out yet (it's delwri).	Plus the buffer
-			 * could be pinned anyway if it's part of an
-			 * inode in another recent transaction.	 So we
-			 * play it safe and fire off the transaction anyway.
-			 */
-			xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
-			xfs_trans_ihold(tp, ip);
-			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-			if (flag & FSYNC_WAIT)
-				xfs_trans_set_sync(tp);
-			error = xfs_trans_commit(tp, 0, NULL);
-
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED);
+
+		if (xfs_ipincount(ip)) {
+			xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
+				      XFS_LOG_FORCE |
+				      ((flag & FSYNC_WAIT)
+				       ? XFS_LOG_SYNC : 0));
 		}
-	} else {
+		error = 0;
+	} else	{
 		/*
-		 * We don't care about the timestamps here.  We
-		 * only care about the size field growing on us
-		 * and forcing any space allocation transactions.
-		 * We have to flush changes to the size fields
-		 * otherwise we could write out data that
-		 * becomes inaccessible after a crash.
+		 * Kick off a transaction to log the inode
+		 * core to get the updates.  Make it
+		 * sync if FSYNC_WAIT is passed in (which
+		 * is done by everybody but specfs).  The
+		 * sync transaction will also force the log.
 		 */
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
-
-		if (ip->i_update_size == 0)  {
-			/*
-			 * Force the log if the inode is pinned.
-			 * That ensures that all transactions committed
-			 * against the inode hit the disk.  This may do
-			 * too much work but it's safe.
-			 */
-			if (xfs_ipincount(ip) == 0)  {
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL |
-						XFS_ILOCK_SHARED);
-			} else	{
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL |
-						XFS_ILOCK_SHARED);
-				xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
-					      XFS_LOG_FORCE |
-					      ((flag & FSYNC_WAIT)
-					       ? XFS_LOG_SYNC : 0));
-			}
-			error = 0;
-		} else	{
-			/*
-			 * Kick off a sync transaction to log the inode
-			 * core.  The transaction has to be sync since
-			 * we need these updates to guarantee that the
-			 * data written will be seen.  The sync
-			 * transaction will also force the log.
-			 */
-			xfs_iunlock(ip, XFS_ILOCK_SHARED);
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+		tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
+		if ((error = xfs_trans_reserve(tp, 0,
+				XFS_FSYNC_TS_LOG_RES(ip->i_mount),
+				0, 0, 0)))  {
+			xfs_trans_cancel(tp, 0);
+			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+			return error;
+		}
+		xfs_ilock(ip, XFS_ILOCK_EXCL);

-			tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
-			if ((error = xfs_trans_reserve(tp, 0,
-					XFS_FSYNC_TS_LOG_RES(ip->i_mount),
-					0, 0, 0)))  {
-				xfs_trans_cancel(tp, 0);
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-				return error;
-			}
-			xfs_ilock(ip, XFS_ILOCK_EXCL);
+		/*
+		 * Note - it's possible that we might have pushed
+		 * ourselves out of the way during trans_reserve
+		 * which would flush the inode.	 But there's no
+		 * guarantee that the inode buffer has actually
+		 * gone out yet (it's delwri).	Plus the buffer
+		 * could be pinned anyway if it's part of an
+		 * inode in another recent transaction.	 So we
+		 * play it safe and fire off the transaction anyway.
+		 */
+		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
+		xfs_trans_ihold(tp, ip);
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+		if (flag & FSYNC_WAIT)
+			xfs_trans_set_sync(tp);
+		error = xfs_trans_commit(tp, 0, NULL);

-			/*
-			 * Note - it's possible that we might have pushed
-			 * ourselves out of the way during trans_reserve
-			 * which would flush the inode.	 But there's no
-			 * guarantee that the inode buffer has actually
-			 * gone out yet (it's delwri).	Plus the buffer
-			 * could be pinned anyway if it's part of an
-			 * inode in another recent transaction.	 So we
-			 * play it safe and fire off the transaction anyway.
-			 */
-			xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
-			xfs_trans_ihold(tp, ip);
-			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-			if (flag & FSYNC_WAIT)
-				xfs_trans_set_sync(tp);
-			error = xfs_trans_commit(tp, 0, NULL);
-
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
-		}
+		xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
 	}
 	return error;
 }
@@ -1958,7 +1890,6 @@ xfs_lookup(
 	xfs_ino_t		e_inum;
 	int			error;
 	uint			lock_mode;
-	uint			lookup_flags;
 	vnode_t			*dir_vp;

 	dir_vp = BHV_TO_VNODE(dir_bdp);
@@ -1971,12 +1902,7 @@ xfs_lookup(
 		return XFS_ERROR(EIO);

 	lock_mode = xfs_ilock_map_shared(dp);
-
-	lookup_flags = DLF_IGET;
-	if (lock_mode == XFS_ILOCK_SHARED) {
-		lookup_flags |= DLF_LOCK_SHARED;
-	}
-	error = xfs_dir_lookup_int(dir_bdp, lookup_flags, dentry, &e_inum, &ip);
+	error = xfs_dir_lookup_int(dir_bdp, lock_mode, dentry, &e_inum, &ip);
 	if (error) {
 		xfs_iunlock_map_shared(dp, lock_mode);
 		return error;