Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
linux
Commits
88e8fda9
Commit
88e8fda9
authored
Feb 24, 2015
by
Dave Chinner
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'xfs-mmap-lock' into for-next
parents
4225441a
723cac48
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
217 additions
and
114 deletions
+217
-114
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_bmap_util.c
+15
-16
fs/xfs/xfs_file.c
fs/xfs/xfs_file.c
+54
-16
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.c
+97
-31
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode.h
+22
-7
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_ioctl.c
+4
-1
fs/xfs/xfs_iops.c
fs/xfs/xfs_iops.c
+20
-43
fs/xfs/xfs_super.c
fs/xfs/xfs_super.c
+2
-0
fs/xfs/xfs_trace.h
fs/xfs/xfs_trace.h
+3
-0
No files found.
fs/xfs/xfs_bmap_util.c
View file @
88e8fda9
...
...
@@ -1599,13 +1599,6 @@ xfs_swap_extent_flush(
/* Verify O_DIRECT for ftmp */
if
(
VFS_I
(
ip
)
->
i_mapping
->
nrpages
)
return
-
EINVAL
;
/*
* Don't try to swap extents on mmap()d files because we can't lock
* out races against page faults safely.
*/
if
(
mapping_mapped
(
VFS_I
(
ip
)
->
i_mapping
))
return
-
EBUSY
;
return
0
;
}
...
...
@@ -1633,13 +1626,14 @@ xfs_swap_extents(
}
/*
* Lock
up the inodes against other IO and truncate to begin with.
*
Then we can ensure the inodes are flushed and have no page cache
*
safely. Once we have done this we can take the ilocks and do the rest
* of the checks.
* Lock
the inodes against other IO, page faults and truncate to
*
begin with. Then we can ensure the inodes are flushed and have no
*
page cache safely. Once we have done this we can take the ilocks and
*
do the rest
of the checks.
*/
lock_flags
=
XFS_IOLOCK_EXCL
;
lock_flags
=
XFS_IOLOCK_EXCL
|
XFS_MMAPLOCK_EXCL
;
xfs_lock_two_inodes
(
ip
,
tip
,
XFS_IOLOCK_EXCL
);
xfs_lock_two_inodes
(
ip
,
tip
,
XFS_MMAPLOCK_EXCL
);
/* Verify that both files have the same format */
if
((
ip
->
i_d
.
di_mode
&
S_IFMT
)
!=
(
tip
->
i_d
.
di_mode
&
S_IFMT
))
{
...
...
@@ -1666,8 +1660,16 @@ xfs_swap_extents(
xfs_trans_cancel
(
tp
,
0
);
goto
out_unlock
;
}
/*
* Lock and join the inodes to the transaction so that transaction commit
* or cancel will unlock the inodes from this point onwards.
*/
xfs_lock_two_inodes
(
ip
,
tip
,
XFS_ILOCK_EXCL
);
lock_flags
|=
XFS_ILOCK_EXCL
;
xfs_trans_ijoin
(
tp
,
ip
,
lock_flags
);
xfs_trans_ijoin
(
tp
,
tip
,
lock_flags
);
/* Verify all data are being swapped */
if
(
sxp
->
sx_offset
!=
0
||
...
...
@@ -1720,9 +1722,6 @@ xfs_swap_extents(
goto
out_trans_cancel
;
}
xfs_trans_ijoin
(
tp
,
ip
,
lock_flags
);
xfs_trans_ijoin
(
tp
,
tip
,
lock_flags
);
/*
* Before we've swapped the forks, let's set the owners of the forks
* appropriately. We have to do this as we are demand paging the btree
...
...
@@ -1856,5 +1855,5 @@ xfs_swap_extents(
out_trans_cancel:
xfs_trans_cancel
(
tp
,
0
);
goto
out
_unlock
;
goto
out
;
}
fs/xfs/xfs_file.c
View file @
88e8fda9
...
...
@@ -847,6 +847,9 @@ xfs_file_fallocate(
if
(
error
)
goto
out_unlock
;
xfs_ilock
(
ip
,
XFS_MMAPLOCK_EXCL
);
iolock
|=
XFS_MMAPLOCK_EXCL
;
if
(
mode
&
FALLOC_FL_PUNCH_HOLE
)
{
error
=
xfs_free_file_space
(
ip
,
offset
,
len
);
if
(
error
)
...
...
@@ -996,20 +999,6 @@ xfs_file_mmap(
return
0
;
}
/*
 * Write-protection fault handler for mmap()d files: the faulting page is
 * being made writable.  Handing the fault to block_page_mkwrite() with
 * xfs_get_blocks lets the page state be set up correctly for a writable
 * page, which means correct delalloc accounting (ENOSPC checking!) and
 * unwritten extent mapping.
 *
 * Returns whatever block_page_mkwrite() returns.
 */
STATIC int
xfs_vm_page_mkwrite(
	struct vm_area_struct	*vma,
	struct vm_fault		*vmf)
{
	int			ret;

	ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
	return ret;
}
/*
* This type is designed to indicate the type of offset we would like
* to search from page cache for xfs_seek_hole_data().
...
...
@@ -1385,6 +1374,55 @@ xfs_file_llseek(
}
}
/*
 * Page fault handler providing the locking that serialises IO during page
 * faults.  The inode's mmap lock is held shared for the duration of
 * filemap_fault(), which results in a lock ordering of:
 *
 * mmap_sem (MM)
 *   i_mmap_lock (XFS - truncate serialisation)
 *     page_lock (MM)
 *       i_lock (XFS - extent map serialisation)
 *
 * Returns the value produced by filemap_fault().
 */
STATIC int
xfs_filemap_fault(
	struct vm_area_struct	*vma,
	struct vm_fault		*vmf)
{
	struct xfs_inode	*ip;
	int			ret;

	/* The XFS inode is reached through the mapping host of the file. */
	ip = XFS_I(vma->vm_file->f_mapping->host);
	trace_xfs_filemap_fault(ip);

	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
	ret = filemap_fault(vma, vmf);
	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);

	return ret;
}
/*
 * Write-protection fault handler: an mmap()d file page is being made
 * writable.  Running block_page_mkwrite() with xfs_get_blocks lets the page
 * state be set up correctly for a writable page, which means correct
 * delalloc accounting (ENOSPC checking!) and unwritten extent mapping.
 *
 * The inode's mmap lock is held shared around the call.  Returns the value
 * produced by block_page_mkwrite().
 */
STATIC int
xfs_filemap_page_mkwrite(
	struct vm_area_struct	*vma,
	struct vm_fault		*vmf)
{
	struct xfs_inode	*ip;
	int			ret;

	/* The XFS inode is reached through the mapping host of the file. */
	ip = XFS_I(vma->vm_file->f_mapping->host);
	trace_xfs_filemap_page_mkwrite(ip);

	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
	ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);

	return ret;
}
const
struct
file_operations
xfs_file_operations
=
{
.
llseek
=
xfs_file_llseek
,
.
read
=
new_sync_read
,
...
...
@@ -1417,7 +1455,7 @@ const struct file_operations xfs_dir_file_operations = {
};
static
const
struct
vm_operations_struct
xfs_file_vm_ops
=
{
.
fault
=
filemap_fault
,
.
fault
=
xfs_
filemap_fault
,
.
map_pages
=
filemap_map_pages
,
.
page_mkwrite
=
xfs_
vm
_page_mkwrite
,
.
page_mkwrite
=
xfs_
filemap
_page_mkwrite
,
};
fs/xfs/xfs_inode.c
View file @
88e8fda9
...
...
@@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
}
/*
* The xfs inode contains
2 locks: a multi-reader lock called the
*
i_iolock and a multi-reader lock called the i_lock. This routin
e
*
allows either or both of the locks to be
obtained.
* The xfs inode contains
3 multi-reader locks: the i_iolock, the i_mmap_lock and
*
the i_lock. This routine allows various combinations of the locks to b
e
* obtained.
*
* The
2 locks should always be ordered so that the IO lock is
*
obtained fir
st in order to prevent deadlock.
* The
3 locks should always be ordered so that the IO lock is obtained first,
*
the mmap lock second and the ilock la
st in order to prevent deadlock.
*
* ip -- the inode being locked
* lock_flags -- this parameter indicates the inode's locks
* to be locked. It can be:
* XFS_IOLOCK_SHARED,
* XFS_IOLOCK_EXCL,
* XFS_ILOCK_SHARED,
* XFS_ILOCK_EXCL,
* XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
* XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
* XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
* XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
* Basic locking order:
*
* i_iolock -> i_mmap_lock -> page_lock -> i_ilock
*
* mmap_sem locking order:
*
* i_iolock -> page lock -> mmap_sem
* mmap_sem -> i_mmap_lock -> page_lock
*
* The difference in mmap_sem locking order means that we cannot hold the
* i_mmap_lock over syscall based read(2)/write(2) IO. These IO paths can
* fault in pages during copy in/out (for buffered IO) or require the mmap_sem
* in get_user_pages() to map the user pages into the kernel address space for
* direct IO. Similarly the i_iolock cannot be taken inside a page fault because
* page faults already hold the mmap_sem.
*
* Hence to serialise fully against both syscall and mmap based IO, we need to
* take both the i_iolock and the i_mmap_lock. These locks should *only* be both
* taken in places where we need to invalidate the page cache in a race
* free manner (e.g. truncate, hole punch and other extent manipulation
* functions).
*/
void
xfs_ilock
(
...
...
@@ -150,6 +160,8 @@ xfs_ilock(
*/
ASSERT
((
lock_flags
&
(
XFS_IOLOCK_SHARED
|
XFS_IOLOCK_EXCL
))
!=
(
XFS_IOLOCK_SHARED
|
XFS_IOLOCK_EXCL
));
ASSERT
((
lock_flags
&
(
XFS_MMAPLOCK_SHARED
|
XFS_MMAPLOCK_EXCL
))
!=
(
XFS_MMAPLOCK_SHARED
|
XFS_MMAPLOCK_EXCL
));
ASSERT
((
lock_flags
&
(
XFS_ILOCK_SHARED
|
XFS_ILOCK_EXCL
))
!=
(
XFS_ILOCK_SHARED
|
XFS_ILOCK_EXCL
));
ASSERT
((
lock_flags
&
~
(
XFS_LOCK_MASK
|
XFS_LOCK_DEP_MASK
))
==
0
);
...
...
@@ -159,6 +171,11 @@ xfs_ilock(
else
if
(
lock_flags
&
XFS_IOLOCK_SHARED
)
mraccess_nested
(
&
ip
->
i_iolock
,
XFS_IOLOCK_DEP
(
lock_flags
));
if
(
lock_flags
&
XFS_MMAPLOCK_EXCL
)
mrupdate_nested
(
&
ip
->
i_mmaplock
,
XFS_MMAPLOCK_DEP
(
lock_flags
));
else
if
(
lock_flags
&
XFS_MMAPLOCK_SHARED
)
mraccess_nested
(
&
ip
->
i_mmaplock
,
XFS_MMAPLOCK_DEP
(
lock_flags
));
if
(
lock_flags
&
XFS_ILOCK_EXCL
)
mrupdate_nested
(
&
ip
->
i_lock
,
XFS_ILOCK_DEP
(
lock_flags
));
else
if
(
lock_flags
&
XFS_ILOCK_SHARED
)
...
...
@@ -191,6 +208,8 @@ xfs_ilock_nowait(
*/
ASSERT
((
lock_flags
&
(
XFS_IOLOCK_SHARED
|
XFS_IOLOCK_EXCL
))
!=
(
XFS_IOLOCK_SHARED
|
XFS_IOLOCK_EXCL
));
ASSERT
((
lock_flags
&
(
XFS_MMAPLOCK_SHARED
|
XFS_MMAPLOCK_EXCL
))
!=
(
XFS_MMAPLOCK_SHARED
|
XFS_MMAPLOCK_EXCL
));
ASSERT
((
lock_flags
&
(
XFS_ILOCK_SHARED
|
XFS_ILOCK_EXCL
))
!=
(
XFS_ILOCK_SHARED
|
XFS_ILOCK_EXCL
));
ASSERT
((
lock_flags
&
~
(
XFS_LOCK_MASK
|
XFS_LOCK_DEP_MASK
))
==
0
);
...
...
@@ -202,21 +221,35 @@ xfs_ilock_nowait(
if
(
!
mrtryaccess
(
&
ip
->
i_iolock
))
goto
out
;
}
if
(
lock_flags
&
XFS_MMAPLOCK_EXCL
)
{
if
(
!
mrtryupdate
(
&
ip
->
i_mmaplock
))
goto
out_undo_iolock
;
}
else
if
(
lock_flags
&
XFS_MMAPLOCK_SHARED
)
{
if
(
!
mrtryaccess
(
&
ip
->
i_mmaplock
))
goto
out_undo_iolock
;
}
if
(
lock_flags
&
XFS_ILOCK_EXCL
)
{
if
(
!
mrtryupdate
(
&
ip
->
i_lock
))
goto
out_undo_
io
lock
;
goto
out_undo_
mmap
lock
;
}
else
if
(
lock_flags
&
XFS_ILOCK_SHARED
)
{
if
(
!
mrtryaccess
(
&
ip
->
i_lock
))
goto
out_undo_
io
lock
;
goto
out_undo_
mmap
lock
;
}
return
1
;
out_undo_iolock:
out_undo_mmaplock:
if
(
lock_flags
&
XFS_MMAPLOCK_EXCL
)
mrunlock_excl
(
&
ip
->
i_mmaplock
);
else
if
(
lock_flags
&
XFS_MMAPLOCK_SHARED
)
mrunlock_shared
(
&
ip
->
i_mmaplock
);
out_undo_iolock:
if
(
lock_flags
&
XFS_IOLOCK_EXCL
)
mrunlock_excl
(
&
ip
->
i_iolock
);
else
if
(
lock_flags
&
XFS_IOLOCK_SHARED
)
mrunlock_shared
(
&
ip
->
i_iolock
);
out:
out:
return
0
;
}
...
...
@@ -244,6 +277,8 @@ xfs_iunlock(
*/
ASSERT
((
lock_flags
&
(
XFS_IOLOCK_SHARED
|
XFS_IOLOCK_EXCL
))
!=
(
XFS_IOLOCK_SHARED
|
XFS_IOLOCK_EXCL
));
ASSERT
((
lock_flags
&
(
XFS_MMAPLOCK_SHARED
|
XFS_MMAPLOCK_EXCL
))
!=
(
XFS_MMAPLOCK_SHARED
|
XFS_MMAPLOCK_EXCL
));
ASSERT
((
lock_flags
&
(
XFS_ILOCK_SHARED
|
XFS_ILOCK_EXCL
))
!=
(
XFS_ILOCK_SHARED
|
XFS_ILOCK_EXCL
));
ASSERT
((
lock_flags
&
~
(
XFS_LOCK_MASK
|
XFS_LOCK_DEP_MASK
))
==
0
);
...
...
@@ -254,6 +289,11 @@ xfs_iunlock(
else
if
(
lock_flags
&
XFS_IOLOCK_SHARED
)
mrunlock_shared
(
&
ip
->
i_iolock
);
if
(
lock_flags
&
XFS_MMAPLOCK_EXCL
)
mrunlock_excl
(
&
ip
->
i_mmaplock
);
else
if
(
lock_flags
&
XFS_MMAPLOCK_SHARED
)
mrunlock_shared
(
&
ip
->
i_mmaplock
);
if
(
lock_flags
&
XFS_ILOCK_EXCL
)
mrunlock_excl
(
&
ip
->
i_lock
);
else
if
(
lock_flags
&
XFS_ILOCK_SHARED
)
...
...
@@ -271,11 +311,14 @@ xfs_ilock_demote(
xfs_inode_t
*
ip
,
uint
lock_flags
)
{
ASSERT
(
lock_flags
&
(
XFS_IOLOCK_EXCL
|
XFS_ILOCK_EXCL
));
ASSERT
((
lock_flags
&
~
(
XFS_IOLOCK_EXCL
|
XFS_ILOCK_EXCL
))
==
0
);
ASSERT
(
lock_flags
&
(
XFS_IOLOCK_EXCL
|
XFS_MMAPLOCK_EXCL
|
XFS_ILOCK_EXCL
));
ASSERT
((
lock_flags
&
~
(
XFS_IOLOCK_EXCL
|
XFS_MMAPLOCK_EXCL
|
XFS_ILOCK_EXCL
))
==
0
);
if
(
lock_flags
&
XFS_ILOCK_EXCL
)
mrdemote
(
&
ip
->
i_lock
);
if
(
lock_flags
&
XFS_MMAPLOCK_EXCL
)
mrdemote
(
&
ip
->
i_mmaplock
);
if
(
lock_flags
&
XFS_IOLOCK_EXCL
)
mrdemote
(
&
ip
->
i_iolock
);
...
...
@@ -294,6 +337,12 @@ xfs_isilocked(
return
rwsem_is_locked
(
&
ip
->
i_lock
.
mr_lock
);
}
if
(
lock_flags
&
(
XFS_MMAPLOCK_EXCL
|
XFS_MMAPLOCK_SHARED
))
{
if
(
!
(
lock_flags
&
XFS_MMAPLOCK_SHARED
))
return
!!
ip
->
i_mmaplock
.
mr_writer
;
return
rwsem_is_locked
(
&
ip
->
i_mmaplock
.
mr_lock
);
}
if
(
lock_flags
&
(
XFS_IOLOCK_EXCL
|
XFS_IOLOCK_SHARED
))
{
if
(
!
(
lock_flags
&
XFS_IOLOCK_SHARED
))
return
!!
ip
->
i_iolock
.
mr_writer
;
...
...
@@ -314,14 +363,27 @@ int xfs_lock_delays;
#endif
/*
* Bump the subclass so xfs_lock_inodes() acquires each lock with
* a different value
* Bump the subclass so xfs_lock_inodes() acquires each lock with a different
* value. This shouldn't be called for page fault locking, but we also need to
* ensure we don't overrun the number of lockdep subclasses for the iolock or
* mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
*/
static
inline
int
xfs_lock_inumorder
(
int
lock_mode
,
int
subclass
)
{
if
(
lock_mode
&
(
XFS_IOLOCK_SHARED
|
XFS_IOLOCK_EXCL
))
if
(
lock_mode
&
(
XFS_IOLOCK_SHARED
|
XFS_IOLOCK_EXCL
))
{
ASSERT
(
subclass
+
XFS_LOCK_INUMORDER
<
(
1
<<
(
XFS_MMAPLOCK_SHIFT
-
XFS_IOLOCK_SHIFT
)));
lock_mode
|=
(
subclass
+
XFS_LOCK_INUMORDER
)
<<
XFS_IOLOCK_SHIFT
;
}
if
(
lock_mode
&
(
XFS_MMAPLOCK_SHARED
|
XFS_MMAPLOCK_EXCL
))
{
ASSERT
(
subclass
+
XFS_LOCK_INUMORDER
<
(
1
<<
(
XFS_ILOCK_SHIFT
-
XFS_MMAPLOCK_SHIFT
)));
lock_mode
|=
(
subclass
+
XFS_LOCK_INUMORDER
)
<<
XFS_MMAPLOCK_SHIFT
;
}
if
(
lock_mode
&
(
XFS_ILOCK_SHARED
|
XFS_ILOCK_EXCL
))
lock_mode
|=
(
subclass
+
XFS_LOCK_INUMORDER
)
<<
XFS_ILOCK_SHIFT
;
...
...
@@ -440,10 +502,10 @@ xfs_lock_inodes(
}
/*
* xfs_lock_two_inodes() can only be used to lock one type of lock
*
at a time - the iolock or the ilock, but not both at once. If
*
we lock both at once, lockdep will report false positives saying
*
we
have violated locking orders.
* xfs_lock_two_inodes() can only be used to lock one type of lock
at a time -
*
the iolock, the mmaplock or the ilock, but not more than one at a time. If we
*
lock more than one at a time, lockdep will report false positives saying we
* have violated locking orders.
*/
void
xfs_lock_two_inodes
(
...
...
@@ -455,8 +517,12 @@ xfs_lock_two_inodes(
int
attempts
=
0
;
xfs_log_item_t
*
lp
;
if
(
lock_mode
&
(
XFS_IOLOCK_SHARED
|
XFS_IOLOCK_EXCL
))
ASSERT
((
lock_mode
&
(
XFS_ILOCK_SHARED
|
XFS_ILOCK_EXCL
))
==
0
);
if
(
lock_mode
&
(
XFS_IOLOCK_SHARED
|
XFS_IOLOCK_EXCL
))
{
ASSERT
(
!
(
lock_mode
&
(
XFS_MMAPLOCK_SHARED
|
XFS_MMAPLOCK_EXCL
)));
ASSERT
(
!
(
lock_mode
&
(
XFS_ILOCK_SHARED
|
XFS_ILOCK_EXCL
)));
}
else
if
(
lock_mode
&
(
XFS_MMAPLOCK_SHARED
|
XFS_MMAPLOCK_EXCL
))
ASSERT
(
!
(
lock_mode
&
(
XFS_ILOCK_SHARED
|
XFS_ILOCK_EXCL
)));
ASSERT
(
ip0
->
i_ino
!=
ip1
->
i_ino
);
if
(
ip0
->
i_ino
>
ip1
->
i_ino
)
{
...
...
fs/xfs/xfs_inode.h
View file @
88e8fda9
...
...
@@ -56,6 +56,7 @@ typedef struct xfs_inode {
struct
xfs_inode_log_item
*
i_itemp
;
/* logging information */
mrlock_t
i_lock
;
/* inode lock */
mrlock_t
i_iolock
;
/* inode IO lock */
mrlock_t
i_mmaplock
;
/* inode mmap IO lock */
atomic_t
i_pincount
;
/* inode pin count */
spinlock_t
i_flags_lock
;
/* inode i_flags lock */
/* Miscellaneous state. */
...
...
@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
#define XFS_IOLOCK_SHARED (1<<1)
#define XFS_ILOCK_EXCL (1<<2)
#define XFS_ILOCK_SHARED (1<<3)
#define XFS_MMAPLOCK_EXCL (1<<4)
#define XFS_MMAPLOCK_SHARED (1<<5)
#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
| XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
#define XFS_LOCK_FLAGS \
{ XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
{ XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
{ XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
{ XFS_ILOCK_SHARED, "ILOCK_SHARED" }
{ XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \
{ XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \
{ XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" }
/*
...
...
@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
#define XFS_IOLOCK_SHIFT 16
#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
#define XFS_MMAPLOCK_SHIFT 20
#define XFS_ILOCK_SHIFT 24
#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
#define XFS_IOLOCK_DEP_MASK 0x00ff0000
#define XFS_IOLOCK_DEP_MASK 0x000f0000
#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
#define XFS_ILOCK_DEP_MASK 0xff000000
#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
XFS_MMAPLOCK_DEP_MASK | \
XFS_ILOCK_DEP_MASK)
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \
>> XFS_IOLOCK_SHIFT)
#define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \
>> XFS_MMAPLOCK_SHIFT)
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \
>> XFS_ILOCK_SHIFT)
/*
* For multiple groups support: if S_ISGID bit is set in the parent
...
...
fs/xfs/xfs_ioctl.c
View file @
88e8fda9
...
...
@@ -631,7 +631,7 @@ xfs_ioc_space(
if
(
filp
->
f_flags
&
O_DSYNC
)
flags
|=
XFS_PREALLOC_SYNC
;
if
(
ioflags
&
XFS_IO_INVIS
)
if
(
ioflags
&
XFS_IO_INVIS
)
flags
|=
XFS_PREALLOC_INVISIBLE
;
error
=
mnt_want_write_file
(
filp
);
...
...
@@ -643,6 +643,9 @@ xfs_ioc_space(
if
(
error
)
goto
out_unlock
;
xfs_ilock
(
ip
,
XFS_MMAPLOCK_EXCL
);
iolock
|=
XFS_MMAPLOCK_EXCL
;
switch
(
bf
->
l_whence
)
{
case
0
:
/*SEEK_SET*/
break
;
...
...
fs/xfs/xfs_iops.c
View file @
88e8fda9
...
...
@@ -771,6 +771,7 @@ xfs_setattr_size(
return
error
;
ASSERT
(
xfs_isilocked
(
ip
,
XFS_IOLOCK_EXCL
));
ASSERT
(
xfs_isilocked
(
ip
,
XFS_MMAPLOCK_EXCL
));
ASSERT
(
S_ISREG
(
ip
->
i_d
.
di_mode
));
ASSERT
((
iattr
->
ia_valid
&
(
ATTR_UID
|
ATTR_GID
|
ATTR_ATIME
|
ATTR_ATIME_SET
|
ATTR_MTIME_SET
|
ATTR_KILL_PRIV
|
ATTR_TIMES_SET
))
==
0
);
...
...
@@ -834,55 +835,27 @@ xfs_setattr_size(
inode_dio_wait
(
inode
);
/*
* Do all the page cache truncate work outside the transaction context
* as the "lock" order is page lock->log space reservation. i.e.
* locking pages inside the transaction can ABBA deadlock with
* writeback. We have to do the VFS inode size update before we truncate
* the pagecache, however, to avoid racing with page faults beyond the
* new EOF they are not serialised against truncate operations except by
* page locks and size updates.
* We've already locked out new page faults, so now we can safely remove
* pages from the page cache knowing they won't get refaulted until we
* drop the XFS_MMAPLOCK_EXCL lock after the extent manipulations are
* complete. The truncate_setsize() call also cleans partial EOF page
* PTEs on extending truncates and hence ensures sub-page block size
* filesystems are correctly handled, too.
*
* Hence we are in a situation where a truncate can fail with ENOMEM
* from xfs_trans_reserve(), but having already truncated the in-memory
* version of the file (i.e. made user visible changes). There's not
* much we can do about this, except to hope that the caller sees ENOMEM
* and retries the truncate operation.
* We have to do all the page cache truncate work outside the
* transaction context as the "lock" order is page lock->log space
* reservation as defined by extent allocation in the writeback path.
* Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
* having already truncated the in-memory version of the file (i.e. made
* user visible changes). There's not much we can do about this, except
* to hope that the caller sees ENOMEM and retries the truncate
* operation.
*/
error
=
block_truncate_page
(
inode
->
i_mapping
,
newsize
,
xfs_get_blocks
);
if
(
error
)
return
error
;
truncate_setsize
(
inode
,
newsize
);
/*
* The "we can't serialise against page faults" pain gets worse.
*
* If the file is mapped then we have to clean the page at the old EOF
* when extending the file. Extending the file can expose changes the
* underlying page mapping (e.g. from beyond EOF to a hole or
* unwritten), and so on the next attempt to write to that page we need
* to remap it for write. i.e. we need .page_mkwrite() to be called.
* Hence we need to clean the page to clean the pte and so a new write
* fault will be triggered appropriately.
*
* If we do it before we change the inode size, then we can race with a
* page fault that maps the page with exactly the same problem. If we do
* it after we change the file size, then a new page fault can come in
* and allocate space before we've run the rest of the truncate
* transaction. That's kinda grotesque, but it's better than have data
* over a hole, and so that's the lesser evil that has been chosen here.
*
* The real solution, however, is to have some mechanism for locking out
* page faults while a truncate is in progress.
*/
if
(
newsize
>
oldsize
&&
mapping_mapped
(
VFS_I
(
ip
)
->
i_mapping
))
{
error
=
filemap_write_and_wait_range
(
VFS_I
(
ip
)
->
i_mapping
,
round_down
(
oldsize
,
PAGE_CACHE_SIZE
),
round_up
(
oldsize
,
PAGE_CACHE_SIZE
)
-
1
);
if
(
error
)
return
error
;
}
tp
=
xfs_trans_alloc
(
mp
,
XFS_TRANS_SETATTR_SIZE
);
error
=
xfs_trans_reserve
(
tp
,
&
M_RES
(
mp
)
->
tr_itruncate
,
0
,
0
);
if
(
error
)
...
...
@@ -981,8 +954,12 @@ xfs_vn_setattr(
xfs_ilock
(
ip
,
iolock
);
error
=
xfs_break_layouts
(
dentry
->
d_inode
,
&
iolock
);
if
(
!
error
)
if
(
!
error
)
{
xfs_ilock
(
ip
,
XFS_MMAPLOCK_EXCL
);
iolock
|=
XFS_MMAPLOCK_EXCL
;
error
=
xfs_setattr_size
(
ip
,
iattr
);
}
xfs_iunlock
(
ip
,
iolock
);
}
else
{
error
=
xfs_setattr_nonsize
(
ip
,
iattr
,
0
);
...
...
fs/xfs/xfs_super.c
View file @
88e8fda9
...
...
@@ -966,6 +966,8 @@ xfs_fs_inode_init_once(
atomic_set
(
&
ip
->
i_pincount
,
0
);
spin_lock_init
(
&
ip
->
i_flags_lock
);
mrlock_init
(
&
ip
->
i_mmaplock
,
MRLOCK_ALLOW_EQUAL_PRI
|
MRLOCK_BARRIER
,
"xfsino"
,
ip
->
i_ino
);
mrlock_init
(
&
ip
->
i_lock
,
MRLOCK_ALLOW_EQUAL_PRI
|
MRLOCK_BARRIER
,
"xfsino"
,
ip
->
i_ino
);
}
...
...
fs/xfs/xfs_trace.h
View file @
88e8fda9
...
...
@@ -685,6 +685,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
DEFINE_INODE_EVENT
(
xfs_inode_clear_eofblocks_tag
);
DEFINE_INODE_EVENT
(
xfs_inode_free_eofblocks_invalid
);
DEFINE_INODE_EVENT
(
xfs_filemap_fault
);
DEFINE_INODE_EVENT
(
xfs_filemap_page_mkwrite
);
DECLARE_EVENT_CLASS
(
xfs_iref_class
,
TP_PROTO
(
struct
xfs_inode
*
ip
,
unsigned
long
caller_ip
),
TP_ARGS
(
ip
,
caller_ip
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment