linux · Commit 88e8fda9

Commit 88e8fda9 authored Feb 24, 2015 by Dave Chinner

Merge branch 'xfs-mmap-lock' into for-next

parents 4225441a 723cac48

Showing 8 changed files with 217 additions and 114 deletions (+217 -114):
fs/xfs/xfs_bmap_util.c   +15  -16
fs/xfs/xfs_file.c        +54  -16
fs/xfs/xfs_inode.c       +97  -31
fs/xfs/xfs_inode.h       +22   -7
fs/xfs/xfs_ioctl.c        +4   -1
fs/xfs/xfs_iops.c        +20  -43
fs/xfs/xfs_super.c        +2   -0
fs/xfs/xfs_trace.h        +3   -0
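Taken together, the merge introduces a third inode lock, i_mmaplock (XFS_MMAPLOCK_SHARED / XFS_MMAPLOCK_EXCL), taken shared in the page fault path and exclusive around operations that invalidate the page cache. A minimal sketch of the caller-side pattern this enables, using a hypothetical helper example_punch_hole() (my illustration, not code from this commit):

/*
 * Hypothetical illustration: paths that manipulate extents take the new
 * mmap lock exclusive, so the shared takers in xfs_filemap_fault() and
 * xfs_filemap_page_mkwrite() (added below in fs/xfs/xfs_file.c) cannot
 * run concurrently with the page cache invalidation.
 */
static int
example_punch_hole(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	int			error;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);		/* blocks new syscall IO */
	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);	/* blocks new page faults */

	error = xfs_free_file_space(ip, offset, len);

	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return error;
}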
fs/xfs/xfs_bmap_util.c

@@ -1599,13 +1599,6 @@ xfs_swap_extent_flush(
 	/* Verify O_DIRECT for ftmp */
 	if (VFS_I(ip)->i_mapping->nrpages)
 		return -EINVAL;
-
-	/*
-	 * Don't try to swap extents on mmap()d files because we can't lock
-	 * out races against page faults safely.
-	 */
-	if (mapping_mapped(VFS_I(ip)->i_mapping))
-		return -EBUSY;
 	return 0;
 }

@@ -1633,13 +1626,14 @@ xfs_swap_extents(
 	}

 	/*
-	 * Lock up the inodes against other IO and truncate to begin with.
-	 * Then we can ensure the inodes are flushed and have no page cache
-	 * safely. Once we have done this we can take the ilocks and do the rest
-	 * of the checks.
+	 * Lock the inodes against other IO, page faults and truncate to
+	 * begin with. Then we can ensure the inodes are flushed and have no
+	 * page cache safely. Once we have done this we can take the ilocks and
+	 * do the rest of the checks.
 	 */
-	lock_flags = XFS_IOLOCK_EXCL;
+	lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
 	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
+	xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);

 	/* Verify that both files have the same format */
 	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {

@@ -1666,8 +1660,16 @@ xfs_swap_extents(
 		xfs_trans_cancel(tp, 0);
 		goto out_unlock;
 	}
+
+	/*
+	 * Lock and join the inodes to the tansaction so that transaction commit
+	 * or cancel will unlock the inodes from this point onwards.
+	 */
 	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
 	lock_flags |= XFS_ILOCK_EXCL;
+
+	xfs_trans_ijoin(tp, ip, lock_flags);
+	xfs_trans_ijoin(tp, tip, lock_flags);

 	/* Verify all data are being swapped */
 	if (sxp->sx_offset != 0 ||

@@ -1720,9 +1722,6 @@ xfs_swap_extents(
 		goto out_trans_cancel;
 	}

-	xfs_trans_ijoin(tp, ip, lock_flags);
-	xfs_trans_ijoin(tp, tip, lock_flags);
-
 	/*
 	 * Before we've swapped the forks, lets set the owners of the forks
 	 * appropriately. We have to do this as we are demand paging the btree

@@ -1856,5 +1855,5 @@ xfs_swap_extents(

 out_trans_cancel:
 	xfs_trans_cancel(tp, 0);
-	goto out_unlock;
+	goto out;
 }
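Read in sequence, these hunks leave xfs_swap_extents() building its lock state in the required order, with the ilocks joined to the transaction so that commit or cancel drops them. A condensed view of the resulting locking skeleton (assembled from the hunks above; checks and error handling elided):

	lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);		/* syscall IO */
	xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);	/* page faults */
	/* ... flush, format checks and transaction setup ... */
	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);		/* extent maps */
	lock_flags |= XFS_ILOCK_EXCL;
	xfs_trans_ijoin(tp, ip, lock_flags);
	xfs_trans_ijoin(tp, tip, lock_flags);
	/* from here, xfs_trans_commit()/xfs_trans_cancel() unlock the inodes */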
fs/xfs/xfs_file.c

@@ -847,6 +847,9 @@ xfs_file_fallocate(
 	if (error)
 		goto out_unlock;

+	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+	iolock |= XFS_MMAPLOCK_EXCL;
+
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
 		error = xfs_free_file_space(ip, offset, len);
 		if (error)

@@ -996,20 +999,6 @@ xfs_file_mmap(
 	return 0;
 }

-/*
- * mmap()d file has taken write protection fault and is being made
- * writable. We can set the page state up correctly for a writable
- * page, which means we can do correct delalloc accounting (ENOSPC
- * checking!) and unwritten extent mapping.
- */
-STATIC int
-xfs_vm_page_mkwrite(
-	struct vm_area_struct	*vma,
-	struct vm_fault		*vmf)
-{
-	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
-}
-
 /*
  * This type is designed to indicate the type of offset we would like
  * to search from page cache for xfs_seek_hole_data().

@@ -1385,6 +1374,55 @@ xfs_file_llseek(
 	}
 }

+/*
+ * Locking for serialisation of IO during page faults. This results in a lock
+ * ordering of:
+ *
+ * mmap_sem (MM)
+ *   i_mmap_lock (XFS - truncate serialisation)
+ *     page_lock (MM)
+ *       i_lock (XFS - extent map serialisation)
+ */
+STATIC int
+xfs_filemap_fault(
+	struct vm_area_struct	*vma,
+	struct vm_fault		*vmf)
+{
+	struct xfs_inode	*ip = XFS_I(vma->vm_file->f_mapping->host);
+	int			error;
+
+	trace_xfs_filemap_fault(ip);
+
+	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+	error = filemap_fault(vma, vmf);
+	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+	return error;
+}
+
+/*
+ * mmap()d file has taken write protection fault and is being made writable. We
+ * can set the page state up correctly for a writable page, which means we can
+ * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+ * mapping.
+ */
+STATIC int
+xfs_filemap_page_mkwrite(
+	struct vm_area_struct	*vma,
+	struct vm_fault		*vmf)
+{
+	struct xfs_inode	*ip = XFS_I(vma->vm_file->f_mapping->host);
+	int			error;
+
+	trace_xfs_filemap_page_mkwrite(ip);
+
+	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+	error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+	return error;
+}
+
 const struct file_operations xfs_file_operations = {
 	.llseek		= xfs_file_llseek,
 	.read		= new_sync_read,

@@ -1417,7 +1455,7 @@ const struct file_operations xfs_dir_file_operations = {
 };

 static const struct vm_operations_struct xfs_file_vm_ops = {
-	.fault		= filemap_fault,
+	.fault		= xfs_filemap_fault,
 	.map_pages	= filemap_map_pages,
-	.page_mkwrite	= xfs_vm_page_mkwrite,
+	.page_mkwrite	= xfs_filemap_page_mkwrite,
 };
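What the new handlers buy is easiest to exercise from userspace: before this series XFS had no way to serialise a hole punch against a concurrent write fault, hence the -EBUSY bail-out removed from xfs_swap_extent_flush() above. A standalone test sketch that creates exactly that race (my illustration, not part of the commit; file name, sizes and iteration counts are arbitrary):

/*
 * race-mmaplock.c: hammer a shared mapping with write faults while a
 * second thread punches holes in the same file. Build: cc -pthread.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define SZ	(1 << 20)
#define LOOPS	1000

static int fd;
static char *map;

/* each hole punch takes XFS_MMAPLOCK_EXCL inside xfs_file_fallocate() */
static void *puncher(void *arg)
{
	for (int i = 0; i < LOOPS; i++)
		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			      0, SZ) < 0)
			perror("fallocate");
	return NULL;
}

int main(void)
{
	pthread_t t;

	fd = open("testfile", O_RDWR | O_CREAT, 0644);
	if (fd < 0 || ftruncate(fd, SZ) < 0) {
		perror("setup");
		return 1;
	}
	map = mmap(NULL, SZ, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	pthread_create(&t, NULL, puncher, NULL);
	/* write faults take XFS_MMAPLOCK_SHARED in xfs_filemap_page_mkwrite() */
	for (int i = 0; i < LOOPS; i++)
		memset(map, i, SZ);
	pthread_join(t, NULL);
	return 0;
}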
fs/xfs/xfs_inode.c

@@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
 }

 /*
- * The xfs inode contains 2 locks: a multi-reader lock called the
- * i_iolock and a multi-reader lock called the i_lock. This routine
- * allows either or both of the locks to be obtained.
+ * The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and
+ * the i_lock. This routine allows various combinations of the locks to be
+ * obtained.
  *
- * The 2 locks should always be ordered so that the IO lock is
- * obtained first in order to prevent deadlock.
+ * The 3 locks should always be ordered so that the IO lock is obtained first,
+ * the mmap lock second and the ilock last in order to prevent deadlock.
  *
- * ip -- the inode being locked
- * lock_flags -- this parameter indicates the inode's locks
- *       to be locked. It can be:
- *             XFS_IOLOCK_SHARED,
- *             XFS_IOLOCK_EXCL,
- *             XFS_ILOCK_SHARED,
- *             XFS_ILOCK_EXCL,
- *             XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
- *             XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
- *             XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
- *             XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
+ * Basic locking order:
+ *
+ * i_iolock -> i_mmap_lock -> page_lock -> i_ilock
+ *
+ * mmap_sem locking order:
+ *
+ * i_iolock -> page lock -> mmap_sem
+ * mmap_sem -> i_mmap_lock -> page_lock
+ *
+ * The difference in mmap_sem locking order mean that we cannot hold the
+ * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
+ * fault in pages during copy in/out (for buffered IO) or require the mmap_sem
+ * in get_user_pages() to map the user pages into the kernel address space for
+ * direct IO. Similarly the i_iolock cannot be taken inside a page fault because
+ * page faults already hold the mmap_sem.
+ *
+ * Hence to serialise fully against both syscall and mmap based IO, we need to
+ * take both the i_iolock and the i_mmap_lock. These locks should *only* be both
+ * taken in places where we need to invalidate the page cache in a race
+ * free manner (e.g. truncate, hole punch and other extent manipulation
+ * functions).
  */
 void
 xfs_ilock(

@@ -150,6 +160,8 @@ xfs_ilock(
 	 */
 	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
 	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);

@@ -159,6 +171,11 @@ xfs_ilock(
 	else if (lock_flags & XFS_IOLOCK_SHARED)
 		mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));

+	if (lock_flags & XFS_MMAPLOCK_EXCL)
+		mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+	else if (lock_flags & XFS_MMAPLOCK_SHARED)
+		mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+
 	if (lock_flags & XFS_ILOCK_EXCL)
 		mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
 	else if (lock_flags & XFS_ILOCK_SHARED)

@@ -191,6 +208,8 @@ xfs_ilock_nowait(
 	 */
 	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
 	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);

@@ -202,21 +221,35 @@ xfs_ilock_nowait(
 		if (!mrtryaccess(&ip->i_iolock))
 			goto out;
 	}
+
+	if (lock_flags & XFS_MMAPLOCK_EXCL) {
+		if (!mrtryupdate(&ip->i_mmaplock))
+			goto out_undo_iolock;
+	} else if (lock_flags & XFS_MMAPLOCK_SHARED) {
+		if (!mrtryaccess(&ip->i_mmaplock))
+			goto out_undo_iolock;
+	}
+
 	if (lock_flags & XFS_ILOCK_EXCL) {
 		if (!mrtryupdate(&ip->i_lock))
-			goto out_undo_iolock;
+			goto out_undo_mmaplock;
 	} else if (lock_flags & XFS_ILOCK_SHARED) {
 		if (!mrtryaccess(&ip->i_lock))
-			goto out_undo_iolock;
+			goto out_undo_mmaplock;
 	}
 	return 1;

+out_undo_mmaplock:
+	if (lock_flags & XFS_MMAPLOCK_EXCL)
+		mrunlock_excl(&ip->i_mmaplock);
+	else if (lock_flags & XFS_MMAPLOCK_SHARED)
+		mrunlock_shared(&ip->i_mmaplock);
 out_undo_iolock:
 	if (lock_flags & XFS_IOLOCK_EXCL)
 		mrunlock_excl(&ip->i_iolock);
 	else if (lock_flags & XFS_IOLOCK_SHARED)
 		mrunlock_shared(&ip->i_iolock);
 out:
 	return 0;
 }

@@ -244,6 +277,8 @@ xfs_iunlock(
 	 */
 	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+	ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+	       (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
 	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);

@@ -254,6 +289,11 @@ xfs_iunlock(
 	else if (lock_flags & XFS_IOLOCK_SHARED)
 		mrunlock_shared(&ip->i_iolock);

+	if (lock_flags & XFS_MMAPLOCK_EXCL)
+		mrunlock_excl(&ip->i_mmaplock);
+	else if (lock_flags & XFS_MMAPLOCK_SHARED)
+		mrunlock_shared(&ip->i_mmaplock);
+
 	if (lock_flags & XFS_ILOCK_EXCL)
 		mrunlock_excl(&ip->i_lock);
 	else if (lock_flags & XFS_ILOCK_SHARED)

@@ -271,11 +311,14 @@ xfs_ilock_demote(
 	xfs_inode_t		*ip,
 	uint			lock_flags)
 {
-	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
-	ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
+	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
+	ASSERT((lock_flags &
+		~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);

 	if (lock_flags & XFS_ILOCK_EXCL)
 		mrdemote(&ip->i_lock);
+	if (lock_flags & XFS_MMAPLOCK_EXCL)
+		mrdemote(&ip->i_mmaplock);
 	if (lock_flags & XFS_IOLOCK_EXCL)
 		mrdemote(&ip->i_iolock);

@@ -294,6 +337,12 @@ xfs_isilocked(
 		return rwsem_is_locked(&ip->i_lock.mr_lock);
 	}

+	if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
+		if (!(lock_flags & XFS_MMAPLOCK_SHARED))
+			return !!ip->i_mmaplock.mr_writer;
+		return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
+	}
+
 	if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
 		if (!(lock_flags & XFS_IOLOCK_SHARED))
 			return !!ip->i_iolock.mr_writer;

@@ -314,14 +363,27 @@ int xfs_lock_delays;
 #endif

 /*
- * Bump the subclass so xfs_lock_inodes() acquires each lock with
- * a different value
+ * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
+ * value. This shouldn't be called for page fault locking, but we also need to
+ * ensure we don't overrun the number of lockdep subclasses for the iolock or
+ * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
  */
 static inline int
 xfs_lock_inumorder(int lock_mode, int subclass)
 {
-	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
+	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+		ASSERT(subclass + XFS_LOCK_INUMORDER <
+			(1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
 		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+	}
+
+	if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
+		ASSERT(subclass + XFS_LOCK_INUMORDER <
+			(1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
+		lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
+							XFS_MMAPLOCK_SHIFT;
+	}
+
 	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
 		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;

@@ -440,10 +502,10 @@ xfs_lock_inodes(
 }

 /*
- * xfs_lock_two_inodes() can only be used to lock one type of lock
- * at a time - the iolock or the ilock, but not both at once. If
- * we lock both at once, lockdep will report false positives saying
- * we have violated locking orders.
+ * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
  */
 void
 xfs_lock_two_inodes(

@@ -455,8 +517,12 @@ xfs_lock_two_inodes(
 	int			attempts = 0;
 	xfs_log_item_t		*lp;

-	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
-		ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
+	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+		ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
+		ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+	} else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
+		ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));

 	ASSERT(ip0->i_ino != ip1->i_ino);
 	if (ip0->i_ino > ip1->i_ino) {
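The reworked xfs_ilock_nowait() is an instance of the standard trylock idiom for ordered lock classes: try each class in locking order, and on failure unwind only what was already taken, in reverse. A standalone sketch of the idiom in plain pthreads (my illustration, not the kernel's mrlock API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t iolock   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mmaplock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t ilock    = PTHREAD_MUTEX_INITIALIZER;

/* Try all three locks in order; return 1 on success, 0 without blocking. */
static int lock_all_nowait(void)
{
	if (pthread_mutex_trylock(&iolock))
		goto out;
	if (pthread_mutex_trylock(&mmaplock))
		goto out_undo_iolock;
	if (pthread_mutex_trylock(&ilock))
		goto out_undo_mmaplock;
	return 1;

out_undo_mmaplock:			/* unwind in reverse order */
	pthread_mutex_unlock(&mmaplock);
out_undo_iolock:
	pthread_mutex_unlock(&iolock);
out:
	return 0;
}

int main(void)
{
	printf("locked: %d\n", lock_all_nowait());
	return 0;
}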
fs/xfs/xfs_inode.h

@@ -56,6 +56,7 @@ typedef struct xfs_inode {
 	struct xfs_inode_log_item *i_itemp;	/* logging information */
 	mrlock_t		i_lock;		/* inode lock */
 	mrlock_t		i_iolock;	/* inode IO lock */
+	mrlock_t		i_mmaplock;	/* inode mmap IO lock */
 	atomic_t		i_pincount;	/* inode pin count */
 	spinlock_t		i_flags_lock;	/* inode i_flags lock */
 	/* Miscellaneous state. */

@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 #define	XFS_IOLOCK_SHARED	(1<<1)
 #define	XFS_ILOCK_EXCL		(1<<2)
 #define	XFS_ILOCK_SHARED	(1<<3)
+#define	XFS_MMAPLOCK_EXCL	(1<<4)
+#define	XFS_MMAPLOCK_SHARED	(1<<5)

 #define XFS_LOCK_MASK		(XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
-				| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
+				| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
+				| XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)

 #define XFS_LOCK_FLAGS \
 	{ XFS_IOLOCK_EXCL,	"IOLOCK_EXCL" }, \
 	{ XFS_IOLOCK_SHARED,	"IOLOCK_SHARED" }, \
 	{ XFS_ILOCK_EXCL,	"ILOCK_EXCL" }, \
-	{ XFS_ILOCK_SHARED,	"ILOCK_SHARED" }
+	{ XFS_ILOCK_SHARED,	"ILOCK_SHARED" }, \
+	{ XFS_MMAPLOCK_EXCL,	"MMAPLOCK_EXCL" }, \
+	{ XFS_MMAPLOCK_SHARED,	"MMAPLOCK_SHARED" }

 /*

@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 #define XFS_IOLOCK_SHIFT	16
 #define	XFS_IOLOCK_PARENT	(XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)

+#define XFS_MMAPLOCK_SHIFT	20
+
 #define XFS_ILOCK_SHIFT		24
 #define	XFS_ILOCK_PARENT	(XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
 #define	XFS_ILOCK_RTBITMAP	(XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
 #define	XFS_ILOCK_RTSUM		(XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)

-#define XFS_IOLOCK_DEP_MASK	0x00ff0000
+#define XFS_IOLOCK_DEP_MASK	0x000f0000
+#define XFS_MMAPLOCK_DEP_MASK	0x00f00000
 #define XFS_ILOCK_DEP_MASK	0xff000000
-#define XFS_LOCK_DEP_MASK	(XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
+#define XFS_LOCK_DEP_MASK	(XFS_IOLOCK_DEP_MASK | \
+				 XFS_MMAPLOCK_DEP_MASK | \
+				 XFS_ILOCK_DEP_MASK)

-#define XFS_IOLOCK_DEP(flags)	(((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
-#define XFS_ILOCK_DEP(flags)	(((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
+#define XFS_IOLOCK_DEP(flags)	(((flags) & XFS_IOLOCK_DEP_MASK) \
+					>> XFS_IOLOCK_SHIFT)
+#define XFS_MMAPLOCK_DEP(flags)	(((flags) & XFS_MMAPLOCK_DEP_MASK) \
+					>> XFS_MMAPLOCK_SHIFT)
+#define XFS_ILOCK_DEP(flags)	(((flags) & XFS_ILOCK_DEP_MASK) \
+					>> XFS_ILOCK_SHIFT)

 /*
  * For multiple groups support: if S_ISGID bit is set in the parent
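The narrowed XFS_IOLOCK_DEP_MASK makes room for the new class: lockdep subclasses for the iolock now live in bits 16-19, the mmaplock in bits 20-23 and the ilock in bits 24-31, while the lock flags themselves stay in the low bits. A small userspace sketch (illustration only; constants copied from the header above) showing the pack/extract round trip:

#include <stdio.h>

#define XFS_IOLOCK_SHIFT	16
#define XFS_MMAPLOCK_SHIFT	20
#define XFS_ILOCK_SHIFT		24

#define XFS_IOLOCK_DEP_MASK	0x000f0000
#define XFS_MMAPLOCK_DEP_MASK	0x00f00000
#define XFS_ILOCK_DEP_MASK	0xff000000

int main(void)
{
	unsigned int flags = 0;

	/* pack subclass 1 for the mmaplock, as xfs_lock_inumorder() would */
	flags |= 1u << XFS_MMAPLOCK_SHIFT;

	/* extract it again, as XFS_MMAPLOCK_DEP() does */
	printf("mmaplock subclass = %u\n",
	       (flags & XFS_MMAPLOCK_DEP_MASK) >> XFS_MMAPLOCK_SHIFT);
	return 0;
}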
fs/xfs/xfs_ioctl.c

@@ -631,7 +631,7 @@ xfs_ioc_space(

 	if (filp->f_flags & O_DSYNC)
 		flags |= XFS_PREALLOC_SYNC;
-	if (ioflags & XFS_IO_INVIS)	
+	if (ioflags & XFS_IO_INVIS)
 		flags |= XFS_PREALLOC_INVISIBLE;

 	error = mnt_want_write_file(filp);

@@ -643,6 +643,9 @@ xfs_ioc_space(
 	if (error)
 		goto out_unlock;

+	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+	iolock |= XFS_MMAPLOCK_EXCL;
+
 	switch (bf->l_whence) {
 	case 0: /*SEEK_SET*/
 		break;
fs/xfs/xfs_iops.c

@@ -771,6 +771,7 @@ xfs_setattr_size(
 		return error;

 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+	ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
 	ASSERT(S_ISREG(ip->i_d.di_mode));
 	ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
 		ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);

@@ -834,55 +835,27 @@ xfs_setattr_size(
 	inode_dio_wait(inode);

 	/*
-	 * Do all the page cache truncate work outside the transaction context
-	 * as the "lock" order is page lock->log space reservation. i.e.
-	 * locking pages inside the transaction can ABBA deadlock with
-	 * writeback. We have to do the VFS inode size update before we truncate
-	 * the pagecache, however, to avoid racing with page faults beyond the
-	 * new EOF they are not serialised against truncate operations except by
-	 * page locks and size updates.
+	 * We've already locked out new page faults, so now we can safely remove
+	 * pages from the page cache knowing they won't get refaulted until we
+	 * drop the XFS_MMAP_EXCL lock after the extent manipulations are
+	 * complete. The truncate_setsize() call also cleans partial EOF page
+	 * PTEs on extending truncates and hence ensures sub-page block size
+	 * filesystems are correctly handled, too.
 	 *
-	 * Hence we are in a situation where a truncate can fail with ENOMEM
-	 * from xfs_trans_reserve(), but having already truncated the in-memory
-	 * version of the file (i.e. made user visible changes). There's not
-	 * much we can do about this, except to hope that the caller sees ENOMEM
-	 * and retries the truncate operation.
+	 * We have to do all the page cache truncate work outside the
+	 * transaction context as the "lock" order is page lock->log space
+	 * reservation as defined by extent allocation in the writeback path.
+	 * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
+	 * having already truncated the in-memory version of the file (i.e. made
+	 * user visible changes). There's not much we can do about this, except
+	 * to hope that the caller sees ENOMEM and retries the truncate
+	 * operation.
 	 */
 	error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
 	if (error)
 		return error;
 	truncate_setsize(inode, newsize);

-	/*
-	 * The "we can't serialise against page faults" pain gets worse.
-	 *
-	 * If the file is mapped then we have to clean the page at the old EOF
-	 * when extending the file. Extending the file can expose changes the
-	 * underlying page mapping (e.g. from beyond EOF to a hole or
-	 * unwritten), and so on the next attempt to write to that page we need
-	 * to remap it for write. i.e. we need .page_mkwrite() to be called.
-	 * Hence we need to clean the page to clean the pte and so a new write
-	 * fault will be triggered appropriately.
-	 *
-	 * If we do it before we change the inode size, then we can race with a
-	 * page fault that maps the page with exactly the same problem. If we do
-	 * it after we change the file size, then a new page fault can come in
-	 * and allocate space before we've run the rest of the truncate
-	 * transaction. That's kinda grotesque, but it's better than have data
-	 * over a hole, and so that's the lesser evil that has been chosen here.
-	 *
-	 * The real solution, however, is to have some mechanism for locking out
-	 * page faults while a truncate is in progress.
-	 */
-	if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
-		error = filemap_write_and_wait_range(
-				VFS_I(ip)->i_mapping,
-				round_down(oldsize, PAGE_CACHE_SIZE),
-				round_up(oldsize, PAGE_CACHE_SIZE) - 1);
-		if (error)
-			return error;
-	}
-
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
 	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
 	if (error)

@@ -981,8 +954,12 @@ xfs_vn_setattr(
 		xfs_ilock(ip, iolock);
 		error = xfs_break_layouts(dentry->d_inode, &iolock);
-		if (!error)
+		if (!error) {
+			xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+			iolock |= XFS_MMAPLOCK_EXCL;
+
 			error = xfs_setattr_size(ip, iattr);
+		}
 		xfs_iunlock(ip, iolock);
 	} else {
 		error = xfs_setattr_nonsize(ip, iattr, 0);
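The net effect on the truncate path, condensed from the two hunks above (my condensation; error handling elided): xfs_vn_setattr() now enters xfs_setattr_size() with both the iolock and the mmaplock held exclusive, which is exactly what the new ASSERT in xfs_setattr_size() checks, and what lets the old extending-truncate writeback workaround be deleted.

	xfs_ilock(ip, iolock);			/* iolock = XFS_IOLOCK_EXCL */
	error = xfs_break_layouts(dentry->d_inode, &iolock);
	if (!error) {
		xfs_ilock(ip, XFS_MMAPLOCK_EXCL);	/* faults locked out */
		iolock |= XFS_MMAPLOCK_EXCL;

		error = xfs_setattr_size(ip, iattr);	/* safe to truncate */
	}
	xfs_iunlock(ip, iolock);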
fs/xfs/xfs_super.c

@@ -966,6 +966,8 @@ xfs_fs_inode_init_once(
 	atomic_set(&ip->i_pincount, 0);
 	spin_lock_init(&ip->i_flags_lock);

+	mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+		     "xfsino", ip->i_ino);
 	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", ip->i_ino);
 }
fs/xfs/xfs_trace.h

@@ -685,6 +685,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
 DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
 DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);

+DEFINE_INODE_EVENT(xfs_filemap_fault);
+DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
+
 DECLARE_EVENT_CLASS(xfs_iref_class,
 	TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
 	TP_ARGS(ip, caller_ip),