Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
linux
Commits
264e89ad
Commit
264e89ad
authored
Nov 03, 2015
by
Dave Chinner
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'xfs-dax-updates' into for-next
parents
2da5c4b0
13ad4fe3
Changes
12
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
230 additions
and
65 deletions
+230
-65
fs/dax.c
fs/dax.c
+5
-0
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_alloc.c
+9
-1
fs/xfs/libxfs/xfs_alloc.h
fs/xfs/libxfs/xfs_alloc.h
+5
-3
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap.c
+33
-2
fs/xfs/libxfs/xfs_bmap.h
fs/xfs/libxfs/xfs_bmap.h
+11
-2
fs/xfs/xfs_aops.c
fs/xfs/xfs_aops.c
+50
-46
fs/xfs/xfs_aops.h
fs/xfs/xfs_aops.h
+2
-1
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_bmap_util.c
+36
-0
fs/xfs/xfs_file.c
fs/xfs/xfs_file.c
+55
-9
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iomap.c
+20
-1
fs/xfs/xfs_mount.h
fs/xfs/xfs_mount.h
+3
-0
fs/xfs/xfs_trace.h
fs/xfs/xfs_trace.h
+1
-0
No files found.
fs/dax.c
View file @
264e89ad
...
...
@@ -29,6 +29,11 @@
#include <linux/uio.h>
#include <linux/vmstat.h>
/*
* dax_clear_blocks() is called from within transaction context from XFS,
* and hence this means the stack from this point must follow GFP_NOFS
* semantics for all operations.
*/
int
dax_clear_blocks
(
struct
inode
*
inode
,
sector_t
block
,
long
size
)
{
struct
block_device
*
bdev
=
inode
->
i_sb
->
s_bdev
;
...
...
fs/xfs/libxfs/xfs_alloc.c
View file @
264e89ad
...
...
@@ -2509,7 +2509,7 @@ xfs_alloc_vextent(
* Try near allocation first, then anywhere-in-ag after
* the first a.g. fails.
*/
if
((
args
->
userdata
==
XFS_ALLOC_INITIAL_USER_DATA
)
&&
if
((
args
->
userdata
&
XFS_ALLOC_INITIAL_USER_DATA
)
&&
(
mp
->
m_flags
&
XFS_MOUNT_32BITINODES
))
{
args
->
fsbno
=
XFS_AGB_TO_FSB
(
mp
,
((
mp
->
m_agfrotor
/
rotorstep
)
%
...
...
@@ -2640,6 +2640,14 @@ xfs_alloc_vextent(
XFS_AG_CHECK_DADDR
(
mp
,
XFS_FSB_TO_DADDR
(
mp
,
args
->
fsbno
),
args
->
len
);
#endif
/* Zero the extent if we were asked to do so */
if
(
args
->
userdata
&
XFS_ALLOC_USERDATA_ZERO
)
{
error
=
xfs_zero_extent
(
args
->
ip
,
args
->
fsbno
,
args
->
len
);
if
(
error
)
goto
error0
;
}
}
xfs_perag_put
(
args
->
pag
);
return
0
;
...
...
fs/xfs/libxfs/xfs_alloc.h
View file @
264e89ad
...
...
@@ -101,6 +101,7 @@ typedef struct xfs_alloc_arg {
struct
xfs_mount
*
mp
;
/* file system mount point */
struct
xfs_buf
*
agbp
;
/* buffer for a.g. freelist header */
struct
xfs_perag
*
pag
;
/* per-ag struct for this agno */
struct
xfs_inode
*
ip
;
/* for userdata zeroing method */
xfs_fsblock_t
fsbno
;
/* file system block number */
xfs_agnumber_t
agno
;
/* allocation group number */
xfs_agblock_t
agbno
;
/* allocation group-relative block # */
...
...
@@ -120,15 +121,16 @@ typedef struct xfs_alloc_arg {
char
wasdel
;
/* set if allocation was prev delayed */
char
wasfromfl
;
/* set if allocation is from freelist */
char
isfl
;
/* set if is freelist blocks - !acctg */
char
userdata
;
/*
set if this is user data
*/
char
userdata
;
/*
mask defining userdata treatment
*/
xfs_fsblock_t
firstblock
;
/* io first block allocated */
}
xfs_alloc_arg_t
;
/*
* Defines for userdata
*/
#define XFS_ALLOC_USERDATA 1
/* allocation is for user data*/
#define XFS_ALLOC_INITIAL_USER_DATA 2
/* special case start of file */
#define XFS_ALLOC_USERDATA (1 << 0)
/* allocation is for user data*/
#define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)
/* special case start of file */
#define XFS_ALLOC_USERDATA_ZERO (1 << 2)
/* zero extent on allocation */
xfs_extlen_t
xfs_alloc_longest_free_extent
(
struct
xfs_mount
*
mp
,
struct
xfs_perag
*
pag
,
xfs_extlen_t
need
);
...
...
fs/xfs/libxfs/xfs_bmap.c
View file @
264e89ad
...
...
@@ -3802,8 +3802,13 @@ xfs_bmap_btalloc(
args
.
wasdel
=
ap
->
wasdel
;
args
.
isfl
=
0
;
args
.
userdata
=
ap
->
userdata
;
if
((
error
=
xfs_alloc_vextent
(
&
args
)))
if
(
ap
->
userdata
&
XFS_ALLOC_USERDATA_ZERO
)
args
.
ip
=
ap
->
ip
;
error
=
xfs_alloc_vextent
(
&
args
);
if
(
error
)
return
error
;
if
(
tryagain
&&
args
.
fsbno
==
NULLFSBLOCK
)
{
/*
* Exact allocation failed. Now try with alignment
...
...
@@ -4302,11 +4307,14 @@ xfs_bmapi_allocate(
/*
* Indicate if this is the first user data in the file, or just any
* user data.
* user data. And if it is userdata, indicate whether it needs to
* be initialised to zero during allocation.
*/
if
(
!
(
bma
->
flags
&
XFS_BMAPI_METADATA
))
{
bma
->
userdata
=
(
bma
->
offset
==
0
)
?
XFS_ALLOC_INITIAL_USER_DATA
:
XFS_ALLOC_USERDATA
;
if
(
bma
->
flags
&
XFS_BMAPI_ZERO
)
bma
->
userdata
|=
XFS_ALLOC_USERDATA_ZERO
;
}
bma
->
minlen
=
(
bma
->
flags
&
XFS_BMAPI_CONTIG
)
?
bma
->
length
:
1
;
...
...
@@ -4421,6 +4429,17 @@ xfs_bmapi_convert_unwritten(
mval
->
br_state
=
(
mval
->
br_state
==
XFS_EXT_UNWRITTEN
)
?
XFS_EXT_NORM
:
XFS_EXT_UNWRITTEN
;
/*
* Before insertion into the bmbt, zero the range being converted
* if required.
*/
if
(
flags
&
XFS_BMAPI_ZERO
)
{
error
=
xfs_zero_extent
(
bma
->
ip
,
mval
->
br_startblock
,
mval
->
br_blockcount
);
if
(
error
)
return
error
;
}
error
=
xfs_bmap_add_extent_unwritten_real
(
bma
->
tp
,
bma
->
ip
,
&
bma
->
idx
,
&
bma
->
cur
,
mval
,
bma
->
firstblock
,
bma
->
flist
,
&
tmp_logflags
);
...
...
@@ -4514,6 +4533,18 @@ xfs_bmapi_write(
ASSERT
(
XFS_IFORK_FORMAT
(
ip
,
whichfork
)
!=
XFS_DINODE_FMT_LOCAL
);
ASSERT
(
xfs_isilocked
(
ip
,
XFS_ILOCK_EXCL
));
/* zeroing is currently only for data extents, not metadata */
ASSERT
((
flags
&
(
XFS_BMAPI_METADATA
|
XFS_BMAPI_ZERO
))
!=
(
XFS_BMAPI_METADATA
|
XFS_BMAPI_ZERO
));
/*
* we can allocate unwritten extents or pre-zero allocated blocks,
* but it makes no sense to do both at once. This would result in
* zeroing the unwritten extent twice, but it still being an
* unwritten extent....
*/
ASSERT
((
flags
&
(
XFS_BMAPI_PREALLOC
|
XFS_BMAPI_ZERO
))
!=
(
XFS_BMAPI_PREALLOC
|
XFS_BMAPI_ZERO
));
if
(
unlikely
(
XFS_TEST_ERROR
(
(
XFS_IFORK_FORMAT
(
ip
,
whichfork
)
!=
XFS_DINODE_FMT_EXTENTS
&&
XFS_IFORK_FORMAT
(
ip
,
whichfork
)
!=
XFS_DINODE_FMT_BTREE
),
...
...
fs/xfs/libxfs/xfs_bmap.h
View file @
264e89ad
...
...
@@ -52,9 +52,9 @@ struct xfs_bmalloca {
xfs_extlen_t
minleft
;
/* amount must be left after alloc */
bool
eof
;
/* set if allocating past last extent */
bool
wasdel
;
/* replacing a delayed allocation */
bool
userdata
;
/* set if is user data */
bool
aeof
;
/* allocated space at eof */
bool
conv
;
/* overwriting unwritten extents */
char
userdata
;
/* userdata mask */
int
flags
;
};
...
...
@@ -109,6 +109,14 @@ typedef struct xfs_bmap_free
*/
#define XFS_BMAPI_CONVERT 0x040
/*
* allocate zeroed extents - this requires all newly allocated user data extents
* to be initialised to zero. It will be ignored if XFS_BMAPI_METADATA is set.
* Use in conjunction with XFS_BMAPI_CONVERT to convert unwritten extents found
* during the allocation range to zeroed written extents.
*/
#define XFS_BMAPI_ZERO 0x080
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \
...
...
@@ -116,7 +124,8 @@ typedef struct xfs_bmap_free
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
{ XFS_BMAPI_CONTIG, "CONTIG" }, \
{ XFS_BMAPI_CONVERT, "CONVERT" }
{ XFS_BMAPI_CONVERT, "CONVERT" }, \
{ XFS_BMAPI_ZERO, "ZERO" }
static
inline
int
xfs_bmapi_aflag
(
int
w
)
...
...
fs/xfs/xfs_aops.c
View file @
264e89ad
...
...
@@ -1259,13 +1259,28 @@ xfs_vm_releasepage(
* the DIO. There is only going to be one reference to the ioend and its life
* cycle is constrained by the DIO completion code. hence we don't need
* reference counting here.
*
* Note that for DIO, an IO to the highest supported file block offset (i.e.
* 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
* bit variable. Hence if we see this overflow, we have to assume that the IO is
* extending the file size. We won't know for sure until IO completion is run
* and the actual max write offset is communicated to the IO completion
* routine.
*
* For DAX page faults, we are preparing to never see unwritten extents here,
* nor should we ever extend the inode size. Hence we will soon have nothing to
* do here for this case, ensuring we don't have to provide an IO completion
* callback to free an ioend that we don't actually need for a fault into the
* page at offset (2^63 - 1FSB) bytes.
*/
static
void
xfs_map_direct
(
struct
inode
*
inode
,
struct
buffer_head
*
bh_result
,
struct
xfs_bmbt_irec
*
imap
,
xfs_off_t
offset
)
xfs_off_t
offset
,
bool
dax_fault
)
{
struct
xfs_ioend
*
ioend
;
xfs_off_t
size
=
bh_result
->
b_size
;
...
...
@@ -1278,6 +1293,13 @@ xfs_map_direct(
trace_xfs_gbmap_direct
(
XFS_I
(
inode
),
offset
,
size
,
type
,
imap
);
if
(
dax_fault
)
{
ASSERT
(
type
==
XFS_IO_OVERWRITE
);
trace_xfs_gbmap_direct_none
(
XFS_I
(
inode
),
offset
,
size
,
type
,
imap
);
return
;
}
if
(
bh_result
->
b_private
)
{
ioend
=
bh_result
->
b_private
;
ASSERT
(
ioend
->
io_size
>
0
);
...
...
@@ -1292,7 +1314,8 @@ xfs_map_direct(
ioend
->
io_size
,
ioend
->
io_type
,
imap
);
}
else
if
(
type
==
XFS_IO_UNWRITTEN
||
offset
+
size
>
i_size_read
(
inode
))
{
offset
+
size
>
i_size_read
(
inode
)
||
offset
+
size
<
0
)
{
ioend
=
xfs_alloc_ioend
(
inode
,
type
);
ioend
->
io_offset
=
offset
;
ioend
->
io_size
=
size
;
...
...
@@ -1354,7 +1377,8 @@ __xfs_get_blocks(
sector_t
iblock
,
struct
buffer_head
*
bh_result
,
int
create
,
bool
direct
)
bool
direct
,
bool
dax_fault
)
{
struct
xfs_inode
*
ip
=
XFS_I
(
inode
);
struct
xfs_mount
*
mp
=
ip
->
i_mount
;
...
...
@@ -1402,10 +1426,12 @@ __xfs_get_blocks(
if
(
error
)
goto
out_unlock
;
/* for DAX, we convert unwritten extents directly */
if
(
create
&&
(
!
nimaps
||
(
imap
.
br_startblock
==
HOLESTARTBLOCK
||
imap
.
br_startblock
==
DELAYSTARTBLOCK
)))
{
imap
.
br_startblock
==
DELAYSTARTBLOCK
)
||
(
IS_DAX
(
inode
)
&&
ISUNWRITTEN
(
&
imap
))))
{
if
(
direct
||
xfs_get_extsz_hint
(
ip
))
{
/*
* xfs_iomap_write_direct() expects the shared lock. It
...
...
@@ -1450,6 +1476,12 @@ __xfs_get_blocks(
goto
out_unlock
;
}
if
(
IS_DAX
(
inode
)
&&
create
)
{
ASSERT
(
!
ISUNWRITTEN
(
&
imap
));
/* zeroing is not needed at a higher layer */
new
=
0
;
}
/* trim mapping down to size requested */
if
(
direct
||
size
>
(
1
<<
inode
->
i_blkbits
))
xfs_map_trim_size
(
inode
,
iblock
,
bh_result
,
...
...
@@ -1467,7 +1499,8 @@ __xfs_get_blocks(
set_buffer_unwritten
(
bh_result
);
/* direct IO needs special help */
if
(
create
&&
direct
)
xfs_map_direct
(
inode
,
bh_result
,
&
imap
,
offset
);
xfs_map_direct
(
inode
,
bh_result
,
&
imap
,
offset
,
dax_fault
);
}
/*
...
...
@@ -1514,7 +1547,7 @@ xfs_get_blocks(
struct
buffer_head
*
bh_result
,
int
create
)
{
return
__xfs_get_blocks
(
inode
,
iblock
,
bh_result
,
create
,
false
);
return
__xfs_get_blocks
(
inode
,
iblock
,
bh_result
,
create
,
false
,
false
);
}
int
...
...
@@ -1524,7 +1557,17 @@ xfs_get_blocks_direct(
struct
buffer_head
*
bh_result
,
int
create
)
{
return
__xfs_get_blocks
(
inode
,
iblock
,
bh_result
,
create
,
true
);
return
__xfs_get_blocks
(
inode
,
iblock
,
bh_result
,
create
,
true
,
false
);
}
/*
 * get_block callback used by the DAX page fault paths.
 *
 * Forwards to __xfs_get_blocks() with both the "direct" and "dax_fault"
 * selectors switched on; every other argument is passed through untouched
 * and the result of __xfs_get_blocks() is returned as-is.
 */
int
xfs_get_blocks_dax_fault(
	struct inode		*inode,
	sector_t		iblock,
	struct buffer_head	*bh_result,
	int			create)
{
	const bool		direct = true;
	const bool		dax_fault = true;

	return __xfs_get_blocks(inode, iblock, bh_result, create,
				direct, dax_fault);
}
static
void
...
...
@@ -1623,45 +1666,6 @@ xfs_end_io_direct_write(
__xfs_end_io_direct_write
(
inode
,
ioend
,
offset
,
size
);
}
/*
 * Mapping buffer completion callback for DAX: used for unwritten extent
 * conversion after a page fault has allocated blocks and zeroed them.
 *
 * The range described by the ioend may reach past EOF. Completion must not
 * move EOF, so the size handed to the completion handler is clamped to the
 * current inode size first.
 *
 * @bh:       buffer head whose b_private carries the ioend
 * @uptodate: zero if the zeroing failed; the ioend is then marked with -EIO
 *
 * Without CONFIG_FS_DAX this is an empty stub.
 */
#ifdef CONFIG_FS_DAX
void
xfs_end_io_dax_write(
	struct buffer_head	*bh,
	int			uptodate)
{
	struct xfs_ioend	*ioend = bh->b_private;
	struct inode		*inode = ioend->io_inode;
	ssize_t			nbytes = ioend->io_size;

	ASSERT(IS_DAX(ioend->io_inode));

	/* a failed zeroing must not be converted: flag the ioend as errored */
	if (!uptodate)
		ioend->io_error = -EIO;

	/*
	 * Clamp the update to EOF under i_flags_lock so that converting a
	 * partial EOF block never extends the file.
	 */
	spin_lock(&XFS_I(inode)->i_flags_lock);
	if (ioend->io_offset + nbytes > i_size_read(inode))
		nbytes = i_size_read(inode) - ioend->io_offset;
	spin_unlock(&XFS_I(inode)->i_flags_lock);

	__xfs_end_io_direct_write(inode, ioend, ioend->io_offset, nbytes);
}
#else
void
xfs_end_io_dax_write(
	struct buffer_head	*bh,
	int			uptodate)
{
}
#endif
static
inline
ssize_t
xfs_vm_do_dio
(
struct
inode
*
inode
,
...
...
fs/xfs/xfs_aops.h
View file @
264e89ad
...
...
@@ -58,7 +58,8 @@ int xfs_get_blocks(struct inode *inode, sector_t offset,
struct
buffer_head
*
map_bh
,
int
create
);
int
xfs_get_blocks_direct
(
struct
inode
*
inode
,
sector_t
offset
,
struct
buffer_head
*
map_bh
,
int
create
);
void
xfs_end_io_dax_write
(
struct
buffer_head
*
bh
,
int
uptodate
);
int
xfs_get_blocks_dax_fault
(
struct
inode
*
inode
,
sector_t
offset
,
struct
buffer_head
*
map_bh
,
int
create
);
extern
void
xfs_count_page_state
(
struct
page
*
,
int
*
,
int
*
);
...
...
fs/xfs/xfs_bmap_util.c
View file @
264e89ad
...
...
@@ -56,6 +56,35 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
XFS_FSB_TO_DADDR
((
ip
)
->
i_mount
,
(
fsb
)));
}
/*
 * Zero an on-disk extent that has been allocated to the given inode.
 *
 * XFS filesystem block numbers are sparse, while the VFS helpers called here
 * want a linearised filesystem block offset, so @start_fsb is first turned
 * into a disk address and then back into a linear block number.
 *
 * @ip:        inode the extent belongs to
 * @start_fsb: first (sparse) filesystem block of the extent
 * @count_fsb: length of the extent in filesystem blocks
 *
 * DAX inodes are zeroed through dax_clear_blocks(), which takes the length
 * in bytes; everything else goes through sb_issue_zeroout() with GFP_NOFS.
 * The return value is whatever the chosen helper returns.
 */
int
xfs_zero_extent(
	struct xfs_inode	*ip,
	xfs_fsblock_t		start_fsb,
	xfs_off_t		count_fsb)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_daddr_t		daddr = xfs_fsb_to_db(ip, start_fsb);
	sector_t		first_block = XFS_BB_TO_FSBT(mp, daddr);
	ssize_t			nbytes = XFS_FSB_TO_B(mp, count_fsb);

	if (!IS_DAX(VFS_I(ip))) {
		/*
		 * Not DAX: hand the range to the block layer and let it pick
		 * the fastest way of implementing the zeroing.
		 */
		return sb_issue_zeroout(mp->m_super, first_block, count_fsb,
					GFP_NOFS);
	}

	return dax_clear_blocks(VFS_I(ip), first_block, nbytes);
}
/*
* Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
* caller. Frees all the extents that need freeing, which must be done
...
...
@@ -229,6 +258,13 @@ xfs_bmap_rtalloc(
xfs_trans_mod_dquot_byino
(
ap
->
tp
,
ap
->
ip
,
ap
->
wasdel
?
XFS_TRANS_DQ_DELRTBCOUNT
:
XFS_TRANS_DQ_RTBCOUNT
,
(
long
)
ralen
);
/* Zero the extent if we were asked to do so */
if
(
ap
->
userdata
&
XFS_ALLOC_USERDATA_ZERO
)
{
error
=
xfs_zero_extent
(
ap
->
ip
,
ap
->
blkno
,
ap
->
length
);
if
(
error
)
return
error
;
}
}
else
{
ap
->
length
=
0
;
}
...
...
fs/xfs/xfs_file.c
View file @
264e89ad
...
...
@@ -1493,7 +1493,7 @@ xfs_file_llseek(
*
* mmap_sem (MM)
* sb_start_pagefault(vfs, freeze)
* i_mmap
_
lock (XFS - truncate serialisation)
* i_mmaplock (XFS - truncate serialisation)
* page_lock (MM)
* i_lock (XFS - extent map serialisation)
*/
...
...
@@ -1519,8 +1519,7 @@ xfs_filemap_page_mkwrite(
xfs_ilock
(
XFS_I
(
inode
),
XFS_MMAPLOCK_SHARED
);
if
(
IS_DAX
(
inode
))
{
ret
=
__dax_mkwrite
(
vma
,
vmf
,
xfs_get_blocks_direct
,
xfs_end_io_dax_write
);
ret
=
__dax_mkwrite
(
vma
,
vmf
,
xfs_get_blocks_dax_fault
,
NULL
);
}
else
{
ret
=
__block_page_mkwrite
(
vma
,
vmf
,
xfs_get_blocks
);
ret
=
block_page_mkwrite_return
(
ret
);
...
...
@@ -1554,7 +1553,7 @@ xfs_filemap_fault(
* changes to xfs_get_blocks_direct() to map unwritten extent
* ioend for conversion on read-only mappings.
*/
ret
=
__dax_fault
(
vma
,
vmf
,
xfs_get_blocks_d
irec
t
,
NULL
);
ret
=
__dax_fault
(
vma
,
vmf
,
xfs_get_blocks_d
ax_faul
t
,
NULL
);
}
else
ret
=
filemap_fault
(
vma
,
vmf
);
xfs_iunlock
(
XFS_I
(
inode
),
XFS_MMAPLOCK_SHARED
);
...
...
@@ -1562,6 +1561,13 @@ xfs_filemap_fault(
return
ret
;
}
/*
* Similar to xfs_filemap_fault(), the DAX fault path can call into here on
* both read and write faults. Hence we need to handle both cases. There is no
* ->pmd_mkwrite callout for huge pages, so we have a single function here to
* handle both cases here. @flags carries the information on the type of fault
* occurring.
*/
STATIC
int
xfs_filemap_pmd_fault
(
struct
vm_area_struct
*
vma
,
...
...
@@ -1578,22 +1584,62 @@ xfs_filemap_pmd_fault(
trace_xfs_filemap_pmd_fault
(
ip
);
if
(
flags
&
FAULT_FLAG_WRITE
)
{
sb_start_pagefault
(
inode
->
i_sb
);
file_update_time
(
vma
->
vm_file
);
}
xfs_ilock
(
XFS_I
(
inode
),
XFS_MMAPLOCK_SHARED
);
ret
=
__dax_pmd_fault
(
vma
,
addr
,
pmd
,
flags
,
xfs_get_blocks_d
irec
t
,
xfs_end_io_dax_write
);
ret
=
__dax_pmd_fault
(
vma
,
addr
,
pmd
,
flags
,
xfs_get_blocks_d
ax_faul
t
,
NULL
);
xfs_iunlock
(
XFS_I
(
inode
),
XFS_MMAPLOCK_SHARED
);
if
(
flags
&
FAULT_FLAG_WRITE
)
sb_end_pagefault
(
inode
->
i_sb
);
return
ret
;
}
/*
 * pfn_mkwrite was originally intended to ensure we capture timestamp
 * updates on write faults. In reality, it is needed to serialise against
 * truncate, similar to page_mkwrite. Hence dax_pfn_mkwrite() is open-coded
 * here: the fault takes and drops XFS_MMAPLOCK_SHARED around the EOF check
 * so that it is ordered against the truncate barrier.
 *
 * Returns VM_FAULT_SIGBUS if the faulting page offset is at or beyond the
 * page-rounded inode size, VM_FAULT_NOPAGE otherwise.
 */
static int
xfs_filemap_pfn_mkwrite(
	struct vm_area_struct	*vma,
	struct vm_fault		*vmf)
{
	struct inode		*inode = file_inode(vma->vm_file);
	struct xfs_inode	*ip = XFS_I(inode);
	loff_t			eof_pgoff;
	int			ret;

	trace_xfs_filemap_pfn_mkwrite(ip);

	sb_start_pagefault(inode->i_sb);
	file_update_time(vma->vm_file);

	/* check that the faulting page has not raced with truncate */
	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
	eof_pgoff = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	ret = (vmf->pgoff >= eof_pgoff) ? VM_FAULT_SIGBUS : VM_FAULT_NOPAGE;
	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);

	sb_end_pagefault(inode->i_sb);
	return ret;
}
/*
 * VM operations installed for XFS file mappings. The fault, PMD fault and
 * both mkwrite entry points are XFS wrappers; map_pages uses the generic
 * filemap_map_pages() helper directly.
 */
static const struct vm_operations_struct xfs_file_vm_ops = {
	.fault		= xfs_filemap_fault,
	.pmd_fault	= xfs_filemap_pmd_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= xfs_filemap_page_mkwrite,
	.pfn_mkwrite	= xfs_filemap_pfn_mkwrite,
};
STATIC
int
...
...
fs/xfs/xfs_iomap.c
View file @
264e89ad
...
...
@@ -132,6 +132,7 @@ xfs_iomap_write_direct(
int
committed
;
int
error
;
int
lockmode
;
int
bmapi_flags
=
XFS_BMAPI_PREALLOC
;
rt
=
XFS_IS_REALTIME_INODE
(
ip
);
extsz
=
xfs_get_extsz_hint
(
ip
);
...
...
@@ -195,6 +196,23 @@ xfs_iomap_write_direct(
* Allocate and setup the transaction
*/
tp
=
xfs_trans_alloc
(
mp
,
XFS_TRANS_DIOSTRAT
);
/*
* For DAX, we do not allocate unwritten extents, but instead we zero
* the block before we commit the transaction. Ideally we'd like to do
* this outside the transaction context, but if we commit and then crash
* we may not have zeroed the blocks and this will be exposed on
* recovery of the allocation. Hence we must zero before commit.
* Further, if we are mapping unwritten extents here, we need to zero
* and convert them to written so that we don't need an unwritten extent
* callback for DAX. This also means that we need to be able to dip into
* the reserve block pool if there is no space left but we need to do
* unwritten extent conversion.
*/
if
(
IS_DAX
(
VFS_I
(
ip
)))
{
bmapi_flags
=
XFS_BMAPI_CONVERT
|
XFS_BMAPI_ZERO
;
tp
->
t_flags
|=
XFS_TRANS_RESERVE
;
}
error
=
xfs_trans_reserve
(
tp
,
&
M_RES
(
mp
)
->
tr_write
,
resblks
,
resrtextents
);
/*
...
...
@@ -221,7 +239,7 @@ xfs_iomap_write_direct(
xfs_bmap_init
(
&
free_list
,
&
firstfsb
);
nimaps
=
1
;
error
=
xfs_bmapi_write
(
tp
,
ip
,
offset_fsb
,
count_fsb
,
XFS_BMAPI_PREALLOC
,
&
firstfsb
,
resblks
,
imap
,
bmapi_flags
,
&
firstfsb
,
resblks
,
imap
,
&
nimaps
,
&
free_list
);
if
(
error
)
goto
out_bmap_cancel
;
...
...
@@ -232,6 +250,7 @@ xfs_iomap_write_direct(
error
=
xfs_bmap_finish
(
&
tp
,
&
free_list
,
&
committed
);
if
(
error
)
goto
out_bmap_cancel
;
error
=
xfs_trans_commit
(
tp
);
if
(
error
)
goto
out_unlock
;
...
...
fs/xfs/xfs_mount.h
View file @
264e89ad
...
...
@@ -338,4 +338,7 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
extern
void
xfs_set_low_space_thresholds
(
struct
xfs_mount
*
);
int
xfs_zero_extent
(
struct
xfs_inode
*
ip
,
xfs_fsblock_t
start_fsb
,
xfs_off_t
count_fsb
);
#endif
/* __XFS_MOUNT_H__ */
fs/xfs/xfs_trace.h
View file @
264e89ad
...
...
@@ -689,6 +689,7 @@ DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
DEFINE_INODE_EVENT
(
xfs_filemap_fault
);
DEFINE_INODE_EVENT
(
xfs_filemap_pmd_fault
);
DEFINE_INODE_EVENT
(
xfs_filemap_page_mkwrite
);
DEFINE_INODE_EVENT
(
xfs_filemap_pfn_mkwrite
);
DECLARE_EVENT_CLASS
(
xfs_iref_class
,
TP_PROTO
(
struct
xfs_inode
*
ip
,
unsigned
long
caller_ip
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment