Commit c0894c6c authored by Oleg Drokin, committed by Greg Kroah-Hartman

staging/lustre/llite: Adjust comments to better conform to coding style

This patch fixes "Block comments use a trailing */ on a separate line"
warnings from checkpatch.
Signed-off-by: Oleg Drokin <green@linuxhacker.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 6f789a6a
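
For reference, the checkpatch rule being applied throughout the diff below looks like this. This is a minimal illustrative snippet, not code taken from the patch itself:

/* Before (checkpatch warns): the block comment closes at the end of the text line. */
/* some explanation of the code
 * continued on a second line */

/* After (preferred): the closing of the block comment sits on its own line. */
/* some explanation of the code
 * continued on a second line
 */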
@@ -80,7 +80,8 @@ static void ll_release(struct dentry *de)
 * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
 * an AST before calling d_revalidate_it(). The dentry still exists (marked
 * INVALID) so d_lookup() matches it, but we have no lock on it (so
-* lock_match() fails) and we spin around real_lookup(). */
+* lock_match() fails) and we spin around real_lookup().
+*/
 static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry,
 unsigned int len, const char *str,
 const struct qstr *name)
@@ -117,7 +118,8 @@ static inline int return_if_equal(struct ldlm_lock *lock, void *data)
 /* find any ldlm lock of the inode in mdc and lov
 * return 0 not find
 * 1 find one
-* < 0 error */
+* < 0 error
+*/
 static int find_cbdata(struct inode *inode)
 {
 struct ll_sb_info *sbi = ll_i2sbi(inode);
@@ -163,10 +165,12 @@ static int ll_ddelete(const struct dentry *de)
 /* Disable this piece of code temporarily because this is called
 * inside dcache_lock so it's not appropriate to do lots of work
 * here. ATTENTION: Before this piece of code enabling, LU-2487 must be
-* resolved. */
+* resolved.
+*/
 #if 0
 /* if not ldlm lock for this inode, set i_nlink to 0 so that
-* this inode can be recycled later b=20433 */
+* this inode can be recycled later b=20433
+*/
 if (d_really_is_positive(de) && !find_cbdata(d_inode(de)))
 clear_nlink(d_inode(de));
 #endif
@@ -216,7 +220,8 @@ void ll_intent_drop_lock(struct lookup_intent *it)
 ldlm_lock_decref(&handle, it->d.lustre.it_lock_mode);
 /* bug 494: intent_release may be called multiple times, from
-* this thread and we don't want to double-decref this lock */
+* this thread and we don't want to double-decref this lock
+*/
 it->d.lustre.it_lock_mode = 0;
 if (it->d.lustre.it_remote_lock_mode != 0) {
 handle.cookie = it->d.lustre.it_remote_lock_handle;
@@ -294,7 +299,8 @@ void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode)
 if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) {
 /* on 2.6 there are situation when several lookups and
 * revalidations may be requested during single operation.
-* therefore, we don't release intent here -bzzz */
+* therefore, we don't release intent here -bzzz
+*/
 ll_intent_drop_lock(it);
 }
 }
...
@@ -379,7 +379,8 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
 &it.d.lustre.it_lock_handle, dir, NULL);
 } else {
 /* for cross-ref object, l_ast_data of the lock may not be set,
-* we reset it here */
+* we reset it here
+*/
 md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, &lockh.cookie,
 dir, NULL);
 }
@@ -737,8 +738,9 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
 }
 /* In the following we use the fact that LOV_USER_MAGIC_V1 and
-LOV_USER_MAGIC_V3 have the same initial fields so we do not
-need to make the distinction between the 2 versions */
+* LOV_USER_MAGIC_V3 have the same initial fields so we do not
+* need to make the distinction between the 2 versions
+*/
 if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
 char *param = NULL;
 char *buf;
@@ -929,7 +931,8 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
 }
 /* Store it the hsm_copy for later copytool use.
-* Always modified even if no lsm. */
+* Always modified even if no lsm.
+*/
 copy->hc_data_version = data_version;
 }
@@ -1006,11 +1009,13 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
 }
 /* Store it the hsm_copy for later copytool use.
-* Always modified even if no lsm. */
+* Always modified even if no lsm.
+*/
 hpk.hpk_data_version = data_version;
 /* File could have been stripped during archiving, so we need
-* to check anyway. */
+* to check anyway.
+*/
 if ((copy->hc_hai.hai_action == HSMA_ARCHIVE) &&
 (copy->hc_data_version != data_version)) {
 CDEBUG(D_HSM, "File data version mismatched. File content was changed during archiving. "
@@ -1022,7 +1027,8 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
 * the cdt will loop on retried archive requests.
 * The policy engine will ask for a new archive later
 * when the file will not be modified for some tunable
-* time */
+* time
+*/
 /* we do not notify caller */
 hpk.hpk_flags &= ~HP_FLAG_RETRY;
 /* hpk_errval must be >= 0 */
@@ -1150,7 +1156,8 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
 return rc;
 }
 /* If QIF_SPACE is not set, client should collect the
-* space usage from OSSs by itself */
+* space usage from OSSs by itself
+*/
 if (cmd == Q_GETQUOTA &&
 !(oqctl->qc_dqblk.dqb_valid & QIF_SPACE) &&
 !oqctl->qc_dqblk.dqb_curspace) {
@@ -1201,7 +1208,8 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
 /* This function tries to get a single name component,
 * to send to the server. No actual path traversal involved,
-* so we limit to NAME_MAX */
+* so we limit to NAME_MAX
+*/
 static char *ll_getname(const char __user *filename)
 {
 int ret = 0, len;
@@ -1803,7 +1811,8 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 hpk.hpk_data_version = 0;
 /* File may not exist in Lustre; all progress
-* reported to Lustre root */
+* reported to Lustre root
+*/
 rc = obd_iocontrol(cmd, sbi->ll_md_exp, sizeof(hpk), &hpk,
 NULL);
 return rc;
...
@@ -76,7 +76,8 @@ void vvp_write_complete(struct ccc_object *club, struct ccc_page *page)
 /** Queues DONE_WRITING if
 * - done writing is allowed;
-* - inode has no no dirty pages; */
+* - inode has no no dirty pages;
+*/
 void ll_queue_done_writing(struct inode *inode, unsigned long flags)
 {
 struct ll_inode_info *lli = ll_i2info(inode);
@@ -106,7 +107,8 @@ void ll_queue_done_writing(struct inode *inode, unsigned long flags)
 * close() happen, epoch is closed as the inode is marked as
 * LLIF_EPOCH_PENDING. When pages are written inode should not
 * be inserted into the queue again, clear this flag to avoid
-* it. */
+* it.
+*/
 lli->lli_flags &= ~LLIF_DONE_WRITING;
 wake_up(&lcq->lcq_waitq);
@@ -147,7 +149,8 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
 LASSERT(*och);
 LASSERT(!lli->lli_pending_och);
 /* Inode is dirty and there is no pending write done
-* request yet, DONE_WRITE is to be sent later. */
+* request yet, DONE_WRITE is to be sent later.
+*/
 lli->lli_flags |= LLIF_EPOCH_PENDING;
 lli->lli_pending_och = *och;
 spin_unlock(&lli->lli_lock);
@@ -159,7 +162,8 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
 if (flags & LLIF_DONE_WRITING) {
 /* Some pages are still dirty, it is early to send
 * DONE_WRITE. Wait until all pages will be flushed
-* and try DONE_WRITE again later. */
+* and try DONE_WRITE again later.
+*/
 LASSERT(!(lli->lli_flags & LLIF_DONE_WRITING));
 lli->lli_flags |= LLIF_DONE_WRITING;
 spin_unlock(&lli->lli_lock);
@@ -187,7 +191,8 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
 }
 /* There is a pending DONE_WRITE -- close epoch with no
-* attribute change. */
+* attribute change.
+*/
 if (lli->lli_flags & LLIF_EPOCH_PENDING) {
 spin_unlock(&lli->lli_lock);
 goto out;
@@ -295,7 +300,8 @@ static void ll_done_writing(struct inode *inode)
 rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL);
 if (rc == -EAGAIN)
 /* MDS has instructed us to obtain Size-on-MDS attribute from
-* OSTs and send setattr to back to MDS. */
+* OSTs and send setattr to back to MDS.
+*/
 rc = ll_som_update(inode, op_data);
 else if (rc)
 CERROR("inode %lu mdc done_writing failed: rc = %d\n",
...
@@ -93,9 +93,10 @@ struct ll_remote_perm {
 gid_t lrp_gid;
 uid_t lrp_fsuid;
 gid_t lrp_fsgid;
 int lrp_access_perm; /* MAY_READ/WRITE/EXEC, this
-is access permission with
-lrp_fsuid/lrp_fsgid. */
+* is access permission with
+* lrp_fsuid/lrp_fsgid.
+*/
 };
 enum lli_flags {
@@ -106,7 +107,8 @@ enum lli_flags {
 /* DONE WRITING is allowed. */
 LLIF_DONE_WRITING = (1 << 2),
 /* Sizeon-on-MDS attributes are changed. An attribute update needs to
-* be sent to MDS. */
+* be sent to MDS.
+*/
 LLIF_SOM_DIRTY = (1 << 3),
 /* File data is modified. */
 LLIF_DATA_MODIFIED = (1 << 4),
@@ -130,7 +132,8 @@ struct ll_inode_info {
 /* identifying fields for both metadata and data stacks. */
 struct lu_fid lli_fid;
 /* Parent fid for accessing default stripe data on parent directory
-* for allocating OST objects after a mknod() and later open-by-FID. */
+* for allocating OST objects after a mknod() and later open-by-FID.
+*/
 struct lu_fid lli_pfid;
 struct list_head lli_close_list;
@@ -139,11 +142,13 @@ struct ll_inode_info {
 /* handle is to be sent to MDS later on done_writing and setattr.
 * Open handle data are needed for the recovery to reconstruct
-* the inode state on the MDS. XXX: recovery is not ready yet. */
+* the inode state on the MDS. XXX: recovery is not ready yet.
+*/
 struct obd_client_handle *lli_pending_och;
 /* We need all three because every inode may be opened in different
-* modes */
+* modes
+*/
 struct obd_client_handle *lli_mds_read_och;
 struct obd_client_handle *lli_mds_write_och;
 struct obd_client_handle *lli_mds_exec_och;
@@ -160,7 +165,8 @@ struct ll_inode_info {
 spinlock_t lli_agl_lock;
 /* Try to make the d::member and f::member are aligned. Before using
-* these members, make clear whether it is directory or not. */
+* these members, make clear whether it is directory or not.
+*/
 union {
 /* for directory */
 struct {
@@ -171,13 +177,15 @@ struct ll_inode_info {
 /* since parent-child threads can share the same @file
 * struct, "opendir_key" is the token when dir close for
 * case of parent exit before child -- it is me should
-* cleanup the dir readahead. */
+* cleanup the dir readahead.
+*/
 void *d_opendir_key;
 struct ll_statahead_info *d_sai;
 /* protect statahead stuff. */
 spinlock_t d_sa_lock;
-/* "opendir_pid" is the token when lookup/revalid
-* -- I am the owner of dir statahead. */
+/* "opendir_pid" is the token when lookup/revalidate
+* -- I am the owner of dir statahead.
+*/
 pid_t d_opendir_pid;
 } d;
@@ -303,7 +311,8 @@ static inline struct ll_inode_info *ll_i2info(struct inode *inode)
 }
 /* default to about 40meg of readahead on a given system. That much tied
-* up in 512k readahead requests serviced at 40ms each is about 1GB/s. */
+* up in 512k readahead requests serviced at 40ms each is about 1GB/s.
+*/
 #define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_CACHE_SHIFT))
 /* default to read-ahead full files smaller than 2MB on the second read */
@@ -342,11 +351,13 @@ struct ra_io_arg {
 unsigned long ria_end; /* end offset of read-ahead*/
 /* If stride read pattern is detected, ria_stoff means where
 * stride read is started. Note: for normal read-ahead, the
-* value here is meaningless, and also it will not be accessed*/
+* value here is meaningless, and also it will not be accessed
+*/
 pgoff_t ria_stoff;
 /* ria_length and ria_pages are the length and pages length in the
 * stride I/O mode. And they will also be used to check whether
-* it is stride I/O read-ahead in the read-ahead pages*/
+* it is stride I/O read-ahead in the read-ahead pages
+*/
 unsigned long ria_length;
 unsigned long ria_pages;
 };
@@ -453,7 +464,8 @@ struct eacl_table {
 struct ll_sb_info {
 /* this protects pglist and ra_info. It isn't safe to
-* grab from interrupt contexts */
+* grab from interrupt contexts
+*/
 spinlock_t ll_lock;
 spinlock_t ll_pp_extent_lock; /* pp_extent entry*/
 spinlock_t ll_process_lock; /* ll_rw_process_info */
@@ -500,13 +512,16 @@ struct ll_sb_info {
 /* metadata stat-ahead */
 unsigned int ll_sa_max; /* max statahead RPCs */
 atomic_t ll_sa_total; /* statahead thread started
-* count */
+* count
+*/
 atomic_t ll_sa_wrong; /* statahead thread stopped for
-* low hit ratio */
+* low hit ratio
+*/
 atomic_t ll_agl_total; /* AGL thread started count */
 dev_t ll_sdev_orig; /* save s_dev before assign for
-* clustered nfs */
+* clustered nfs
+*/
 struct rmtacl_ctl_table ll_rct;
 struct eacl_table ll_et;
 __kernel_fsid_t ll_fsid;
@@ -617,13 +632,15 @@ struct ll_file_data {
 __u32 fd_flags;
 fmode_t fd_omode;
 /* openhandle if lease exists for this file.
-* Borrow lli->lli_och_mutex to protect assignment */
+* Borrow lli->lli_och_mutex to protect assignment
+*/
 struct obd_client_handle *fd_lease_och;
 struct obd_client_handle *fd_och;
 struct file *fd_file;
 /* Indicate whether need to report failure when close.
 * true: failure is known, not report again.
-* false: unknown failure, should report. */
+* false: unknown failure, should report.
+*/
 bool fd_write_failed;
 };
@@ -1105,39 +1122,44 @@ static inline u64 rce_ops2valid(int ops)
 struct ll_statahead_info {
 struct inode *sai_inode;
 atomic_t sai_refcount; /* when access this struct, hold
-* refcount */
+* refcount
+*/
 unsigned int sai_generation; /* generation for statahead */
 unsigned int sai_max; /* max ahead of lookup */
 __u64 sai_sent; /* stat requests sent count */
 __u64 sai_replied; /* stat requests which received
-* reply */
+* reply
+*/
 __u64 sai_index; /* index of statahead entry */
 __u64 sai_index_wait; /* index of entry which is the
-* caller is waiting for */
+* caller is waiting for
+*/
 __u64 sai_hit; /* hit count */
 __u64 sai_miss; /* miss count:
 * for "ls -al" case, it includes
 * hidden dentry miss;
 * for "ls -l" case, it does not
 * include hidden dentry miss.
 * "sai_miss_hidden" is used for
 * the later case.
 */
 unsigned int sai_consecutive_miss; /* consecutive miss */
 unsigned int sai_miss_hidden;/* "ls -al", but first dentry
-* is not a hidden one */
+* is not a hidden one
+*/
 unsigned int sai_skip_hidden;/* skipped hidden dentry count */
 unsigned int sai_ls_all:1, /* "ls -al", do stat-ahead for
-* hidden entries */
+* hidden entries
+*/
 sai_agl_valid:1;/* AGL is valid for the dir */
 wait_queue_head_t sai_waitq; /* stat-ahead wait queue */
 struct ptlrpc_thread sai_thread; /* stat-ahead thread */
 struct ptlrpc_thread sai_agl_thread; /* AGL thread */
 struct list_head sai_entries; /* entry list */
 struct list_head sai_entries_received; /* entries returned */
 struct list_head sai_entries_stated; /* entries stated */
 struct list_head sai_entries_agl; /* AGL entries to be sent */
 struct list_head sai_cache[LL_SA_CACHE_SIZE];
 spinlock_t sai_cache_lock[LL_SA_CACHE_SIZE];
 atomic_t sai_cache_count; /* entry count in cache */
 };
@@ -1311,13 +1333,15 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
 /** direct write pages */
 struct ll_dio_pages {
 /** page array to be written. we don't support
-* partial pages except the last one. */
+* partial pages except the last one.
+*/
 struct page **ldp_pages;
 /* offset of each page */
 loff_t *ldp_offsets;
 /** if ldp_offsets is NULL, it means a sequential
 * pages to be written, then this is the file offset
-* of the * first page. */
+* of the first page.
+*/
 loff_t ldp_start_offset;
 /** how many bytes are to be written. */
 size_t ldp_size;
@@ -1359,7 +1383,8 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
 * remote MDT, where the object is, will grant
 * UPDATE|PERM lock. The inode will be attached to both
 * LOOKUP and PERM locks, so revoking either locks will
-* case the dcache being cleared */
+* case the dcache being cleared
+*/
 if (it->d.lustre.it_remote_lock_mode) {
 handle.cookie = it->d.lustre.it_remote_lock_handle;
 CDEBUG(D_DLMTRACE, "setting l_data to inode %p(%lu/%u) for remote lock %#llx\n",
...
@@ -151,8 +151,7 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
 LASSERT(cio->cui_cl.cis_io == io);
-/* mmap lock must be MANDATORY it has to cache
-* pages. */
+/* mmap lock must be MANDATORY it has to cache pages. */
 io->ci_lockreq = CILR_MANDATORY;
 cio->cui_fd = fd;
 } else {
@@ -199,7 +198,8 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
 /* we grab lli_trunc_sem to exclude truncate case.
 * Otherwise, we could add dirty pages into osc cache
-* while truncate is on-going. */
+* while truncate is on-going.
+*/
 inode = ccc_object_inode(io->ci_obj);
 lli = ll_i2info(inode);
 down_read(&lli->lli_trunc_sem);
@@ -220,7 +220,8 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
 /* page was truncated and lock was cancelled, return
 * ENODATA so that VM_FAULT_NOPAGE will be returned
-* to handle_mm_fault(). */
+* to handle_mm_fault().
+*/
 if (result == 0)
 result = -ENODATA;
 } else if (!PageDirty(vmpage)) {
@@ -313,7 +314,8 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
 result = cl_io_loop(env, io);
 /* ft_flags are only valid if we reached
-* the call to filemap_fault */
+* the call to filemap_fault
+*/
 if (vio->u.fault.fault.ft_flags_valid)
 fault_ret = vio->u.fault.fault.ft_flags;
@@ -342,9 +344,10 @@ static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 int result;
 sigset_t set;
-/* Only SIGKILL and SIGTERM is allowed for fault/nopage/mkwrite
+/* Only SIGKILL and SIGTERM are allowed for fault/nopage/mkwrite
 * so that it can be killed by admin but not cause segfault by
-* other signals. */
+* other signals.
+*/
 set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));
 restart:
@@ -445,7 +448,8 @@ static void ll_vm_close(struct vm_area_struct *vma)
 }
 /* XXX put nice comment here. talk about __free_pte -> dirty pages and
-* nopage's reference passing to the pte */
+* nopage's reference passing to the pte
+*/
 int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last)
 {
 int rc = -ENOENT;
...
@@ -105,7 +105,8 @@ struct inode *search_inode_for_lustre(struct super_block *sb,
 return ERR_PTR(rc);
 /* Because inode is NULL, ll_prep_md_op_data can not
-* be used here. So we allocate op_data ourselves */
+* be used here. So we allocate op_data ourselves
+*/
 op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
 if (!op_data)
 return ERR_PTR(-ENOMEM);
@@ -209,7 +210,8 @@ static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name,
 unsigned type)
 {
 /* It is hack to access lde_fid for comparison with lgd_fid.
-* So the input 'name' must be part of the 'lu_dirent'. */
+* So the input 'name' must be part of the 'lu_dirent'.
+*/
 struct lu_dirent *lde = container_of0(name, struct lu_dirent, lde_name);
 struct ll_getname_data *lgd =
 container_of(ctx, struct ll_getname_data, ctx);
...
@@ -345,7 +345,8 @@ static ssize_t max_read_ahead_whole_mb_store(struct kobject *kobj,
 return rc;
 /* Cap this at the current max readahead window size, the readahead
-* algorithm does this anyway so it's pointless to set it larger. */
+* algorithm does this anyway so it's pointless to set it larger.
+*/
 if (pages_number > sbi->ll_ra_info.ra_max_pages_per_file) {
 CERROR("can't set max_read_ahead_whole_mb more than max_read_ahead_per_file_mb: %lu\n",
 sbi->ll_ra_info.ra_max_pages_per_file >> (20 - PAGE_CACHE_SHIFT));
...
@@ -180,7 +180,8 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 __u64 bits = lock->l_policy_data.l_inodebits.bits;
 /* Inode is set to lock->l_resource->lr_lvb_inode
-* for mdc - bug 24555 */
+* for mdc - bug 24555
+*/
 LASSERT(!lock->l_ast_data);
 if (!inode)
@@ -202,7 +203,8 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 }
 /* For OPEN locks we differentiate between lock modes
-* LCK_CR, LCK_CW, LCK_PR - bug 22891 */
+* LCK_CR, LCK_CW, LCK_PR - bug 22891
+*/
 if (bits & MDS_INODELOCK_OPEN)
 ll_have_md_lock(inode, &bits, lock->l_req_mode);
@@ -285,7 +287,8 @@ __u32 ll_i2suppgid(struct inode *i)
 /* Pack the required supplementary groups into the supplied groups array.
 * If we don't need to use the groups from the target inode(s) then we
 * instead pack one or more groups from the user's supplementary group
-* array in case it might be useful. Not needed if doing an MDS-side upcall. */
+* array in case it might be useful. Not needed if doing an MDS-side upcall.
+*/
 void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2)
 {
 LASSERT(i1);
@@ -388,7 +391,8 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
 int rc = 0;
 /* NB 1 request reference will be taken away by ll_intent_lock()
-* when I return */
+* when I return
+*/
 CDEBUG(D_DENTRY, "it %p it_disposition %x\n", it,
 it->d.lustre.it_disposition);
 if (!it_disposition(it, DISP_LOOKUP_NEG)) {
@@ -399,13 +403,14 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
 ll_set_lock_data(ll_i2sbi(parent)->ll_md_exp, inode, it, &bits);
 /* We used to query real size from OSTs here, but actually
-this is not needed. For stat() calls size would be updated
-from subsequent do_revalidate()->ll_inode_revalidate_it() in
-2.4 and
-vfs_getattr_it->ll_getattr()->ll_inode_revalidate_it() in 2.6
-Everybody else who needs correct file size would call
-ll_glimpse_size or some equivalent themselves anyway.
-Also see bug 7198. */
+* this is not needed. For stat() calls size would be updated
+* from subsequent do_revalidate()->ll_inode_revalidate_it() in
+* 2.4 and
+* vfs_getattr_it->ll_getattr()->ll_inode_revalidate_it() in 2.6
+* Everybody else who needs correct file size would call
+* ll_glimpse_size or some equivalent themselves anyway.
+* Also see bug 7198.
+*/
 }
 /* Only hash *de if it is unhashed (new dentry).
@@ -422,8 +427,9 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
 *de = alias;
 } else if (!it_disposition(it, DISP_LOOKUP_NEG) &&
 !it_disposition(it, DISP_OPEN_CREATE)) {
-/* With DISP_OPEN_CREATE dentry will
-instantiated in ll_create_it. */
+/* With DISP_OPEN_CREATE dentry will be
+* instantiated in ll_create_it.
+*/
 LASSERT(!d_inode(*de));
 d_instantiate(*de, inode);
 }
@@ -672,7 +678,8 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
 /* We asked for a lock on the directory, but were granted a
 * lock on the inode. Since we finally have an inode pointer,
-* stuff it in the lock. */
+* stuff it in the lock.
+*/
 CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n",
 inode, inode->i_ino, inode->i_generation);
 ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
@@ -867,7 +874,8 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
 /* The MDS sent back the EA because we unlinked the last reference
 * to this file. Use this EA to unlink the objects on the OST.
 * It's opaque so we don't swab here; we leave it to obd_unpackmd() to
-* check it is complete and sensible. */
+* check it is complete and sensible.
+*/
 eadata = req_capsule_server_sized_get(&request->rq_pill, &RMF_MDT_MD,
 body->eadatasize);
 LASSERT(eadata);
@@ -917,7 +925,8 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
 /* ll_unlink() doesn't update the inode with the new link count.
 * Instead, ll_ddelete() and ll_d_iput() will update it based upon if there
 * is any lock existing. They will recycle dentries and inodes based upon locks
-* too. b=20433 */
+* too. b=20433
+*/
 static int ll_unlink(struct inode *dir, struct dentry *dentry)
 {
 struct ptlrpc_request *request = NULL;
...
@@ -120,7 +120,8 @@ static struct ll_cl_context *ll_cl_init(struct file *file,
 /* this is too bad. Someone is trying to write the
 * page w/o holding inode mutex. This means we can
-* add dirty pages into cache during truncate */
+* add dirty pages into cache during truncate
+*/
 CERROR("Proc %s is dirtying page w/o inode lock, this will break truncate\n",
 current->comm);
 dump_stack();
@@ -239,7 +240,8 @@ int ll_prepare_write(struct file *file, struct page *vmpage, unsigned from,
 ll_cl_fini(lcc);
 }
 /* returning 0 in prepare assumes commit must be called
-* afterwards */
+* afterwards
+*/
 } else {
 result = PTR_ERR(lcc);
 }
@@ -295,8 +297,8 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
 * to get an ra budget that is larger than the remaining readahead pages
 * and reach here at exactly the same time. They will compute /a ret to
 * consume the remaining pages, but will fail at atomic_add_return() and
-* get a zero ra window, although there is still ra space remaining. - Jay */
+* get a zero ra window, although there is still ra space remaining. - Jay
+*/
 static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
 struct ra_io_arg *ria,
 unsigned long pages)
@@ -306,7 +308,8 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
 /* If read-ahead pages left are less than 1M, do not do read-ahead,
 * otherwise it will form small read RPC(< 1M), which hurt server
-* performance a lot. */
+* performance a lot.
+*/
 ret = min(ra->ra_max_pages - atomic_read(&ra->ra_cur_pages), pages);
 if (ret < 0 || ret < min_t(long, PTLRPC_MAX_BRW_PAGES, pages)) {
 ret = 0;
@@ -323,7 +326,8 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
 * branch is more expensive than subtracting zero from the result.
 *
 * Strided read is left unaligned to avoid small fragments beyond
-* the RPC boundary from needing an extra read RPC. */
+* the RPC boundary from needing an extra read RPC.
+*/
 if (ria->ria_pages == 0) {
 long beyond_rpc = (ria->ria_start + ret) % PTLRPC_MAX_BRW_PAGES;
@@ -514,13 +518,15 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
 /* Limit this to the blocksize instead of PTLRPC_BRW_MAX_SIZE, since we don't
 * know what the actual RPC size is. If this needs to change, it makes more
 * sense to tune the i_blkbits value for the file based on the OSTs it is
-* striped over, rather than having a constant value for all files here. */
+* striped over, rather than having a constant value for all files here.
+*/
 /* RAS_INCREASE_STEP should be (1UL << (inode->i_blkbits - PAGE_CACHE_SHIFT)).
 * Temporarily set RAS_INCREASE_STEP to 1MB. After 4MB RPC is enabled
 * by default, this should be adjusted corresponding with max_read_ahead_mb
 * and max_read_ahead_per_file_mb otherwise the readahead budget can be used
-* up quickly which will affect read performance significantly. See LU-2816 */
+* up quickly which will affect read performance significantly. See LU-2816
+*/
 #define RAS_INCREASE_STEP(inode) (ONE_MB_BRW_SIZE >> PAGE_CACHE_SHIFT)
 static inline int stride_io_mode(struct ll_readahead_state *ras)
@@ -599,7 +605,8 @@ static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
 /* If ria_length == ria_pages, it means non-stride I/O mode,
 * idx should always inside read-ahead window in this case
 * For stride I/O mode, just check whether the idx is inside
-* the ria_pages. */
+* the ria_pages.
+*/
 return ria->ria_length == 0 || ria->ria_length == ria->ria_pages ||
 (idx >= ria->ria_stoff && (idx - ria->ria_stoff) %
 ria->ria_length < ria->ria_pages);
@@ -633,11 +640,13 @@ static int ll_read_ahead_pages(const struct lu_env *env,
 } else if (stride_ria) {
 /* If it is not in the read-ahead window, and it is
 * read-ahead mode, then check whether it should skip
-* the stride gap */
+* the stride gap
+*/
 pgoff_t offset;
 /* FIXME: This assertion only is valid when it is for
 * forward read-ahead, it will be fixed when backward
-* read-ahead is implemented */
+* read-ahead is implemented
+*/
 LASSERTF(page_idx > ria->ria_stoff, "Invalid page_idx %lu rs %lu re %lu ro %lu rl %lu rp %lu\n",
 page_idx,
 ria->ria_start, ria->ria_end, ria->ria_stoff,
@@ -720,7 +729,8 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
 */
 /* Note: we only trim the RPC, instead of extending the RPC
 * to the boundary, so to avoid reading too much pages during
-* random reading. */
+* random reading.
+*/
 rpc_boundary = (end + 1) & (~(PTLRPC_MAX_BRW_PAGES - 1));
 if (rpc_boundary > 0)
 rpc_boundary--;
@@ -773,7 +783,8 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
 * the ras we need to go back and update the ras so that the
 * next read-ahead tries from where we left off. we only do so
 * if the region we failed to issue read-ahead on is still ahead
-* of the app and behind the next index to start read-ahead from */
+* of the app and behind the next index to start read-ahead from
+*/
 CDEBUG(D_READA, "ra_end %lu end %lu stride end %lu \n",
 ra_end, end, ria->ria_end);
@@ -879,7 +890,8 @@ static void ras_update_stride_detector(struct ll_readahead_state *ras,
 }
 /* Stride Read-ahead window will be increased inc_len according to
-* stride I/O pattern */
+* stride I/O pattern
+*/
 static void ras_stride_increase_window(struct ll_readahead_state *ras,
 struct ll_ra_info *ra,
 unsigned long inc_len)
@@ -950,7 +962,8 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 * or reads to some other part of the file. Secondly if we get a
 * read-ahead miss that we think we've previously issued. This can
 * be a symptom of there being so many read-ahead pages that the VM is
-* reclaiming it before we get to it. */
+* reclaiming it before we get to it.
+*/
 if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
 zero = 1;
 ll_ra_stats_inc_sbi(sbi, RA_STAT_DISTANT_READPAGE);
@@ -967,7 +980,8 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 * file up to ra_max_pages_per_file. This is simply a best effort
 * and only occurs once per open file. Normal RA behavior is reverted
 * to for subsequent IO. The mmap case does not increment
-* ras_requests and thus can never trigger this behavior. */
+* ras_requests and thus can never trigger this behavior.
+*/
 if (ras->ras_requests == 2 && !ras->ras_request_index) {
 __u64 kms_pages;
@@ -1013,14 +1027,16 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 stride_io_mode(ras)) {
 /*If stride-RA hit cache miss, the stride dector
 *will not be reset to avoid the overhead of
-*redetecting read-ahead mode */
+*redetecting read-ahead mode
+*/
 if (index != ras->ras_last_readpage + 1)
 ras->ras_consecutive_pages = 0;
 ras_reset(inode, ras, index);
 RAS_CDEBUG(ras);
 } else {
 /* Reset both stride window and normal RA
-* window */
+* window
+*/
 ras_reset(inode, ras, index);
 ras->ras_consecutive_pages++;
 ras_stride_reset(ras);
@@ -1029,7 +1045,8 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 } else if (stride_io_mode(ras)) {
 /* If this is contiguous read but in stride I/O mode
 * currently, check whether stride step still is valid,
-* if invalid, it will reset the stride ra window*/
+* if invalid, it will reset the stride ra window
+*/
 if (!index_in_stride_window(ras, index)) {
 /* Shrink stride read-ahead window to be zero */
 ras_stride_reset(ras);
@@ -1045,7 +1062,8 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 if (stride_io_mode(ras))
 /* Since stride readahead is sensitive to the offset
 * of read-ahead, so we use original offset here,
-* instead of ras_window_start, which is RPC aligned */
+* instead of ras_window_start, which is RPC aligned
+*/
 ras->ras_next_readahead = max(index, ras->ras_next_readahead);
 else
 ras->ras_next_readahead = max(ras->ras_window_start,
@@ -1053,7 +1071,8 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 RAS_CDEBUG(ras);
 /* Trigger RA in the mmap case where ras_consecutive_requests
-* is not incremented and thus can't be used to trigger RA */
+* is not incremented and thus can't be used to trigger RA
+*/
 if (!ras->ras_window_len && ras->ras_consecutive_pages == 4) {
 ras->ras_window_len = RAS_INCREASE_STEP(inode);
 goto out_unlock;
@@ -1151,14 +1170,16 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
 /* Flush page failed because the extent is being written out.
 * Wait for the write of extent to be finished to avoid
 * breaking kernel which assumes ->writepage should mark
-* PageWriteback or clean the page. */
+* PageWriteback or clean the page.
+*/
 result = cl_sync_file_range(inode, offset,
 offset + PAGE_CACHE_SIZE - 1,
 CL_FSYNC_LOCAL, 1);
 if (result > 0) {
 /* actually we may have written more than one page.
 * decreasing this page because the caller will count
-* it. */
+* it.
+*/
 wbc->nr_to_write -= result - 1;
 result = 0;
 }
@@ -1208,7 +1229,8 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
 if (sbi->ll_umounting)
 /* if the mountpoint is being umounted, all pages have to be
 * evicted to avoid hitting LBUG when truncate_inode_pages()
-* is called later on. */
+* is called later on.
+*/
 ignore_layout = 1;
 result = cl_sync_file_range(inode, start, end, mode, ignore_layout);
 if (result > 0) {
...
@@ -145,7 +145,8 @@ static int ll_releasepage(struct page *vmpage, RELEASEPAGE_ARG_TYPE gfp_mask)
 /* If we can't allocate an env we won't call cl_page_put()
 * later on which further means it's impossible to drop
 * page refcount by cl_page, so ask kernel to not free
- * this page. */
+ * this page.
+ */
 return 0;
 page = cl_vmpage_page(vmpage, obj);
@@ -212,7 +213,8 @@ static inline int ll_get_user_pages(int rw, unsigned long user_addr,
 }
 /* ll_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer */
+ * @pages: array of page struct pointers underlying target buffer
+ */
 static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
 {
 int i;
@@ -266,7 +268,8 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
 do_io = true;
 /* check the page type: if the page is a host page, then do
- * write directly */
+ * write directly
+ */
 if (clp->cp_type == CPT_CACHEABLE) {
 struct page *vmpage = cl_page_vmpage(env, clp);
 struct page *src_page;
@@ -284,14 +287,16 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
 kunmap_atomic(src);
 /* make sure page will be added to the transfer by
- * cl_io_submit()->...->vvp_page_prep_write(). */
+ * cl_io_submit()->...->vvp_page_prep_write().
+ */
 if (rw == WRITE)
 set_page_dirty(vmpage);
 if (rw == READ) {
 /* do not issue the page for read, since it
 * may reread a ra page which has NOT uptodate
- * bit set. */
+ * bit set.
+ */
 cl_page_disown(env, io, clp);
 do_io = false;
 }
@@ -359,7 +364,8 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
 * kmalloc limit. We need to fit all of the brw_page structs, each one
 * representing PAGE_SIZE worth of user data, into a single buffer, and
 * then truncate this to be a full-sized RPC. For 4kB PAGE_SIZE this is
- * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc. */
+ * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc.
+ */
 #define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \
 ~(DT_MAX_BRW_SIZE - 1))
 static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
@@ -433,7 +439,8 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 * for the request, shrink it to a smaller
 * PAGE_SIZE multiple and try again.
 * We should always be able to kmalloc for a
- * page worth of page pointers = 4MB on i386. */
+ * page worth of page pointers = 4MB on i386.
+ */
 if (result == -ENOMEM &&
 size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
 PAGE_CACHE_SIZE) {
......
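The MAX_DIO_SIZE comment in the hunks above quotes concrete ceilings (roughly 22MB of direct I/O per call for a 128kB kmalloc limit, 682MB for a 4MB limit). Below is a minimal userspace sketch of that arithmetic only; the 24-byte size assumed for struct brw_page and the 4kB page size are illustrative assumptions chosen because they land near the quoted figures, not values taken from this patch, and the sketch omits the macro's final rounding down to a multiple of DT_MAX_BRW_SIZE.

#include <stdio.h>

int main(void)
{
	const unsigned long page_size = 4096;      /* assumed 4kB PAGE_SIZE */
	const unsigned long brw_page_bytes = 24;   /* assumed sizeof(struct brw_page) */
	const unsigned long kmalloc_max[] = { 128UL << 10, 4UL << 20 };

	for (int i = 0; i < 2; i++) {
		/* descriptors that fit in one allocation, times the user
		 * data each descriptor covers */
		unsigned long bytes = kmalloc_max[i] / brw_page_bytes * page_size;

		printf("%4lukB kmalloc -> about %.1f MB of DIO per call\n",
		       kmalloc_max[i] >> 10, bytes / 1048576.0);
	}
	return 0;
}

Run as-is it prints about 21.3 MB and 682.7 MB, which matches the rounded numbers in the comment.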
@@ -494,7 +494,8 @@ static void ll_sai_put(struct ll_statahead_info *sai)
 if (unlikely(atomic_read(&sai->sai_refcount) > 0)) {
 /* It is race case, the interpret callback just hold
- * a reference count */
+ * a reference count
+ */
 spin_unlock(&lli->lli_sa_lock);
 return;
 }
@@ -631,7 +632,8 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
 LASSERT(fid_is_zero(&minfo->mi_data.op_fid2));
 /* XXX: No fid in reply, this is probably cross-ref case.
- * SA can't handle it yet. */
+ * SA can't handle it yet.
+ */
 if (body->valid & OBD_MD_MDS) {
 rc = -EAGAIN;
 goto out;
@@ -672,7 +674,8 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
 /* The "ll_sa_entry_to_stated()" will drop related ldlm ibits lock
 * reference count by calling "ll_intent_drop_lock()" in spite of the
 * above operations failed or not. Do not worry about calling
- * "ll_intent_drop_lock()" more than once. */
+ * "ll_intent_drop_lock()" more than once.
+ */
 rc = ll_sa_entry_to_stated(sai, entry,
 rc < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC);
 if (rc == 0 && entry->se_index == sai->sai_index_wait)
@@ -698,7 +701,8 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
 /* release ibits lock ASAP to avoid deadlock when statahead
 * thread enqueues lock on parent in readdir and another
 * process enqueues lock on child with parent lock held, eg.
- * unlink. */
+ * unlink.
+ */
 handle = it->d.lustre.it_lock_handle;
 ll_intent_drop_lock(it);
 }
@@ -736,7 +740,8 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
 /* Release the async ibits lock ASAP to avoid deadlock
 * when statahead thread tries to enqueue lock on parent
 * for readpage and other tries to enqueue lock on child
- * with parent's lock held, for example: unlink. */
+ * with parent's lock held, for example: unlink.
+ */
 entry->se_handle = handle;
 wakeup = list_empty(&sai->sai_entries_received);
 list_add_tail(&entry->se_list,
@@ -947,7 +952,8 @@ static int ll_agl_thread(void *arg)
 if (thread_is_init(thread))
 /* If someone else has changed the thread state
 * (e.g. already changed to SVC_STOPPING), we can't just
- * blindly overwrite that setting. */
+ * blindly overwrite that setting.
+ */
 thread_set_flags(thread, SVC_RUNNING);
 spin_unlock(&plli->lli_agl_lock);
 wake_up(&thread->t_ctl_waitq);
@@ -963,7 +969,8 @@ static int ll_agl_thread(void *arg)
 spin_lock(&plli->lli_agl_lock);
 /* The statahead thread maybe help to process AGL entries,
- * so check whether list empty again. */
+ * so check whether list empty again.
+ */
 if (!list_empty(&sai->sai_entries_agl)) {
 clli = list_entry(sai->sai_entries_agl.next,
 struct ll_inode_info, lli_agl_list);
@@ -1048,7 +1055,8 @@ static int ll_statahead_thread(void *arg)
 if (thread_is_init(thread))
 /* If someone else has changed the thread state
 * (e.g. already changed to SVC_STOPPING), we can't just
- * blindly overwrite that setting. */
+ * blindly overwrite that setting.
+ */
 thread_set_flags(thread, SVC_RUNNING);
 spin_unlock(&plli->lli_sa_lock);
 wake_up(&thread->t_ctl_waitq);
@@ -1136,7 +1144,8 @@ static int ll_statahead_thread(void *arg)
 /* If no window for metadata statahead, but there are
 * some AGL entries to be triggered, then try to help
- * to process the AGL entries. */
+ * to process the AGL entries.
+ */
 if (sa_sent_full(sai)) {
 spin_lock(&plli->lli_agl_lock);
 while (!list_empty(&sai->sai_entries_agl)) {
@@ -1364,7 +1373,8 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
 hash = le64_to_cpu(ent->lde_hash);
 /* The ll_get_dir_page() can return any page containing
- * the given hash which may be not the start hash. */
+ * the given hash which may be not the start hash.
+ */
 if (unlikely(hash < pos))
 continue;
@@ -1650,7 +1660,8 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
 * but as soon as we expose the sai by attaching it to the lli that
 * default reference can be dropped by another thread calling
 * ll_stop_statahead. We need to take a local reference to protect
- * the sai buffer while we intend to access it. */
+ * the sai buffer while we intend to access it.
+ */
 ll_sai_get(sai);
 lli->lli_sai = sai;
@@ -1666,7 +1677,8 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
 thread_set_flags(thread, SVC_STOPPED);
 thread_set_flags(&sai->sai_agl_thread, SVC_STOPPED);
 /* Drop both our own local reference and the default
- * reference from allocation time. */
+ * reference from allocation time.
+ */
 ll_sai_put(sai);
 ll_sai_put(sai);
 LASSERT(!lli->lli_sai);
......
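The do_statahead_enter() hunks above rely on a reference-counting rule: take your own reference before the object is published (attached to the lli), because the allocation-time reference may be dropped by another thread as soon as the object becomes visible, and tear-down then drops both references. A minimal userspace sketch of that pattern follows; the names (sai_like, sai_get, sai_put) are illustrative and not from the Lustre tree.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct sai_like {
	atomic_int refcount;		/* starts at 1: the allocation reference */
};

static struct sai_like *sai_alloc(void)
{
	struct sai_like *s = malloc(sizeof(*s));

	atomic_init(&s->refcount, 1);
	return s;
}

static void sai_get(struct sai_like *s)
{
	atomic_fetch_add(&s->refcount, 1);
}

static void sai_put(struct sai_like *s)
{
	/* fetch_sub returns the previous value; dropping the last ref frees */
	if (atomic_fetch_sub(&s->refcount, 1) == 1) {
		printf("freeing object\n");
		free(s);
	}
}

int main(void)
{
	struct sai_like *s = sai_alloc();
	struct sai_like *published;

	sai_get(s);		/* local reference taken before publication */
	published = s;		/* other threads may now see (and put) it */

	sai_put(published);	/* e.g. another thread dropping the default ref */
	sai_put(s);		/* our local reference; object freed here */
	return 0;
}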
@@ -99,7 +99,8 @@ static int __init init_lustre_lite(void)
 /* print an address of _any_ initialized kernel symbol from this
 * module, to allow debugging with gdb that doesn't support data
- * symbols from modules.*/
+ * symbols from modules.
+ */
 CDEBUG(D_INFO, "Lustre client module (%p).\n",
 &lustre_super_operations);
@@ -146,7 +147,8 @@ static int __init init_lustre_lite(void)
 cfs_get_random_bytes(seed, sizeof(seed));
 /* Nodes with small feet have little entropy. The NID for this
- * node gives the most entropy in the low bits */
+ * node gives the most entropy in the low bits
+ */
 for (i = 0;; i++) {
 if (LNetGetId(i, &lnet_id) == -ENOENT)
 break;
......
@@ -59,7 +59,8 @@ static int ll_readlink_internal(struct inode *inode,
 *symname = lli->lli_symlink_name;
 /* If the total CDEBUG() size is larger than a page, it
 * will print a warning to the console, avoid this by
- * printing just the last part of the symlink. */
+ * printing just the last part of the symlink.
+ */
 CDEBUG(D_INODE, "using cached symlink %s%.*s, len = %d\n",
 print_limit < symlen ? "..." : "", print_limit,
 (*symname) + symlen - print_limit, symlen);
......
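The CDEBUG() in the hunk above keeps the message small by printing only the last print_limit characters of a long symlink target, with a "..." prefix when something was cut. A standalone sketch of the same format-string trick; the sample path and the limit of 32 are made up for illustration.

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *symname =
		"/very/long/target/path/that/would/flood/the/console/log/file.txt";
	int symlen = (int)strlen(symname);
	int print_limit = 32;	/* arbitrary cap for the example */

	/* "%s" prints the "..." marker (or nothing), "%.*s" prints at most
	 * print_limit characters starting from the tail of the string */
	printf("using cached symlink %s%.*s, len = %d\n",
	       print_limit < symlen ? "..." : "",
	       print_limit < symlen ? print_limit : symlen,
	       symname + (print_limit < symlen ? symlen - print_limit : 0),
	       symlen);
	return 0;
}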
@@ -78,7 +78,8 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
 case CIT_READ:
 case CIT_WRITE:
 /* don't need lock here to check lli_layout_gen as we have held
- * extent lock and GROUP lock has to hold to swap layout */
+ * extent lock and GROUP lock has to hold to swap layout
+ */
 if (ll_layout_version_get(lli) != cio->cui_layout_gen) {
 io->ci_need_restart = 1;
 /* this will return application a short read/write */
@@ -134,7 +135,8 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
 */
 rc = ll_layout_restore(ccc_object_inode(obj));
 /* if restore registration failed, no restart,
- * we will return -ENODATA */
+ * we will return -ENODATA
+ */
 /* The layout will change after restore, so we need to
 * block on layout lock hold by the MDT
 * as MDT will not send new layout in lvb (see LU-3124)
@@ -164,8 +166,7 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
 DFID" layout changed from %d to %d.\n",
 PFID(lu_object_fid(&obj->co_lu)),
 cio->cui_layout_gen, gen);
-/* today successful restore is the only possible
- * case */
+/* today successful restore is the only possible case */
 /* restore was done, clear restoring state */
 ll_i2info(ccc_object_inode(obj))->lli_flags &=
 ~LLIF_FILE_RESTORING;
@@ -456,7 +457,8 @@ static void vvp_io_setattr_end(const struct lu_env *env,
 if (cl_io_is_trunc(io))
 /* Truncate in memory pages - they must be clean pages
- * because osc has already notified to destroy osc_extents. */
+ * because osc has already notified to destroy osc_extents.
+ */
 vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
 inode_unlock(inode);
@@ -529,7 +531,8 @@ static int vvp_io_read_start(const struct lu_env *env,
 vio->u.splice.cui_flags);
 /* LU-1109: do splice read stripe by stripe otherwise if it
 * may make nfsd stuck if this read occupied all internal pipe
- * buffers. */
+ * buffers.
+ */
 io->ci_continue = 0;
 break;
 default:
@@ -689,13 +692,15 @@ static int vvp_io_fault_start(const struct lu_env *env,
 size = i_size_read(inode);
 /* Though we have already held a cl_lock upon this page, but
- * it still can be truncated locally. */
+ * it still can be truncated locally.
+ */
 if (unlikely((vmpage->mapping != inode->i_mapping) ||
 (page_offset(vmpage) > size))) {
 CDEBUG(D_PAGE, "llite: fault and truncate race happened!\n");
 /* return +1 to stop cl_io_loop() and ll_fault() will catch
- * and retry. */
+ * and retry.
+ */
 result = 1;
 goto out;
 }
@@ -736,7 +741,8 @@ static int vvp_io_fault_start(const struct lu_env *env,
 }
 /* if page is going to be written, we should add this page into cache
- * earlier. */
+ * earlier.
+ */
 if (fio->ft_mkwrite) {
 wait_on_page_writeback(vmpage);
 if (set_page_dirty(vmpage)) {
@@ -750,7 +756,8 @@ static int vvp_io_fault_start(const struct lu_env *env,
 /* Do not set Dirty bit here so that in case IO is
 * started before the page is really made dirty, we
- * still have chance to detect it. */
+ * still have chance to detect it.
+ */
 result = cl_page_cache_add(env, io, page, CRT_WRITE);
 LASSERT(cl_page_is_owned(page, io));
@@ -803,7 +810,8 @@ static int vvp_io_fsync_start(const struct lu_env *env,
 {
 /* we should mark TOWRITE bit to each dirty page in radix tree to
 * verify pages have been written, but this is difficult because of
- * race. */
+ * race.
+ */
 return 0;
 }
@@ -1153,7 +1161,8 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
 count = io->u.ci_rw.crw_count;
 /* "If nbyte is 0, read() will return 0 and have no other
- * results." -- Single Unix Spec */
+ * results." -- Single Unix Spec
+ */
 if (count == 0)
 result = 1;
 else
@@ -1173,20 +1182,23 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
 /* ignore layout change for generic CIT_MISC but not for glimpse.
 * io context for glimpse must set ci_verify_layout to true,
- * see cl_glimpse_size0() for details. */
+ * see cl_glimpse_size0() for details.
+ */
 if (io->ci_type == CIT_MISC && !io->ci_verify_layout)
 io->ci_ignore_layout = 1;
 /* Enqueue layout lock and get layout version. We need to do this
 * even for operations requiring to open file, such as read and write,
- * because it might not grant layout lock in IT_OPEN. */
+ * because it might not grant layout lock in IT_OPEN.
+ */
 if (result == 0 && !io->ci_ignore_layout) {
 result = ll_layout_refresh(inode, &cio->cui_layout_gen);
 if (result == -ENOENT)
 /* If the inode on MDS has been removed, but the objects
 * on OSTs haven't been destroyed (async unlink), layout
 * fetch will return -ENOENT, we'd ignore this error
- * and continue with dirty flush. LU-3230. */
+ * and continue with dirty flush. LU-3230.
+ */
 result = 0;
 if (result < 0)
 CERROR("%s: refresh file layout " DFID " error %d.\n",
......
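Several of the vvp_io.c hunks above revolve around the layout generation check: an I/O remembers the generation it started under and asks for a restart (surfacing to the application as a short read/write) if the layout was swapped underneath it. A small userspace sketch of that idea; the struct and function names here are illustrative and are not the Lustre API.

#include <stdbool.h>
#include <stdio.h>

struct layout {
	unsigned int gen;	/* bumped every time the layout is swapped */
};

struct io_ctx {
	unsigned int started_gen;
	bool need_restart;
};

static void io_check_layout(struct io_ctx *io, const struct layout *lo)
{
	/* analogous to the ll_layout_version_get() != cui_layout_gen test */
	if (lo->gen != io->started_gen)
		io->need_restart = true;
}

int main(void)
{
	struct layout lo = { .gen = 7 };
	struct io_ctx io = { .started_gen = lo.gen, .need_restart = false };

	lo.gen++;		/* layout swapped while the I/O was in flight */
	io_check_layout(&io, &lo);

	printf("need_restart = %d\n", io.need_restart);
	return 0;
}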
@@ -137,7 +137,8 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
 * page may be stale due to layout change, and the process
 * will never be notified.
 * This operation is expensive but mmap processes have to pay
- * a price themselves. */
+ * a price themselves.
+ */
 unmap_mapping_range(conf->coc_inode->i_mapping,
 0, OBD_OBJECT_EOF, 0);
......
@@ -232,7 +232,8 @@ static int vvp_page_prep_write(const struct lu_env *env,
 LASSERT(!PageDirty(vmpage));
 /* ll_writepage path is not a sync write, so need to set page writeback
- * flag */
+ * flag
+ */
 if (!pg->cp_sync_io)
 set_page_writeback(vmpage);
@@ -356,15 +357,15 @@ static int vvp_page_make_ready(const struct lu_env *env,
 lock_page(vmpage);
 if (clear_page_dirty_for_io(vmpage)) {
 LASSERT(pg->cp_state == CPS_CACHED);
-/* This actually clears the dirty bit in the radix
- * tree. */
+/* This actually clears the dirty bit in the radix tree. */
 set_page_writeback(vmpage);
 vvp_write_pending(cl2ccc(slice->cpl_obj),
 cl2ccc_page(slice));
 CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n");
 } else if (pg->cp_state == CPS_PAGEOUT) {
 /* is it possible for osc_flush_async_page() to already
- * make it ready? */
+ * make it ready?
+ */
 result = -EALREADY;
 } else {
 CL_PAGE_DEBUG(D_ERROR, env, pg, "Unexpecting page state %d.\n",
......
@@ -237,7 +237,8 @@ int ll_setxattr(struct dentry *dentry, const char *name,
 /* Attributes that are saved via getxattr will always have
 * the stripe_offset as 0. Instead, the MDS should be
- * allowed to pick the starting OST index. b=17846 */
+ * allowed to pick the starting OST index. b=17846
+ */
 if (lump && lump->lmm_stripe_offset == 0)
 lump->lmm_stripe_offset = -1;
@@ -480,7 +481,8 @@ ssize_t ll_getxattr(struct dentry *dentry, const char *name,
 if (size == 0 && S_ISDIR(inode->i_mode)) {
 /* XXX directory EA is fix for now, optimize to save
- * RPC transfer */
+ * RPC transfer
+ */
 rc = sizeof(struct lov_user_md);
 goto out;
 }
@@ -495,7 +497,8 @@ ssize_t ll_getxattr(struct dentry *dentry, const char *name,
 }
 } else {
 /* LSM is present already after lookup/getattr call.
- * we need to grab layout lock once it is implemented */
+ * we need to grab layout lock once it is implemented
+ */
 rc = obd_packmd(ll_i2dtexp(inode), &lmm, lsm);
 lmmsize = rc;
 }
@@ -508,7 +511,8 @@ ssize_t ll_getxattr(struct dentry *dentry, const char *name,
 /* used to call ll_get_max_mdsize() forward to get
 * the maximum buffer size, while some apps (such as
 * rsync 3.0.x) care much about the exact xattr value
- * size */
+ * size
+ */
 rc = lmmsize;
 goto out;
 }
@@ -524,7 +528,8 @@ ssize_t ll_getxattr(struct dentry *dentry, const char *name,
 memcpy(lump, lmm, lmmsize);
 /* do not return layout gen for getxattr otherwise it would
 * confuse tar --xattr by recognizing layout gen as stripe
- * offset when the file is restored. See LU-2809. */
+ * offset when the file is restored. See LU-2809.
+ */
 lump->lmm_layout_gen = 0;
 rc = lmmsize;
......
@@ -23,7 +23,8 @@
 */
 struct ll_xattr_entry {
 struct list_head xe_list; /* protected with
- * lli_xattrs_list_rwsem */
+ * lli_xattrs_list_rwsem
+ */
 char *xe_name; /* xattr name, \0-terminated */
 char *xe_value; /* xattr value */
 unsigned xe_namelen; /* strlen(xe_name) + 1 */
@@ -280,7 +281,8 @@ static int ll_xattr_find_get_lock(struct inode *inode,
 mutex_lock(&lli->lli_xattrs_enq_lock);
 /* inode may have been shrunk and recreated, so data is gone, match lock
- * only when data exists. */
+ * only when data exists.
+ */
 if (ll_xattr_cache_valid(lli)) {
 /* Try matching first. */
 mode = ll_take_md_lock(inode, MDS_INODELOCK_XATTR, &lockh, 0,
......