Commit 869b6ea1 authored by Jan Kara's avatar Jan Kara

quota: Fix slow quotaoff

Eric has reported that commit dabc8b20 ("quota: fix dqput() to
follow the guarantees dquot_srcu should provide") heavily increases
runtime of generic/270 xfstest for ext4 in nojournal mode. The reason
for this is that ext4 in nojournal mode leaves dquots dirty until the last
dqput() and thus the cleanup done in quota_release_workfn() has to write
them all. Due to the way quota_release_workfn() is written this results
in synchronize_srcu() call for each dirty dquot which makes the dquot
cleanup when turning quotas off extremely slow.

To be able to avoid synchronize_srcu() for each dirty dquot we need to
rework how we track dquots to be cleaned up. Instead of keeping the last
dquot reference while it is on releasing_dquots list, we drop it right
away and mark the dquot with new DQ_RELEASING_B bit instead. This way we
can we can remove dquot from releasing_dquots list when new reference to
it is acquired and thus there's no need to call synchronize_srcu() each
time we drop dq_list_lock.

References: https://lore.kernel.org/all/ZRytn6CxFK2oECUt@debian-BULLSEYE-live-builder-AMD64Reported-by: default avatarEric Whitney <enwlinux@gmail.com>
Fixes: dabc8b20 ("quota: fix dqput() to follow the guarantees dquot_srcu should provide")
CC: stable@vger.kernel.org
Signed-off-by: default avatarJan Kara <jack@suse.cz>
parent 8a749fd1
...@@ -233,19 +233,18 @@ static void put_quota_format(struct quota_format_type *fmt) ...@@ -233,19 +233,18 @@ static void put_quota_format(struct quota_format_type *fmt)
* All dquots are placed to the end of inuse_list when first created, and this * All dquots are placed to the end of inuse_list when first created, and this
* list is used for invalidate operation, which must look at every dquot. * list is used for invalidate operation, which must look at every dquot.
* *
* When the last reference of a dquot will be dropped, the dquot will be * When the last reference of a dquot is dropped, the dquot is added to
* added to releasing_dquots. We'd then queue work item which would call * releasing_dquots. We'll then queue work item which will call
* synchronize_srcu() and after that perform the final cleanup of all the * synchronize_srcu() and after that perform the final cleanup of all the
* dquots on the list. Both releasing_dquots and free_dquots use the * dquots on the list. Each cleaned up dquot is moved to free_dquots list.
* dq_free list_head in the dquot struct. When a dquot is removed from * Both releasing_dquots and free_dquots use the dq_free list_head in the dquot
* releasing_dquots, a reference count is always subtracted, and if * struct.
* dq_count == 0 at that point, the dquot will be added to the free_dquots.
* *
* Unused dquots (dq_count == 0) are added to the free_dquots list when freed, * Unused and cleaned up dquots are in the free_dquots list and this list is
* and this list is searched whenever we need an available dquot. Dquots are * searched whenever we need an available dquot. Dquots are removed from the
* removed from the list as soon as they are used again, and * list as soon as they are used again and dqstats.free_dquots gives the number
* dqstats.free_dquots gives the number of dquots on the list. When * of dquots on the list. When dquot is invalidated it's completely released
* dquot is invalidated it's completely released from memory. * from memory.
* *
* Dirty dquots are added to the dqi_dirty_list of quota_info when mark * Dirty dquots are added to the dqi_dirty_list of quota_info when mark
* dirtied, and this list is searched when writing dirty dquots back to * dirtied, and this list is searched when writing dirty dquots back to
...@@ -321,6 +320,7 @@ static inline void put_dquot_last(struct dquot *dquot) ...@@ -321,6 +320,7 @@ static inline void put_dquot_last(struct dquot *dquot)
static inline void put_releasing_dquots(struct dquot *dquot) static inline void put_releasing_dquots(struct dquot *dquot)
{ {
list_add_tail(&dquot->dq_free, &releasing_dquots); list_add_tail(&dquot->dq_free, &releasing_dquots);
set_bit(DQ_RELEASING_B, &dquot->dq_flags);
} }
static inline void remove_free_dquot(struct dquot *dquot) static inline void remove_free_dquot(struct dquot *dquot)
...@@ -328,8 +328,10 @@ static inline void remove_free_dquot(struct dquot *dquot) ...@@ -328,8 +328,10 @@ static inline void remove_free_dquot(struct dquot *dquot)
if (list_empty(&dquot->dq_free)) if (list_empty(&dquot->dq_free))
return; return;
list_del_init(&dquot->dq_free); list_del_init(&dquot->dq_free);
if (!atomic_read(&dquot->dq_count)) if (!test_bit(DQ_RELEASING_B, &dquot->dq_flags))
dqstats_dec(DQST_FREE_DQUOTS); dqstats_dec(DQST_FREE_DQUOTS);
else
clear_bit(DQ_RELEASING_B, &dquot->dq_flags);
} }
static inline void put_inuse(struct dquot *dquot) static inline void put_inuse(struct dquot *dquot)
...@@ -581,12 +583,6 @@ static void invalidate_dquots(struct super_block *sb, int type) ...@@ -581,12 +583,6 @@ static void invalidate_dquots(struct super_block *sb, int type)
continue; continue;
/* Wait for dquot users */ /* Wait for dquot users */
if (atomic_read(&dquot->dq_count)) { if (atomic_read(&dquot->dq_count)) {
/* dquot in releasing_dquots, flush and retry */
if (!list_empty(&dquot->dq_free)) {
spin_unlock(&dq_list_lock);
goto restart;
}
atomic_inc(&dquot->dq_count); atomic_inc(&dquot->dq_count);
spin_unlock(&dq_list_lock); spin_unlock(&dq_list_lock);
/* /*
...@@ -605,6 +601,15 @@ static void invalidate_dquots(struct super_block *sb, int type) ...@@ -605,6 +601,15 @@ static void invalidate_dquots(struct super_block *sb, int type)
* restart. */ * restart. */
goto restart; goto restart;
} }
/*
* The last user already dropped its reference but dquot didn't
* get fully cleaned up yet. Restart the scan which flushes the
* work cleaning up released dquots.
*/
if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) {
spin_unlock(&dq_list_lock);
goto restart;
}
/* /*
* Quota now has no users and it has been written on last * Quota now has no users and it has been written on last
* dqput() * dqput()
...@@ -696,6 +701,13 @@ int dquot_writeback_dquots(struct super_block *sb, int type) ...@@ -696,6 +701,13 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
dq_dirty); dq_dirty);
WARN_ON(!dquot_active(dquot)); WARN_ON(!dquot_active(dquot));
/* If the dquot is releasing we should not touch it */
if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) {
spin_unlock(&dq_list_lock);
flush_delayed_work(&quota_release_work);
spin_lock(&dq_list_lock);
continue;
}
/* Now we have active dquot from which someone is /* Now we have active dquot from which someone is
* holding reference so we can safely just increase * holding reference so we can safely just increase
...@@ -809,18 +821,18 @@ static void quota_release_workfn(struct work_struct *work) ...@@ -809,18 +821,18 @@ static void quota_release_workfn(struct work_struct *work)
/* Exchange the list head to avoid livelock. */ /* Exchange the list head to avoid livelock. */
list_replace_init(&releasing_dquots, &rls_head); list_replace_init(&releasing_dquots, &rls_head);
spin_unlock(&dq_list_lock); spin_unlock(&dq_list_lock);
synchronize_srcu(&dquot_srcu);
restart: restart:
synchronize_srcu(&dquot_srcu);
spin_lock(&dq_list_lock); spin_lock(&dq_list_lock);
while (!list_empty(&rls_head)) { while (!list_empty(&rls_head)) {
dquot = list_first_entry(&rls_head, struct dquot, dq_free); dquot = list_first_entry(&rls_head, struct dquot, dq_free);
/* Dquot got used again? */ WARN_ON_ONCE(atomic_read(&dquot->dq_count));
if (atomic_read(&dquot->dq_count) > 1) { /*
remove_free_dquot(dquot); * Note that DQ_RELEASING_B protects us from racing with
atomic_dec(&dquot->dq_count); * invalidate_dquots() calls so we are safe to work with the
continue; * dquot even after we drop dq_list_lock.
} */
if (dquot_dirty(dquot)) { if (dquot_dirty(dquot)) {
spin_unlock(&dq_list_lock); spin_unlock(&dq_list_lock);
/* Commit dquot before releasing */ /* Commit dquot before releasing */
...@@ -834,7 +846,6 @@ static void quota_release_workfn(struct work_struct *work) ...@@ -834,7 +846,6 @@ static void quota_release_workfn(struct work_struct *work)
} }
/* Dquot is inactive and clean, now move it to free list */ /* Dquot is inactive and clean, now move it to free list */
remove_free_dquot(dquot); remove_free_dquot(dquot);
atomic_dec(&dquot->dq_count);
put_dquot_last(dquot); put_dquot_last(dquot);
} }
spin_unlock(&dq_list_lock); spin_unlock(&dq_list_lock);
...@@ -875,6 +886,7 @@ void dqput(struct dquot *dquot) ...@@ -875,6 +886,7 @@ void dqput(struct dquot *dquot)
BUG_ON(!list_empty(&dquot->dq_free)); BUG_ON(!list_empty(&dquot->dq_free));
#endif #endif
put_releasing_dquots(dquot); put_releasing_dquots(dquot);
atomic_dec(&dquot->dq_count);
spin_unlock(&dq_list_lock); spin_unlock(&dq_list_lock);
queue_delayed_work(system_unbound_wq, &quota_release_work, 1); queue_delayed_work(system_unbound_wq, &quota_release_work, 1);
} }
...@@ -963,7 +975,7 @@ struct dquot *dqget(struct super_block *sb, struct kqid qid) ...@@ -963,7 +975,7 @@ struct dquot *dqget(struct super_block *sb, struct kqid qid)
dqstats_inc(DQST_LOOKUPS); dqstats_inc(DQST_LOOKUPS);
} }
/* Wait for dq_lock - after this we know that either dquot_release() is /* Wait for dq_lock - after this we know that either dquot_release() is
* already finished or it will be canceled due to dq_count > 1 test */ * already finished or it will be canceled due to dq_count > 0 test */
wait_on_dquot(dquot); wait_on_dquot(dquot);
/* Read the dquot / allocate space in quota file */ /* Read the dquot / allocate space in quota file */
if (!dquot_active(dquot)) { if (!dquot_active(dquot)) {
......
...@@ -285,7 +285,9 @@ static inline void dqstats_dec(unsigned int type) ...@@ -285,7 +285,9 @@ static inline void dqstats_dec(unsigned int type)
#define DQ_FAKE_B 3 /* no limits only usage */ #define DQ_FAKE_B 3 /* no limits only usage */
#define DQ_READ_B 4 /* dquot was read into memory */ #define DQ_READ_B 4 /* dquot was read into memory */
#define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */
#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\ #define DQ_RELEASING_B 6 /* dquot is in releasing_dquots list waiting
* to be cleaned up */
#define DQ_LASTSET_B 7 /* Following 6 bits (see QIF_) are reserved\
* for the mask of entries set via SETQUOTA\ * for the mask of entries set via SETQUOTA\
* quotactl. They are set under dq_data_lock\ * quotactl. They are set under dq_data_lock\
* and the quota format handling dquot can\ * and the quota format handling dquot can\
......
...@@ -57,7 +57,7 @@ static inline bool dquot_is_busy(struct dquot *dquot) ...@@ -57,7 +57,7 @@ static inline bool dquot_is_busy(struct dquot *dquot)
{ {
if (test_bit(DQ_MOD_B, &dquot->dq_flags)) if (test_bit(DQ_MOD_B, &dquot->dq_flags))
return true; return true;
if (atomic_read(&dquot->dq_count) > 1) if (atomic_read(&dquot->dq_count) > 0)
return true; return true;
return false; return false;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment