Commit f7f497cb authored by Ritesh Harjani, committed by Theodore Ts'o

jbd2: kill t_handle_lock transaction spinlock

This patch kills t_handle_lock transaction spinlock completely from
jbd2.

To explain the reasoning, currently there were three sites at which
this spinlock was used.

1. jbd2_journal_wait_updates()
   a. Based on careful code review, it can be seen that we don't need this
      lock here. This is because we wait for any currently ongoing updates
      based on the atomic variable t_updates, and we don't take
      t_handle_lock in stop_this_handle() anyway.
      i.e.

	write_lock(&journal->j_state_lock)
	jbd2_journal_wait_updates() 			stop_this_handle()
		while (atomic_read(txn->t_updates)) { 		|
		DEFINE_WAIT(wait); 				|
		prepare_to_wait(); 				|
		if (atomic_read(txn->t_updates)) 		if (atomic_dec_and_test(txn->t_updates))
			write_unlock(&journal->j_state_lock);
			schedule();					wake_up()
			write_lock(&journal->j_state_lock);
		finish_wait();
	   }
	txn->t_state = T_COMMIT
	write_unlock(&journal->j_state_lock);

   b.  Also note that between atomic_inc(&txn->t_updates) in
       start_this_handle() and jbd2_journal_wait_updates(), the
       synchronization happens via read_lock(journal->j_state_lock) in
       start_this_handle();

2. jbd2_journal_extend()
   a. jbd2_journal_extend() is called with the handle of each process from
      task_struct. So no lock required in updating member fields of handle_t

   b. For member fields of h_transaction, all updates happen only via
      atomic APIs (which are also called within read_lock()).
      So, no need of this transaction spinlock.

3. update_t_max_wait()
   Based on Jan's suggestion, this can be carefully removed using the atomic
   cmpxchg API.
   Note that there can be several processes which are waiting for a new
   transaction to be allocated and started. For doing this only one
   process will succeed in taking write_lock() and allocating a new txn.
   After that all of the processes will be updating t_max_wait (max
   transaction wait time). This can be done via the method below w/o taking
   any locks, using atomic cmpxchg.
   For more details refer [1]

	   new = get_new_val();
	   old = READ_ONCE(ptr->max_val);
	   while (old < new)
		old = cmpxchg(&ptr->max_val, old, new);

[1]: https://lwn.net/Articles/849237/

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/d89e599658b4a1f3893a48c6feded200073037fc.1644992076.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent cc16eeca
...@@ -107,7 +107,6 @@ static void jbd2_get_transaction(journal_t *journal, ...@@ -107,7 +107,6 @@ static void jbd2_get_transaction(journal_t *journal,
transaction->t_start_time = ktime_get(); transaction->t_start_time = ktime_get();
transaction->t_tid = journal->j_transaction_sequence++; transaction->t_tid = journal->j_transaction_sequence++;
transaction->t_expires = jiffies + journal->j_commit_interval; transaction->t_expires = jiffies + journal->j_commit_interval;
spin_lock_init(&transaction->t_handle_lock);
atomic_set(&transaction->t_updates, 0); atomic_set(&transaction->t_updates, 0);
atomic_set(&transaction->t_outstanding_credits, atomic_set(&transaction->t_outstanding_credits,
jbd2_descriptor_blocks_per_trans(journal) + jbd2_descriptor_blocks_per_trans(journal) +
...@@ -139,24 +138,21 @@ static void jbd2_get_transaction(journal_t *journal, ...@@ -139,24 +138,21 @@ static void jbd2_get_transaction(journal_t *journal,
/* /*
* Update transaction's maximum wait time, if debugging is enabled. * Update transaction's maximum wait time, if debugging is enabled.
* *
* In order for t_max_wait to be reliable, it must be protected by a * t_max_wait is carefully updated here with use of atomic compare exchange.
* lock. But doing so will mean that start_this_handle() can not be * Note that there could be multiple threads trying to do this simultaneously
* run in parallel on SMP systems, which limits our scalability. So * hence using cmpxchg to avoid any use of locks in this case.
* unless debugging is enabled, we no longer update t_max_wait, which
* means that maximum wait time reported by the jbd2_run_stats
* tracepoint will always be zero.
*/ */
static inline void update_t_max_wait(transaction_t *transaction, static inline void update_t_max_wait(transaction_t *transaction,
unsigned long ts) unsigned long ts)
{ {
#ifdef CONFIG_JBD2_DEBUG #ifdef CONFIG_JBD2_DEBUG
unsigned long oldts, newts;
if (jbd2_journal_enable_debug && if (jbd2_journal_enable_debug &&
time_after(transaction->t_start, ts)) { time_after(transaction->t_start, ts)) {
ts = jbd2_time_diff(ts, transaction->t_start); newts = jbd2_time_diff(ts, transaction->t_start);
spin_lock(&transaction->t_handle_lock); oldts = READ_ONCE(transaction->t_max_wait);
if (ts > transaction->t_max_wait) while (oldts < newts)
transaction->t_max_wait = ts; oldts = cmpxchg(&transaction->t_max_wait, oldts, newts);
spin_unlock(&transaction->t_handle_lock);
} }
#endif #endif
} }
...@@ -690,7 +686,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records) ...@@ -690,7 +686,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
DIV_ROUND_UP( DIV_ROUND_UP(
handle->h_revoke_credits_requested, handle->h_revoke_credits_requested,
journal->j_revoke_records_per_block); journal->j_revoke_records_per_block);
spin_lock(&transaction->t_handle_lock);
wanted = atomic_add_return(nblocks, wanted = atomic_add_return(nblocks,
&transaction->t_outstanding_credits); &transaction->t_outstanding_credits);
...@@ -698,7 +693,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records) ...@@ -698,7 +693,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
jbd_debug(3, "denied handle %p %d blocks: " jbd_debug(3, "denied handle %p %d blocks: "
"transaction too large\n", handle, nblocks); "transaction too large\n", handle, nblocks);
atomic_sub(nblocks, &transaction->t_outstanding_credits); atomic_sub(nblocks, &transaction->t_outstanding_credits);
goto unlock; goto error_out;
} }
trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
...@@ -714,8 +709,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records) ...@@ -714,8 +709,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
result = 0; result = 0;
jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
unlock:
spin_unlock(&transaction->t_handle_lock);
error_out: error_out:
read_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
return result; return result;
...@@ -860,15 +853,12 @@ void jbd2_journal_wait_updates(journal_t *journal) ...@@ -860,15 +853,12 @@ void jbd2_journal_wait_updates(journal_t *journal)
if (!transaction) if (!transaction)
break; break;
spin_lock(&transaction->t_handle_lock);
prepare_to_wait(&journal->j_wait_updates, &wait, prepare_to_wait(&journal->j_wait_updates, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
if (!atomic_read(&transaction->t_updates)) { if (!atomic_read(&transaction->t_updates)) {
spin_unlock(&transaction->t_handle_lock);
finish_wait(&journal->j_wait_updates, &wait); finish_wait(&journal->j_wait_updates, &wait);
break; break;
} }
spin_unlock(&transaction->t_handle_lock);
write_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
schedule(); schedule();
finish_wait(&journal->j_wait_updates, &wait); finish_wait(&journal->j_wait_updates, &wait);
......
...@@ -554,9 +554,6 @@ struct transaction_chp_stats_s { ...@@ -554,9 +554,6 @@ struct transaction_chp_stats_s {
* ->j_list_lock * ->j_list_lock
* *
* j_state_lock * j_state_lock
* ->t_handle_lock
*
* j_state_lock
* ->j_list_lock (journal_unmap_buffer) * ->j_list_lock (journal_unmap_buffer)
* *
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment