From 2cb512d0a32fe43ca588d94f0935a1d4a168cf0f Mon Sep 17 00:00:00 2001
From: Yoni Fogel <yoni@tokutek.com>
Date: Mon, 2 Jul 2012 19:04:30 +0000
Subject: [PATCH] refs #5149 Fix up ydb layer read-only txn optimizations. The
 is_read_only function already took into account children, so we don't need
 complicated logic.

git-svn-id: file:///svn/toku/tokudb@45205 c7de825b-a66e-492c-adef-691d508d4ae1
---
 ft/txn.c      |  2 ++
 ft/txn.h      |  2 ++
 src/ydb_txn.c | 45 ++++++++++++++++-----------------------------
 3 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/ft/txn.c b/ft/txn.c
index 3feeb9d926..b05e21a389 100644
--- a/ft/txn.c
+++ b/ft/txn.c
@@ -516,6 +516,8 @@ toku_maybe_log_begin_txn_for_write_operation(TOKUTXN txn) {
 
 bool
 toku_txn_is_read_only(TOKUTXN txn) {
+    // No need to recursively check children because parents are
+    // recursively logged before children.
     if (!txn->begin_was_logged) {
         // Did no work.
         invariant(txn->roll_info.num_rollentries == 0);
diff --git a/ft/txn.h b/ft/txn.h
index 10806374a0..19bdb4b4c5 100644
--- a/ft/txn.h
+++ b/ft/txn.h
@@ -114,6 +114,8 @@ struct tokulogger_preplist {
 int toku_logger_recover_txn (TOKULOGGER logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, u_int32_t flags);
 
 void toku_maybe_log_begin_txn_for_write_operation(TOKUTXN txn);
+
+// Return whether txn (or it's descendents) have done no work.
 bool toku_txn_is_read_only(TOKUTXN txn);
 
 #if defined(__cplusplus) || defined(__cilkplusplus)
diff --git a/src/ydb_txn.c b/src/ydb_txn.c
index b346085e64..7dccef6efa 100644
--- a/src/ydb_txn.c
+++ b/src/ydb_txn.c
@@ -13,11 +13,6 @@
 #include <valgrind/helgrind.h>
 #include "ft/txn_manager.h"
 
-static int toku_txn_commit(DB_TXN * txn, u_int32_t flags, TXN_PROGRESS_POLL_FUNCTION poll,
-        void *poll_extra, bool release_mo_client_lock_if_held, bool *holds_mo_lock);
-static int toku_txn_abort(DB_TXN * txn, TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra,
-        bool *holds_mo_lock);
-
 static int 
 toku_txn_release_locks(DB_TXN* txn) {
     assert(txn);
@@ -60,20 +55,12 @@ toku_txn_destroy(DB_TXN *txn) {
 static int
 toku_txn_commit(DB_TXN * txn, u_int32_t flags,
                 TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra,
-                bool release_mo_lock_if_held,
-                bool *holds_mo_lock) {
+                bool release_mo_lock) {
     HANDLE_PANICKED_ENV(txn->mgrp);
-    // Don't take multi-operation lock until we see a non-readonly txn.
-    if (!*holds_mo_lock &&
-        !toku_txn_is_read_only(db_txn_struct_i(txn)->tokutxn)) {
-        toku_multi_operation_client_lock();
-        *holds_mo_lock = true;
-    }
     //Recursively kill off children
     if (db_txn_struct_i(txn)->child) {
         //commit of child sets the child pointer to NULL
-        int r_child = toku_txn_commit(db_txn_struct_i(txn)->child,
-                flags, NULL, NULL, false, holds_mo_lock);
+        int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, flags, NULL, NULL, false);
         if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) {
             env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent commit.\n");
         }
@@ -125,7 +112,7 @@ toku_txn_commit(DB_TXN * txn, u_int32_t flags,
     // this lock must be held until the references to the open FTs is released
     // begin checkpoint logs these associations, so we must be protect
     // the changing of these associations with checkpointing
-    if (release_mo_lock_if_held && *holds_mo_lock) {
+    if (release_mo_lock) {
         toku_multi_operation_client_unlock();
     }
     toku_txn_maybe_fsync_log(logger, do_fsync_lsn, do_fsync);
@@ -154,20 +141,12 @@ toku_txn_id64(DB_TXN * txn) {
 
 static int 
 toku_txn_abort(DB_TXN * txn,
-               TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra,
-               bool *holds_mo_lock) {
+               TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra) {
     HANDLE_PANICKED_ENV(txn->mgrp);
-    // Don't take multi-operation lock until we see a non-readonly txn.
-    if (!*holds_mo_lock &&
-        !toku_txn_is_read_only(db_txn_struct_i(txn)->tokutxn)) {
-        toku_multi_operation_client_lock();
-        *holds_mo_lock = true;
-    }
     //Recursively kill off children (abort or commit are both correct, commit is cheaper)
     if (db_txn_struct_i(txn)->child) {
         //commit of child sets the child pointer to NULL
-        int r_child = toku_txn_commit(db_txn_struct_i(txn)->child,
-                DB_TXN_NOSYNC, NULL, NULL, false, holds_mo_lock);
+        int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, DB_TXN_NOSYNC, NULL, NULL, false);
         if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) {
             env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent abort.\n");
         }
@@ -216,7 +195,7 @@ toku_txn_xa_prepare (DB_TXN *txn, TOKU_XA_XID *xid) {
         //commit of child sets the child pointer to NULL
 
         // toku_txn_commit will take the mo_lock if not held and a non-readonly txn is found.
-        int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, 0, NULL, NULL, false, &holds_mo_lock);
+        int r_child = toku_txn_commit(db_txn_struct_i(txn)->child, 0, NULL, NULL, false);
         if (r_child !=0 && !toku_env_is_panicked(txn->mgrp)) {
             env_panic(txn->mgrp, r_child, "Recursive child commit failed during parent commit.\n");
         }
@@ -278,12 +257,16 @@ locked_txn_commit_with_progress(DB_TXN *txn, u_int32_t flags,
         toku_checkpoint(txn->mgrp->i->cachetable, txn->mgrp->i->logger, NULL, NULL, NULL, NULL, TXN_COMMIT_CHECKPOINT);
     }
     bool holds_mo_lock = false;
+    if (!toku_txn_is_read_only(db_txn_struct_i(txn)->tokutxn)) {
+        toku_multi_operation_client_lock();
+        holds_mo_lock = true;
+    }
     // cannot begin a checkpoint.
     // the multi operation lock is taken the first time we
     // see a non-readonly txn in the recursive commit.
     // But released in the first-level toku_txn_commit (if taken),
     // this way, we don't hold it while we fsync the log.
-    int r = toku_txn_commit(txn, flags, poll, poll_extra, true, &holds_mo_lock);
+    int r = toku_txn_commit(txn, flags, poll, poll_extra, holds_mo_lock);
     return r;
 }
 
@@ -295,7 +278,11 @@ locked_txn_abort_with_progress(DB_TXN *txn,
     // see a non-readonly txn in the abort (or recursive commit).
     // But released here so we don't have to hold additional state.
     bool holds_mo_lock = false;
-    int r = toku_txn_abort(txn, poll, poll_extra, &holds_mo_lock);
+    if (!toku_txn_is_read_only(db_txn_struct_i(txn)->tokutxn)) {
+        toku_multi_operation_client_lock();
+        holds_mo_lock = true;
+    }
+    int r = toku_txn_abort(txn, poll, poll_extra);
     if (holds_mo_lock) {
         toku_multi_operation_client_unlock();
     }
-- 
2.30.9