checkpoint.cc 9.93 KB
Newer Older
1 2
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3
#ident "Copyright (c) 2009-2010 Tokutek Inc.  All rights reserved."
4 5 6 7 8 9 10
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#ident "$Id$"

/***********
 * The purpose of this file is to implement the high-level logic for 
 * taking a checkpoint.
 *
11
 * There are three locks used for taking a checkpoint.  They are listed below.
12 13 14 15 16 17 18 19 20 21 22
 *
 * NOTE: The reader-writer locks may be held by either multiple clients 
 *       or the checkpoint function.  (The checkpoint function has the role
 *       of the writer, the clients have the reader roles.)
 *
 *  - multi_operation_lock
 *    This is a new reader-writer lock.
 *    This lock is held by the checkpoint function only for as long as is required
 *    to set all the "pending" bits and to create the checkpoint-in-progress versions
 *    of the header and translation table (btt).
 *    The following operations must take the multi_operation_lock:
23
 *     - any set of operations that must be atomic with respect to begin checkpoint
24 25 26 27 28 29 30 31 32 33
 *
 *  - checkpoint_safe_lock
 *    This is a new reader-writer lock.
 *    This lock is held for the entire duration of the checkpoint.
 *    It is used to prevent more than one checkpoint from happening at a time
 *    (the checkpoint function is non-re-entrant), and to prevent certain operations
 *    that should not happen during a checkpoint.  
 *    The following operations must take the checkpoint_safe lock:
 *       - delete a dictionary
 *       - rename a dictionary
34 35
 *    The application can use this lock to disable checkpointing during other sensitive
 *    operations, such as making a backup copy of the database.
36
 *
37
 * Once the "pending" bits are set and the snapshots are taken of the header and btt,
38 39 40 41 42 43
 * most normal database operations are permitted to resume.
 *
 *
 *
 *****/

44
#include <toku_portability.h>
45 46
#include <time.h>

47
#include "fttypes.h"
48
#include "cachetable.h"
49 50
#include "log-internal.h"
#include "logger.h"
51
#include "checkpoint.h"
52
#include <portability/toku_atomic.h>
53

54 55 56 57 58 59
///////////////////////////////////////////////////////////////////////////////////
// Engine status
//
// Status is intended for display to humans to help understand system behavior.
// It does not need to be perfectly thread-safe.

60
// File-wide checkpoint status record.  status_init() fills in the keyname,
// type and legend of each row; the value fields are updated throughout this
// file via STATUS_VALUE() and the whole struct is copied out to callers by
// toku_checkpoint_get_status().
static CHECKPOINT_STATUS_S cp_status;
61 62

// Initialize the descriptive fields of status row `k` in cp_status:
//   k - row index; its spelling is also stored as the key name (via #k)
//   t - value stored in the row's type field
//   l - string literal appended to the "checkpoint: " legend prefix
// Wrapped in do { } while (0) so that `STATUS_INIT(...);` is exactly one
// statement and remains valid in an unbraced if/else.
#define STATUS_INIT(k,t,l) do { \
        cp_status.status[k].keyname = #k; \
        cp_status.status[k].type    = (t); \
        cp_status.status[k].legend  = "checkpoint: " l; \
    } while (0)

// Populate the keyname, type and legend of every checkpoint status row and
// flag cp_status as initialized.  Only the descriptive fields are written
// here; the value fields are left alone (cp_status has static storage, so
// they start out zeroed).
static void status_init(void) {
    // Configuration and progress marker.
    STATUS_INIT(CP_PERIOD, UINT64, "period");
    STATUS_INIT(CP_FOOTPRINT, UINT64, "footprint");

    // Timestamps and LSN of recent checkpoints.
    STATUS_INIT(CP_TIME_LAST_CHECKPOINT_BEGIN, UNIXTIME, "last checkpoint began ");
    STATUS_INIT(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE, UNIXTIME, "last complete checkpoint began ");
    STATUS_INIT(CP_TIME_LAST_CHECKPOINT_END, UNIXTIME, "last complete checkpoint ended");
    STATUS_INIT(CP_LAST_LSN, UINT64, "last complete checkpoint LSN");

    // Success / failure counters.
    STATUS_INIT(CP_CHECKPOINT_COUNT, UINT64, "checkpoints taken ");
    STATUS_INIT(CP_CHECKPOINT_COUNT_FAIL, UINT64, "checkpoints failed");

    // Lock contention counters.
    STATUS_INIT(CP_WAITERS_NOW, UINT64, "waiters now");
    STATUS_INIT(CP_WAITERS_MAX, UINT64, "waiters max");
    STATUS_INIT(CP_CLIENT_WAIT_ON_MO, UINT64, "non-checkpoint client wait on mo lock");
    STATUS_INIT(CP_CLIENT_WAIT_ON_CS, UINT64, "non-checkpoint client wait on cs lock");

    cp_status.initialized = true;
}
#undef STATUS_INIT

// Shorthand for the numeric value slot of status row `x` in cp_status.
// Expands to an lvalue, so it can be assigned to, incremented, or have its
// address taken.
#define STATUS_VALUE(x) cp_status.status[x].value.num

// Copy the current checkpoint status into *statp.
// The descriptive fields are set up on first use, and CP_PERIOD is refreshed
// from the cachetable `ct` on every call before the copy is made.
void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {
    if (!cp_status.initialized) {
        status_init();
    }

    STATUS_VALUE(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct);
    *statp = cp_status;
}



101
// Copy of the logger's last_completed_checkpoint_lsn, refreshed by
// toku_checkpoint() whenever it is called with a non-NULL logger.
static LSN last_completed_checkpoint_lsn;
102

103 104 105
// The two reader-writer locks used by the checkpoint logic in this file.
// The checkpoint code takes them through the write (wrlock) calls; the
// toku_*_client_lock() functions take them through the read (rdlock) calls.
static toku_pthread_rwlock_t checkpoint_safe_lock;
static toku_pthread_rwlock_t multi_operation_lock;

Yoni Fogel's avatar
Yoni Fogel committed
106 107 108
// State flags.
// `initialized` is set by toku_checkpoint_init(), cleared by
// toku_checkpoint_destroy(), and asserted at the top of toku_checkpoint().
// `locked_mo` / `locked_cs` are written by the checkpoint-side lock/unlock
// helpers and read, without holding any lock, by the client lock functions
// to decide whether to bump the corresponding wait counter.
static bool initialized = false;     // sanity check
static volatile bool locked_mo = false;       // true when the multi_operation write lock is held (by checkpoint)
static volatile bool locked_cs = false;       // true when the checkpoint_safe write lock is held (by checkpoint)
109 110


111 112 113 114
// Note following static functions are called from checkpoint internal logic only,
// and use the "writer" calls for locking and unlocking.


115
static void
116
multi_operation_lock_init(void) {
117 118
    pthread_rwlockattr_t attr;
    pthread_rwlockattr_init(&attr);
119
#if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP)
120
    pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
121 122 123 124
#else
    // TODO: need to figure out how to make writer-preferential rwlocks
    // happen on osx
#endif
125
    toku_pthread_rwlock_init(&multi_operation_lock, &attr); 
126
    pthread_rwlockattr_destroy(&attr);
Yoni Fogel's avatar
Yoni Fogel committed
127
    locked_mo = false;
128 129
}

130
static void
131
multi_operation_lock_destroy(void) {
132
    toku_pthread_rwlock_destroy(&multi_operation_lock);
133 134 135 136
}

static void 
multi_operation_checkpoint_lock(void) {
137
    toku_pthread_rwlock_wrlock(&multi_operation_lock);   
Yoni Fogel's avatar
Yoni Fogel committed
138
    locked_mo = true;
139 140 141 142
}

static void 
multi_operation_checkpoint_unlock(void) {
Yoni Fogel's avatar
Yoni Fogel committed
143
    locked_mo = false;
144
    toku_pthread_rwlock_wrunlock(&multi_operation_lock); 
145 146
}

147
static void
148
checkpoint_safe_lock_init(void) {
149
    toku_pthread_rwlock_init(&checkpoint_safe_lock, NULL); 
Yoni Fogel's avatar
Yoni Fogel committed
150
    locked_cs = false;
151 152
}

153
static void
154
checkpoint_safe_lock_destroy(void) {
155
    toku_pthread_rwlock_destroy(&checkpoint_safe_lock); 
156 157 158 159
}

static void 
checkpoint_safe_checkpoint_lock(void) {
160
    toku_pthread_rwlock_wrlock(&checkpoint_safe_lock);   
Yoni Fogel's avatar
Yoni Fogel committed
161
    locked_cs = true;
162 163 164 165
}

static void 
checkpoint_safe_checkpoint_unlock(void) {
Yoni Fogel's avatar
Yoni Fogel committed
166
    locked_cs = false;
167
    toku_pthread_rwlock_wrunlock(&checkpoint_safe_lock); 
168 169 170 171 172 173 174 175
}


// toku_xxx_client_(un)lock() functions are only called from client code,
// never from checkpoint code, and use the "reader" interface to the lock functions.

void 
toku_multi_operation_client_lock(void) {
176
    if (locked_mo)
177
        (void) toku_sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_MO), 1);
178
    toku_pthread_rwlock_rdlock(&multi_operation_lock);   
179 180 181 182
}

void 
toku_multi_operation_client_unlock(void) {
183
    toku_pthread_rwlock_rdunlock(&multi_operation_lock); 
184 185 186 187
}

void 
toku_checkpoint_safe_client_lock(void) {
188
    if (locked_cs)
189
        (void) toku_sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_CS), 1);
190
    toku_pthread_rwlock_rdlock(&checkpoint_safe_lock);  
Zardosht Kasheff's avatar
Zardosht Kasheff committed
191
    toku_multi_operation_client_lock();
192 193 194 195
}

void 
toku_checkpoint_safe_client_unlock(void) {
196
    toku_pthread_rwlock_rdunlock(&checkpoint_safe_lock); 
197
    toku_multi_operation_client_unlock();
198 199 200
}


201

202
// Initialize the checkpoint mechanism, must be called before any client operations.
203
void
204
toku_checkpoint_init(void) {
205 206
    multi_operation_lock_init();
    checkpoint_safe_lock_init();
Yoni Fogel's avatar
Yoni Fogel committed
207
    initialized = true;
208 209
}

210
void
Yoni Fogel's avatar
Yoni Fogel committed
211
toku_checkpoint_destroy(void) {
212 213
    multi_operation_lock_destroy();
    checkpoint_safe_lock_destroy();
Yoni Fogel's avatar
Yoni Fogel committed
214
    initialized = false;
215 216
}

217
// Record how far the running checkpoint has progressed by storing
// footprint_offset + x in the CP_FOOTPRINT status value.
// Requires a variable named `footprint_offset` to be in scope where the
// macro is expanded.  The argument and the whole expansion are parenthesized
// so that any expression can be passed safely.
#define SET_CHECKPOINT_FOOTPRINT(x) (STATUS_VALUE(CP_FOOTPRINT) = footprint_offset + (x))
218

219 220 221

// Take a checkpoint of all currently open dictionaries
int 
Zardosht Kasheff's avatar
Zardosht Kasheff committed
222
toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger,
223 224 225
                void (*callback_f)(void*),  void * extra,
                void (*callback2_f)(void*), void * extra2,
                checkpoint_caller_t caller_id) {
226
    int r = 0;
227
    int footprint_offset = (int) caller_id * 1000;
228 229

    assert(initialized);
230

231
    (void) toku_sync_fetch_and_add(&STATUS_VALUE(CP_WAITERS_NOW), 1);
232
    checkpoint_safe_checkpoint_lock();
233
    (void) toku_sync_fetch_and_sub(&STATUS_VALUE(CP_WAITERS_NOW), 1);
234

235
    if (STATUS_VALUE(CP_WAITERS_NOW) > STATUS_VALUE(CP_WAITERS_MAX))
236
        STATUS_VALUE(CP_WAITERS_MAX) = STATUS_VALUE(CP_WAITERS_NOW);  // threadsafe, within checkpoint_safe lock
237 238

    SET_CHECKPOINT_FOOTPRINT(10);
Zardosht Kasheff's avatar
Zardosht Kasheff committed
239
    multi_operation_checkpoint_lock();
240
    SET_CHECKPOINT_FOOTPRINT(20);
241
    toku_ft_open_close_lock();
242
    
243
    SET_CHECKPOINT_FOOTPRINT(30);
244
    STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
245
    toku_cachetable_begin_checkpoint(cp, logger);
246

247
    toku_ft_open_close_unlock();
248
    multi_operation_checkpoint_unlock();
249

250
    SET_CHECKPOINT_FOOTPRINT(40);
251
    if (r==0) {
Zardosht Kasheff's avatar
Zardosht Kasheff committed
252
        if (callback_f) 
Zardosht Kasheff's avatar
Zardosht Kasheff committed
253
            callback_f(extra);      // callback is called with checkpoint_safe_lock still held
254
        toku_cachetable_end_checkpoint(cp, logger, callback2_f, extra2);
255
    }
256
    SET_CHECKPOINT_FOOTPRINT(50);
257
    if (r==0 && logger) {
258 259
        last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn;
        r = toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn);
260
        STATUS_VALUE(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn;
261
    }
262

263
    SET_CHECKPOINT_FOOTPRINT(60);
264 265
    STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_END) = time(NULL);
    STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN);
266

267
    if (r == 0)
268
        STATUS_VALUE(CP_CHECKPOINT_COUNT)++;
269
    else
270
        STATUS_VALUE(CP_CHECKPOINT_COUNT_FAIL)++;
271

272
    STATUS_VALUE(CP_FOOTPRINT) = 0;
273
    checkpoint_safe_checkpoint_unlock();
274 275
    return r;
}
276

277
#include <toku_race_tools.h>
278
void __attribute__((__constructor__)) toku_checkpoint_helgrind_ignore(void);
279
void
280
toku_checkpoint_helgrind_ignore(void) {
281 282 283
    TOKU_VALGRIND_HG_DISABLE_CHECKING(&cp_status, sizeof cp_status);
    TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_mo, sizeof locked_mo);
    TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_cs, sizeof locked_cs);
284
}
285 286 287

#undef SET_CHECKPOINT_FOOTPRINT
#undef STATUS_VALUE