mf_keycache.c 82.9 KB
Newer Older
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1 2 3 4 5 6 7 8
/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
16 17

/*
  These functions handle keyblock caching for ISAM and MyISAM tables.

  One cache can handle many files.
  It must contain buffers of the same blocksize.
  init_key_cache() should be used to init cache handler.

  The free list (free_block_list) is a stack like structure.
  When a block is freed by free_block(), it is pushed onto the stack.
  When a new block is required, we first try to pop one from the stack.
  If the stack is empty, we try to get a never-used block from the pool.
  If this is empty too, then a block is taken from the LRU ring, flushing it
  to disk, if necessary. This is handled in find_key_block().
  With the new free list, the blocks can have three temperatures:
  hot, warm and cold (which is free). This is remembered in the block header
  by the enum BLOCK_TEMPERATURE temperature variable. Remembering the
  temperature is necessary to correctly count the number of warm blocks,
  which is required to decide when blocks are allowed to become hot. Whenever
  a block is inserted to another (sub-)chain, we take the old and new
  temperature into account to decide if we got one more or less warm block.
  blocks_unused is the sum of never used blocks in the pool and of currently
  free blocks. blocks_used is the number of blocks fetched from the pool and
  as such gives the maximum number of in-use blocks at any time.
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
41 42

#include "mysys_priv.h"
43
#include <keycache.h>
bk@work.mysql.com's avatar
bk@work.mysql.com committed
44 45 46
#include "my_static.h"
#include <m_string.h>
#include <errno.h>
47 48
#include <stdarg.h>

49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
/*
  Some compilation flags have been added specifically for this module
  to control the following:
  - to prevent a thread from yielding control while it reads directly
    from the key cache, which might improve performance in many cases;
    to enable this add:
    #define SERIALIZED_READ_FROM_CACHE
  - to set an upper bound for number of threads simultaneously
    using the key cache; this setting helps to determine an optimal
    size for hash table and improve performance when the number of
    blocks in the key cache much less than the number of threads
    accessing it;
    to set this number equal to <N> add
      #define MAX_THREADS <N>
  - to substitute calls of pthread_cond_wait for calls of
    pthread_cond_timedwait (wait with timeout set up);
    this setting should be used only when you want to trap a deadlock
    situation, which theoretically should not happen;
    to set timeout equal to <T> seconds add
      #define KEYCACHE_TIMEOUT <T>
  - to enable the module traps and to send debug information from
    key cache module to a special debug log add:
      #define KEYCACHE_DEBUG
    the name of this debug log file <LOG NAME> can be set through:
      #define KEYCACHE_DEBUG_LOG  <LOG NAME>
    if the name is not defined, it's set by default;
    if the KEYCACHE_DEBUG flag is not set up and we are in a debug
    mode, i.e. when ! defined(DBUG_OFF), the debug information from the
    module is sent to the regular debug log.

  Example of the settings:
    #define SERIALIZED_READ_FROM_CACHE
    #define MAX_THREADS   100
    #define KEYCACHE_TIMEOUT  1
    #define KEYCACHE_DEBUG
    #define KEYCACHE_DEBUG_LOG  "my_key_cache_debug.log"
85
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
86 87

#if defined(MSDOS) && !defined(M_IC80386)
88
/* we need much memory */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
89 90
#undef my_malloc_lock
#undef my_free_lock
91 92 93 94 95 96 97 98 99 100 101 102
#define my_malloc_lock(A,B)  halloc((long) (A/IO_SIZE),IO_SIZE)
#define my_free_lock(A,B)    hfree(A)
#endif /* defined(MSDOS) && !defined(M_IC80386) */

/*
  Given a pointer 'a' to the member MEMBER of an object of type TYPE,
  compute a pointer to the enclosing object.
  The whole expansion is parenthesized so that the result can be used
  inside a larger expression, e.g. STRUCT_PTR(T, m, p)->field; without
  the outer parentheses the cast would bind looser than '->'.
*/
#define STRUCT_PTR(TYPE, MEMBER, a)                                           \
          ((TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)))

/* types of condition variables */
#define  COND_FOR_REQUESTED 0
#define  COND_FOR_SAVED     1
#define  COND_FOR_READERS   2

103 104 105
typedef pthread_cond_t KEYCACHE_CONDVAR;

/* descriptor of the page in the key cache block buffer */
106
struct st_keycache_page
107
{
108 109
  int file;               /* file to which the page belongs to  */
  my_off_t filepos;       /* position of the page in the file   */
110
};
111

112
/* element in the chain of a hash table bucket */
113
struct st_hash_link
114
{
115 116 117 118 119
  struct st_hash_link *next, **prev; /* to connect links in the same bucket  */
  struct st_block_link *block;       /* reference to the block for the page: */
  File file;                         /* from such a file                     */
  my_off_t diskpos;                  /* with such an offset                  */
  uint requests;                     /* number of requests for the page      */
120
};
121 122 123 124 125 126 127 128 129 130 131 132 133 134

/* simple states of a block */
#define BLOCK_ERROR       1   /* an error occured when performing disk i/o   */
#define BLOCK_READ        2   /* the is page in the block buffer             */
#define BLOCK_IN_SWITCH   4   /* block is preparing to read new page         */
#define BLOCK_REASSIGNED  8   /* block does not accept requests for old page */
#define BLOCK_IN_FLUSH   16   /* block is in flush operation                 */
#define BLOCK_CHANGED    32   /* block buffer contains a dirty page          */

/* page status, returned by find_key_block */
#define PAGE_READ               0
#define PAGE_TO_BE_READ         1
#define PAGE_WAIT_TO_BE_READ    2

135 136 137
/*
  The temperature of a block tells in which (sub-)chain the block
  currently is. A cold block is a free block.
*/
enum BLOCK_TEMPERATURE
{
  BLOCK_COLD,   /* free */
  BLOCK_WARM,
  BLOCK_HOT
};

138
/* key cache block */
139
struct st_block_link
140
{
141 142 143 144 145 146 147 148 149 150 151
  struct st_block_link
    *next_used, **prev_used;   /* to connect links in the LRU chain (ring)   */
  struct st_block_link
    *next_changed, **prev_changed; /* for lists of file dirty/clean blocks   */
  struct st_hash_link *hash_link; /* backward ptr to referring hash_link     */
  KEYCACHE_WQUEUE wqueue[2]; /* queues on waiting requests for new/old pages */
  uint requests;          /* number of requests for the block                */
  byte *buffer;           /* buffer for the block page                       */
  uint offset;            /* beginning of modified data in the buffer        */
  uint length;            /* end of data in the buffer                       */
  uint status;            /* state of the block                              */
152
  enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */
153 154
  uint hits_left;         /* number of hits left until promotion             */
  ulonglong last_hit_time; /* timestamp of the last hit                      */
155
  KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event    */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
156
};
157

158 159 160
/* Storage for the default key cache and the global pointer referring to it */
KEY_CACHE dflt_key_cache_var;
KEY_CACHE *dflt_key_cache= &dflt_key_cache_var;

161 162
#define FLUSH_CACHE         2000            /* sort this many blocks at once */

163
static int flush_all_key_blocks(KEY_CACHE *keycache);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
164
static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
165
                                   struct st_my_thread_var *thread);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
166
static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
167 168
                                     struct st_my_thread_var *thread);
static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block);
169
static void test_key_cache(KEY_CACHE *keycache,
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
170 171
                           const char *where, my_bool lock);

172
/*
  Map (file, position) to a hash bucket index. The macro refers to a
  variable named 'keycache' that must be in scope where it is used.
*/
#define KEYCACHE_HASH(f, pos)                                                 \
  ((((ulong) ((pos) >> keycache->key_cache_shift)) + (ulong) (f)) &           \
   (keycache->hash_entries - 1))

/* Map a file to an index below CHANGED_BLOCKS_HASH */
#define FILE_HASH(f)                 ((uint) (f) & (CHANGED_BLOCKS_HASH-1))
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
176

177
#define DEFAULT_KEYCACHE_DEBUG_LOG  "keycache_debug.log"

/* Fall back to the default log name when only KEYCACHE_DEBUG is given */
#if defined(KEYCACHE_DEBUG) && ! defined(KEYCACHE_DEBUG_LOG)
#define KEYCACHE_DEBUG_LOG  DEFAULT_KEYCACHE_DEBUG_LOG
#endif

#if defined(KEYCACHE_DEBUG_LOG)
static FILE *keycache_debug_log=NULL;
static void keycache_debug_print _VARARGS((const char *fmt,...));

/*
  Open the debug log unless it is already open.
  Wrapped in do { } while (0) so that the macro behaves as one statement
  and cannot capture an 'else' that follows it. Use with a trailing ';'.
*/
#define KEYCACHE_DEBUG_OPEN                                                   \
          do                                                                  \
          {                                                                   \
            if (!keycache_debug_log)                                          \
              keycache_debug_log=fopen(KEYCACHE_DEBUG_LOG, "w");              \
          } while (0)

/* Close the debug log if it is open and forget the stream */
#define KEYCACHE_DEBUG_CLOSE                                                  \
          do                                                                  \
          {                                                                   \
            if (keycache_debug_log)                                           \
            {                                                                 \
              fclose(keycache_debug_log);                                     \
              keycache_debug_log=0;                                           \
            }                                                                 \
          } while (0)
#else
#define KEYCACHE_DEBUG_OPEN
#define KEYCACHE_DEBUG_CLOSE
#endif /* defined(KEYCACHE_DEBUG_LOG) */

196
#if defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG)
/* Write the label 'l' (if the log is open), then print the message 'm' */
#define KEYCACHE_DBUG_PRINT(l, m)                                             \
            {                                                                 \
              if (keycache_debug_log)                                         \
                fprintf(keycache_debug_log, "%s: ", l);                       \
              keycache_debug_print m;                                         \
            }

/* Close the debug log when the condition fails, then assert it */
#define KEYCACHE_DBUG_ASSERT(a)                                               \
            {                                                                 \
              if (! (a) && keycache_debug_log)                                \
                fclose(keycache_debug_log);                                   \
              assert(a);                                                      \
            }
#else
/* Without the private log, use the regular DBUG facilities */
#define KEYCACHE_DBUG_PRINT(l, m)  DBUG_PRINT(l, m)
#define KEYCACHE_DBUG_ASSERT(a)    DBUG_ASSERT(a)
#endif /* defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG) */

#if defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF)
/* Id of the thread recorded by the last KEYCACHE_THREAD_TRACE_BEGIN */
static long keycache_thread_id;

/* Print a trace line tagged with the recorded thread id */
#define KEYCACHE_THREAD_TRACE(l)                                              \
             KEYCACHE_DBUG_PRINT(l,("|thread %ld",keycache_thread_id))

/*
  Record the id of the current thread and print an opening trace line.
  my_thread_var is evaluated once and the id is read through the local
  pointer, so the local is actually used.
*/
#define KEYCACHE_THREAD_TRACE_BEGIN(l)                                        \
            { struct st_my_thread_var *thread_var= my_thread_var;             \
              keycache_thread_id= thread_var->id;                             \
              KEYCACHE_DBUG_PRINT(l,("[thread %ld",keycache_thread_id)) }

/* Print the closing trace line for the recorded thread id */
#define KEYCACHE_THREAD_TRACE_END(l)                                          \
            KEYCACHE_DBUG_PRINT(l,("]thread %ld",keycache_thread_id))
#else
#define KEYCACHE_THREAD_TRACE_BEGIN(l)
#define KEYCACHE_THREAD_TRACE_END(l)
#define KEYCACHE_THREAD_TRACE(l)
#endif /* defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF) */

/*
  Index of block 'b' relative to keycache->block_root.
  A variable named 'keycache' must be in scope where the macro is used.
*/
#define BLOCK_NUMBER(b)                                                       \
  ((uint) (((char *) (b) - (char *) keycache->block_root) /                   \
           sizeof(BLOCK_LINK)))

/* Index of hash link 'h' relative to keycache->hash_link_root */
#define HASH_LINK_NUMBER(h)                                                   \
  ((uint) (((char *) (h) - (char *) keycache->hash_link_root) /               \
           sizeof(HASH_LINK)))
231 232 233 234 235 236

/*
  keycache_pthread_cond_wait is a file-local function when KEYCACHE_DEBUG
  is defined, or when KEYCACHE_TIMEOUT is defined on a non-Windows build;
  in all other builds it is an alias for pthread_cond_wait.
*/
#if defined(KEYCACHE_DEBUG) || (defined(KEYCACHE_TIMEOUT) && !defined(__WIN__))
static int keycache_pthread_cond_wait(pthread_cond_t *cond,
                                      pthread_mutex_t *mutex);
#else
#define keycache_pthread_cond_wait pthread_cond_wait
#endif

239 240 241 242 243 244 245 246 247 248 249 250 251 252
/*
  With KEYCACHE_DEBUG the mutex and condition variable calls go through
  file-local wrappers declared here; otherwise the keycache_pthread_*
  names are plain aliases for the corresponding pthread functions.
*/
#if defined(KEYCACHE_DEBUG)
static int keycache_pthread_mutex_lock(pthread_mutex_t *mutex);
static void keycache_pthread_mutex_unlock(pthread_mutex_t *mutex);
static int keycache_pthread_cond_signal(pthread_cond_t *cond);
static int keycache_pthread_cond_broadcast(pthread_cond_t *cond);
#else
#define keycache_pthread_mutex_lock pthread_mutex_lock
#define keycache_pthread_mutex_unlock pthread_mutex_unlock
#define keycache_pthread_cond_signal pthread_cond_signal
#define keycache_pthread_cond_broadcast pthread_cond_broadcast
#endif /* defined(KEYCACHE_DEBUG) */

/*
  Return the highest set bit of value shifted left by one, i.e. twice the
  largest power of two that does not exceed value.
  For value == 0 the result is 2.
*/
static uint next_power(uint value)
{
  uint top_bit= 1;

  /* Each step clears the lowest set bit; the last non-zero value wins */
  for ( ; value ; value&= value - 1)
    top_bit= value;
  return top_bit << 1;
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
261 262


263
/*
264 265 266
  Initialize a key cache

  SYNOPSIS
267 268
    init_key_cache()
    keycache			pointer to a key cache data structure
269 270 271 272
    key_cache_block_size	size of blocks to keep cached data
    use_mem                 	total memory to use for the key cache
    division_limit		division limit (may be zero)
    age_threshold		age threshold (may be zero)
273 274 275 276 277 278

  RETURN VALUE
    number of blocks in the key cache, if successful,
    0 - otherwise.

  NOTES.
279 280 281 282
    if keycache->key_cache_inited != 0 we assume that the key cache
    is already initialized.  This is for now used by myisamchk, but shouldn't
    be something that a program should rely on!

283 284
    It's assumed that no two threads call this function simultaneously
    referring to the same key cache handle.
285

286
*/
287

288 289 290
int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
		   ulong use_mem, uint division_limit,
		   uint age_threshold)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
291
{
292 293
  uint blocks, hash_links, length;
  int error;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
294
  DBUG_ENTER("init_key_cache");
295
  DBUG_ASSERT(key_cache_block_size >= 512);
296

297
  KEYCACHE_DEBUG_OPEN;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
298
  if (keycache->key_cache_inited && keycache->disk_blocks > 0)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
299
  {
300 301
    DBUG_PRINT("warning",("key cache already in use"));
    DBUG_RETURN(0);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
302
  }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
303

304 305 306
  keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
  keycache->global_cache_read= keycache->global_cache_write= 0;
  keycache->disk_blocks= -1;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
307
  if (! keycache->key_cache_inited)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
308
  {
309
    keycache->key_cache_inited= 1;
monty@mysql.com's avatar
monty@mysql.com committed
310
    keycache->in_init= 0;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
311
    pthread_mutex_init(&keycache->cache_lock, MY_MUTEX_INIT_FAST);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
312
    keycache->resize_queue.last_thread= NULL;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
313
  }
314

315 316 317 318 319
  keycache->key_cache_mem_size= use_mem;
  keycache->key_cache_block_size= key_cache_block_size;
  keycache->key_cache_shift= my_bit_log2(key_cache_block_size);
  DBUG_PRINT("info", ("key_cache_block_size: %u",
		      key_cache_block_size));
320

321
  blocks= (uint) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
322
			     sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
323
  /* It doesn't make sense to have too few blocks (less than 8) */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
324
  if (blocks >= 8 && keycache->disk_blocks < 0)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
325
  {
326
    for ( ; ; )
bk@work.mysql.com's avatar
bk@work.mysql.com committed
327
    {
328
      /* Set my_hash_entries to the next bigger 2 power */
329
      if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
330
        keycache->hash_entries<<= 1;
331
      hash_links= 2 * blocks;
332 333
#if defined(MAX_THREADS)
      if (hash_links < MAX_THREADS + blocks - 1)
334
        hash_links= MAX_THREADS + blocks - 1;
335
#endif
336 337
      while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
		       ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
338
		       ALIGN_SIZE(sizeof(HASH_LINK*) *
339
                                  keycache->hash_entries))) +
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
340
	     ((ulong) blocks << keycache->key_cache_shift) > use_mem)
341 342
        blocks--;
      /* Allocate memory for cache page buffers */
343
      if ((keycache->block_mem=
344 345
	   my_large_malloc((ulong) blocks * keycache->key_cache_block_size,
			  MYF(MY_WME))))
346
      {
347
        /*
348 349
	  Allocate memory for blocks, hash_links and hash entries;
	  For each block 2 hash links are allocated
350
        */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
351 352
        if ((keycache->block_root= (BLOCK_LINK*) my_malloc((uint) length,
                                                           MYF(0))))
353
          break;
354
        my_large_free(keycache->block_mem, MYF(0));
355
      }
356
      if (blocks < 8)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
357
      {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
358
        my_errno= ENOMEM;
359
        goto err;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
360
      }
361
      blocks= blocks / 4*3;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
362
    }
363
    keycache->blocks_unused= (ulong) blocks;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
364 365 366 367 368 369
    keycache->disk_blocks= (int) blocks;
    keycache->hash_links= hash_links;
    keycache->hash_root= (HASH_LINK**) ((char*) keycache->block_root +
				        ALIGN_SIZE(blocks*sizeof(BLOCK_LINK)));
    keycache->hash_link_root= (HASH_LINK*) ((char*) keycache->hash_root +
				            ALIGN_SIZE((sizeof(HASH_LINK*) *
370
							keycache->hash_entries)));
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
371
    bzero((byte*) keycache->block_root,
372
	  keycache->disk_blocks * sizeof(BLOCK_LINK));
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
373
    bzero((byte*) keycache->hash_root,
374
          keycache->hash_entries * sizeof(HASH_LINK*));
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
375
    bzero((byte*) keycache->hash_link_root,
376
	  keycache->hash_links * sizeof(HASH_LINK));
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
377 378 379
    keycache->hash_links_used= 0;
    keycache->free_hash_list= NULL;
    keycache->blocks_used= keycache->blocks_changed= 0;
380

381
    keycache->global_blocks_changed= 0;
382 383
    keycache->blocks_available=0;		/* For debugging */

384
    /* The LRU chain is empty after initialization */
385 386
    keycache->used_last= NULL;
    keycache->used_ins= NULL;
387
    keycache->free_block_list= NULL;
388 389
    keycache->keycache_time= 0;
    keycache->warm_blocks= 0;
390 391
    keycache->min_warm_blocks= (division_limit ?
				blocks * division_limit / 100 + 1 :
392
				blocks);
393 394
    keycache->age_threshold= (age_threshold ?
			      blocks * age_threshold / 100 :
395
			      blocks);
396

397 398 399 400
    keycache->cnt_for_resize_op= 0;
    keycache->resize_in_flush= 0;
    keycache->can_be_used= 1;

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
401 402
    keycache->waiting_for_hash_link.last_thread= NULL;
    keycache->waiting_for_block.last_thread= NULL;
403
    DBUG_PRINT("exit",
404 405
	       ("disk_blocks: %d  block_root: 0x%lx  hash_entries: %d\
 hash_root: 0x%lx  hash_links: %d  hash_link_root: 0x%lx",
406 407 408
		keycache->disk_blocks, keycache->block_root,
		keycache->hash_entries, keycache->hash_root,
		keycache->hash_links, keycache->hash_link_root));
409 410 411 412
    bzero((gptr) keycache->changed_blocks,
	  sizeof(keycache->changed_blocks[0]) * CHANGED_BLOCKS_HASH);
    bzero((gptr) keycache->file_blocks,
	  sizeof(keycache->file_blocks[0]) * CHANGED_BLOCKS_HASH);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
413
  }
414 415

  keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
416
  DBUG_RETURN((int) keycache->disk_blocks);
417

bk@work.mysql.com's avatar
bk@work.mysql.com committed
418
err:
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
419
  error= my_errno;
420
  keycache->disk_blocks= 0;
421
  keycache->blocks=  0;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
422
  if (keycache->block_mem)
423
  {
424
    my_large_free((gptr) keycache->block_mem, MYF(0));
425 426 427 428
    keycache->block_mem= NULL;
  }
  if (keycache->block_root)
  {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
429
    my_free((gptr) keycache->block_root, MYF(0));
430 431
    keycache->block_root= NULL;
  }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
432
  my_errno= error;
433
  keycache->can_be_used= 0;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
434
  DBUG_RETURN(0);
435
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
436 437


438
/*
439 440 441 442
  Resize a key cache

  SYNOPSIS
    resize_key_cache()
443 444
    keycache     	        pointer to a key cache data structure
    key_cache_block_size        size of blocks to keep cached data
445 446 447
    use_mem			total memory to use for the new key cache
    division_limit		new division limit (if not zero)
    age_threshold		new age threshold (if not zero)
448 449 450 451 452 453 454

  RETURN VALUE
    number of blocks in the key cache, if successful,
    0 - otherwise.

  NOTES.
    The function first compares the memory size and the block size parameters
455 456 457 458 459 460
    with the key cache values.

    If they differ the function free the the memory allocated for the
    old key cache blocks by calling the end_key_cache function and
    then rebuilds the key cache with new blocks by calling
    init_key_cache.
461 462 463 464

    The function starts the operation only when all other threads
    performing operations with the key cache let her to proceed
    (when cnt_for_resize=0).
465
*/
466

467 468 469
int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
		     ulong use_mem, uint division_limit,
		     uint age_threshold)
470
{
471
  int blocks;
472 473
  struct st_my_thread_var *thread;
  KEYCACHE_WQUEUE *wqueue;
474
  DBUG_ENTER("resize_key_cache");
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
475

476
  if (!keycache->key_cache_inited)
477
    DBUG_RETURN(keycache->disk_blocks);
478

479 480 481 482 483
  if(key_cache_block_size == keycache->key_cache_block_size &&
     use_mem == keycache->key_cache_mem_size)
  {
    change_key_cache_param(keycache, division_limit, age_threshold);
    DBUG_RETURN(keycache->disk_blocks);
484
  }
485

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
486
  keycache_pthread_mutex_lock(&keycache->cache_lock);
487 488

  wqueue= &keycache->resize_queue;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
489
  thread= my_thread_var;
490 491 492 493 494 495 496 497
  link_into_queue(wqueue, thread);

  while (wqueue->last_thread->next != thread)
  {
    keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
  }

  keycache->resize_in_flush= 1;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
498
  if (flush_all_key_blocks(keycache))
499
  {
500
    /* TODO: if this happens, we should write a warning in the log file ! */
501 502
    keycache->resize_in_flush= 0;
    blocks= 0;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
503
    keycache->can_be_used= 0;
504
    goto finish;
505
  }
506
  keycache->resize_in_flush= 0;
507
  keycache->can_be_used= 0;
508 509 510 511 512
  while (keycache->cnt_for_resize_op)
  {
    keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
  }

513
  end_key_cache(keycache, 0);			/* Don't free mutex */
514
  /* The following will work even if use_mem is 0 */
515 516
  blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
			 division_limit, age_threshold);
517 518 519 520 521 522

finish:
  unlink_from_queue(wqueue, thread);
  /* Signal for the next resize request to proceeed if any */
  if (wqueue->last_thread)
    keycache_pthread_cond_signal(&wqueue->last_thread->next->suspend);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
523
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
524
  return blocks;
525 526 527
}


528 529 530 531 532 533 534 535 536 537 538 539 540 541 542
/*
  Register one more operation that blocks the resize of the key cache
  by incrementing keycache->cnt_for_resize_op.
*/
static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
{
  ++keycache->cnt_for_resize_op;
}


/*
  Decrement the counter of operations that block the resize of the key
  cache. When the counter reaches zero and the resize queue is not empty,
  signal the thread at the head of the queue so that it can proceed.
*/
static inline void dec_counter_for_resize_op(KEY_CACHE *keycache)
{
  struct st_my_thread_var *tail;

  --keycache->cnt_for_resize_op;
  if (keycache->cnt_for_resize_op)
    return;
  /* last_thread->next is the first thread of the circular queue */
  tail= keycache->resize_queue.last_thread;
  if (tail)
    keycache_pthread_cond_signal(&tail->next->suspend);
}

549
/*
550
  Change the key cache parameters
551 552 553

  SYNOPSIS
    change_key_cache_param()
554
    keycache			pointer to a key cache data structure
555 556
    division_limit		new division limit (if not zero)
    age_threshold		new age threshold (if not zero)
557 558 559 560 561 562 563

  RETURN VALUE
    none

  NOTES.
    Presently the function resets the key cache parameters
    concerning midpoint insertion strategy - division_limit and
564
    age_threshold.
565 566
*/

567 568
void change_key_cache_param(KEY_CACHE *keycache, uint division_limit,
			    uint age_threshold)
569
{
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
570
  DBUG_ENTER("change_key_cache_param");
571

572
  keycache_pthread_mutex_lock(&keycache->cache_lock);
573 574 575 576 577 578
  if (division_limit)
    keycache->min_warm_blocks= (keycache->disk_blocks *
				division_limit / 100 + 1);
  if (age_threshold)
    keycache->age_threshold=   (keycache->disk_blocks *
				age_threshold / 100);
579
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
580
  DBUG_VOID_RETURN;
581 582 583
}


584
/*
585
  Remove key_cache from memory
586 587 588

  SYNOPSIS
    end_key_cache()
589 590
    keycache		key cache handle
    cleanup		Complete free (Free also mutex for key cache)
591 592 593

  RETURN VALUE
    none
594
*/
595

596
void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
597 598
{
  DBUG_ENTER("end_key_cache");
599
  DBUG_PRINT("enter", ("key_cache: 0x%lx", keycache));
600

601
  if (!keycache->key_cache_inited)
602
    DBUG_VOID_RETURN;
603

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
604
  if (keycache->disk_blocks > 0)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
605
  {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
606
    if (keycache->block_mem)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
607
    {
608
      my_large_free((gptr) keycache->block_mem, MYF(0));
609
      keycache->block_mem= NULL;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
610
      my_free((gptr) keycache->block_root, MYF(0));
611
      keycache->block_root= NULL;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
612
    }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
613
    keycache->disk_blocks= -1;
614 615
    /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
    keycache->blocks_changed= 0;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
616
  }
617 618 619 620

  DBUG_PRINT("status",
    ("used: %d  changed: %d  w_requests: %ld  \
writes: %ld  r_requests: %ld  reads: %ld",
621
      keycache->blocks_used, keycache->global_blocks_changed,
622 623 624
      keycache->global_cache_w_requests, keycache->global_cache_write,
      keycache->global_cache_r_requests, keycache->global_cache_read));

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
625 626
  if (cleanup)
  {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
627
    pthread_mutex_destroy(&keycache->cache_lock);
628
    keycache->key_cache_inited= keycache->can_be_used= 0;
629
    KEYCACHE_DEBUG_CLOSE;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
630
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
631 632 633 634
  DBUG_VOID_RETURN;
} /* end_key_cache */


635
/*
636 637 638 639
  Link a thread into double-linked queue of waiting threads.

  SYNOPSIS
    link_into_queue()
640 641
      wqueue              pointer to the queue structure
      thread              pointer to the thread to be added to the queue
642 643 644 645 646 647 648 649

  RETURN VALUE
    none

  NOTES.
    Queue is represented by a circular list of the thread structures
    The list is double-linked of the type (**prev,*next), accessed by
    a pointer to the last element.
650
*/
651

652
static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
653
                                   struct st_my_thread_var *thread)
654
{
655
  struct st_my_thread_var *last;
656
  if (! (last= wqueue->last_thread))
657 658
  {
    /* Queue is empty */
659 660
    thread->next= thread;
    thread->prev= &thread->next;
661 662
  }
  else
663
  {
664 665 666 667
    thread->prev= last->next->prev;
    last->next->prev= &thread->next;
    thread->next= last->next;
    last->next= thread;
668
  }
669
  wqueue->last_thread= thread;
670 671 672
}

/*
673
  Unlink a thread from double-linked queue of waiting threads
674 675 676

  SYNOPSIS
    unlink_from_queue()
677 678
      wqueue              pointer to the queue structure
      thread              pointer to the thread to be removed from the queue
679 680 681 682 683 684

  RETURN VALUE
    none

  NOTES.
    See NOTES for link_into_queue
685
*/
686

687
static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
                              struct st_my_thread_var *thread)
{
  KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", thread->id));
  if (thread->next != thread)
  {
    /* More than one member: bypass the thread in both directions */
    struct st_my_thread_var *successor= thread->next;
    successor->prev= thread->prev;
    *thread->prev= successor;
    /*
      If the tail is being removed, the new tail is the structure that
      contains the 'next' field thread->prev points to
    */
    if (thread == wqueue->last_thread)
      wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
                                      thread->prev);
  }
  else
  {
    /* The thread was the only member, the queue becomes empty */
    wqueue->last_thread= NULL;
  }
  /* A NULL next pointer marks the thread as not queued any longer */
  thread->next= NULL;
}


/*
707
  Add a thread to single-linked queue of waiting threads
708 709 710

  SYNOPSIS
    add_to_queue()
711 712
      wqueue              pointer to the queue structure
      thread              pointer to the thread to be added to the queue
713 714 715 716 717 718 719 720

  RETURN VALUE
    none

  NOTES.
    Queue is represented by a circular list of the thread structures
    The list is single-linked of the type (*next), accessed by a pointer
    to the last element.
721
*/
722

723 724
static inline void add_to_queue(KEYCACHE_WQUEUE *wqueue,
                                struct st_my_thread_var *thread)
{
  struct st_my_thread_var *tail= wqueue->last_thread;

  /*
    The new thread points to the head of the ring; in an empty queue
    it is its own successor
  */
  thread->next= tail ? tail->next : thread;
  if (tail)
    tail->next= thread;
  /* The thread added last is the new tail */
  wqueue->last_thread= thread;
}


/*
739
  Remove all threads from queue signaling them to proceed
740 741 742

  SYNOPSIS
    release_queue()
743 744
      wqueue              pointer to the queue structure
      thread              pointer to the thread to be added to the queue
745 746 747 748 749 750 751

  RETURN VALUE
    none

  NOTES.
    See notes for add_to_queue
    When removed from the queue each thread is signaled via condition
752
    variable thread->suspend.
753
*/
754 755 756

static void release_queue(KEYCACHE_WQUEUE *wqueue)
{
  struct st_my_thread_var *last= wqueue->last_thread;
  struct st_my_thread_var *next;
  struct st_my_thread_var *thread;

  /*
    The queue may be empty. Without this check last->next below would
    dereference a NULL pointer.
  */
  if (! last)
    return;

  /* Start with the first thread of the ring, which follows the last one */
  next= last->next;
  do
  {
    thread= next;
    keycache_pthread_cond_signal(&thread->suspend);
    KEYCACHE_DBUG_PRINT("release_queue: signal", ("thread %ld", thread->id));
    /* Remember the successor before the link is cleared */
    next= thread->next;
    /* A NULL next pointer marks the thread as removed from the queue */
    thread->next= NULL;
  }
  while (thread != last);
  /* All threads have been signaled, the queue is empty now */
  wqueue->last_thread= NULL;
}


/*
774
  Unlink a block from the chain of dirty/clean blocks
775
*/
776

777
static inline void unlink_changed(BLOCK_LINK *block)
{
  BLOCK_LINK *successor= block->next_changed;

  /* Let the successor, if any, point back to whatever pointed to block */
  if (successor)
    successor->prev_changed= block->prev_changed;
  /* Redirect the pointer that referenced block to the successor */
  *block->prev_changed= successor;
}


785
/*
786
  Link a block into the chain of dirty/clean blocks
787
*/
788

789
static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
{
  BLOCK_LINK *old_head= *phead;

  /* The block is inserted in front of the current head of the chain */
  block->prev_changed= phead;
  block->next_changed= old_head;
  if (old_head)
    old_head->prev_changed= &block->next_changed;
  *phead= block;
}

797 798

/*
799 800
  Unlink a block from the chain of dirty/clean blocks, if it's asked for,
  and link it to the chain of clean blocks for the specified file
801
*/
802

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
803 804
static void link_to_file_list(KEY_CACHE *keycache,
                              BLOCK_LINK *block, int file, my_bool unlink)
{
  /* Head of the chain of clean blocks selected by the hash of the file */
  BLOCK_LINK **clean_chain= &keycache->file_blocks[FILE_HASH(file)];

  /* Leave the chain the block is currently in, if requested */
  if (unlink)
    unlink_changed(block);
  link_changed(block, clean_chain);

  /* Nothing more to do unless the block was marked as changed */
  if (!(block->status & BLOCK_CHANGED))
    return;

  /* The block is not dirty any longer: clear the flag, adjust counters */
  block->status&= ~BLOCK_CHANGED;
  keycache->blocks_changed--;
  keycache->global_blocks_changed--;
}

817

818 819 820
/*
  Unlink a block from the chain of clean blocks for the specified
  file and link it to the chain of dirty blocks for this file
821
*/
822

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
823 824
static inline void link_to_changed_list(KEY_CACHE *keycache,
                                        BLOCK_LINK *block)
{
  BLOCK_LINK **dirty_chain;

  /* Take the block out of the chain it is currently linked into */
  unlink_changed(block);

  /* The chain of dirty blocks is selected by the file of the block's page */
  dirty_chain= &keycache->changed_blocks[FILE_HASH(block->hash_link->file)];
  link_changed(block, dirty_chain);

  /* Mark the block as changed and account for it */
  block->status|= BLOCK_CHANGED;
  keycache->blocks_changed++;
  keycache->global_blocks_changed++;
}


835
/*
836 837 838 839 840
  Link a block to the LRU chain at the beginning or at the end of
  one of two parts.

  SYNOPSIS
    link_block()
841
      keycache            pointer to a key cache data structure
842 843 844 845 846 847 848 849
      block               pointer to the block to link to the LRU chain
      hot                 <-> to link the block into the hot subchain
      at_end              <-> to link the block at the end of the subchain

  RETURN VALUE
    none

  NOTES.
850 851 852
    The LRU chain is represented by a circular list of block structures.
    The list is double-linked of the type (**prev,*next) type.
    The LRU chain is divided into two parts - hot and warm.
853
    There are two pointers to access the last blocks of these two
854
    parts. The beginning of the warm part follows right after the
855 856 857 858 859 860 861 862 863 864 865 866 867
    end of the hot part.
    Only blocks of the warm part can be used for replacement.
    The first block from the beginning of this subchain is always
    taken for eviction (keycache->last_used->next)

    LRU chain:       +------+   H O T    +------+
                +----| end  |----...<----| beg  |----+
                |    +------+last        +------+    |
                v<-link in latest hot (new end)      |
                |     link in latest warm (new end)->^
                |    +------+  W A R M   +------+    |
                +----| beg  |---->...----| end  |----+
                     +------+            +------+ins
868
                  first for eviction
869
*/
870

871 872
static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, my_bool hot,
                       my_bool at_end)
{
  BLOCK_LINK **panchor;
  BLOCK_LINK *anchor;

  KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests));
  if (!hot && keycache->waiting_for_block.last_thread)
  {
    /*
      Threads are waiting for a block. The block is not linked into the
      LRU chain; it is handed over to the waiting threads instead.
    */
    KEYCACHE_WQUEUE *waiters= &keycache->waiting_for_block;
    struct st_my_thread_var *final_thread= waiters->last_thread;
    struct st_my_thread_var *current= final_thread->next;
    struct st_my_thread_var *following;
    /* The hash link the first thread in the queue is waiting for */
    HASH_LINK *wanted= (HASH_LINK *) current->opt_info;

    for (;;)
    {
      /* Fetch the successor first: unlinking clears current->next */
      following= current->next;
      /*
        Wake up every thread that asks for the same page as the first
        thread in the queue; each of them counts as a request for the block
      */
      if ((HASH_LINK *) current->opt_info == wanted)
      {
        keycache_pthread_cond_signal(&current->suspend);
        unlink_from_queue(waiters, current);
        block->requests++;
      }
      if (current == final_thread)
        break;
      current= following;
    }
    wanted->block= block;
    KEYCACHE_THREAD_TRACE("link_block: after signaling");
#if defined(KEYCACHE_DEBUG)
    KEYCACHE_DBUG_PRINT("link_block",
        ("linked,unlinked block %u  status=%x  #requests=%u  #available=%u",
         BLOCK_NUMBER(block), block->status,
         block->requests, keycache->blocks_available));
#endif
    return;
  }

  /* Insertion point: used_ins for a hot block, used_last otherwise */
  panchor= hot ? &keycache->used_ins : &keycache->used_last;
  anchor= *panchor;
  if (!anchor)
  {
    /* The LRU chain is empty: the block becomes its only member */
    block->next_used= block;
    keycache->used_ins= block;
    keycache->used_last= block;
    block->prev_used= &block->next_used;
  }
  else
  {
    /* Insert the block right after the insertion point */
    BLOCK_LINK *after= anchor->next_used;
    after->prev_used= &block->next_used;
    block->next_used= after;
    block->prev_used= &anchor->next_used;
    anchor->next_used= block;
    /* When linking at the end the block becomes the new insertion point */
    if (at_end)
      *panchor= block;
  }
  KEYCACHE_THREAD_TRACE("link_block");
#if defined(KEYCACHE_DEBUG)
  keycache->blocks_available++;
  KEYCACHE_DBUG_PRINT("link_block",
      ("linked block %u:%1u  status=%x  #requests=%u  #available=%u",
       BLOCK_NUMBER(block), at_end, block->status,
       block->requests, keycache->blocks_available));
  KEYCACHE_DBUG_ASSERT((ulong) keycache->blocks_available <=
                       keycache->blocks_used);
#endif
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
940

941 942

/*
943
  Unlink a block from the LRU chain
944 945 946

  SYNOPSIS
    unlink_block()
947
      keycache            pointer to a key cache data structure
948 949 950 951 952 953 954
      block               pointer to the block to unlink from the LRU chain

  RETURN VALUE
    none

  NOTES.
    See NOTES for link_block
955
*/
956

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
957
static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  if (block->next_used != block)
  {
    /* Bypass the block in both directions of the chain */
    block->next_used->prev_used= block->prev_used;
    *block->prev_used= block->next_used;
    /*
      If one of the access pointers referred to the block, move it to the
      structure containing the 'next_used' field block->prev_used points to
    */
    if (block == keycache->used_last)
      keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
    if (block == keycache->used_ins)
      keycache->used_ins=STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
  }
  else
  {
    /* The block was the only member: the LRU chain becomes empty */
    keycache->used_ins= NULL;
    keycache->used_last= NULL;
  }
  /* A NULL next_used pointer marks the block as not linked */
  block->next_used= NULL;

  KEYCACHE_THREAD_TRACE("unlink_block");
#if defined(KEYCACHE_DEBUG)
  keycache->blocks_available--;
  KEYCACHE_DBUG_PRINT("unlink_block",
    ("unlinked block %u  status=%x   #requests=%u  #available=%u",
     BLOCK_NUMBER(block), block->status,
     block->requests, keycache->blocks_available));
  KEYCACHE_DBUG_ASSERT(keycache->blocks_available >= 0);
#endif
}


/*
986
  Register requests for a block
987
*/
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
988
static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
{
  /* The first request for a block takes it out of the LRU chain */
  if (block->requests == 0)
    unlink_block(keycache, block);
  block->requests+= count;
}


997 998 999
/*
  Unregister request for a block
  linking it to the LRU chain if it's the last request
1000 1001 1002 1003

  SYNOPSIS

    unreg_request()
1004
      keycache            pointer to a key cache data structure
1005 1006 1007 1008 1009 1010
      block               pointer to the block to link to the LRU chain
      at_end              <-> to link the block at the end of the LRU chain

  RETURN VALUE
    none

1011
  NOTES.
1012 1013 1014
    Every linking to the LRU chain decrements by one a special block
    counter (if it's positive). If the at_end parameter is TRUE the block is
    added either at the end of warm sub-chain or at the end of hot sub-chain.
1015 1016
    It is added to the hot subchain if its counter is zero and number of
    blocks in warm sub-chain is not less than some low limit (determined by
1017 1018
    the division_limit parameter). Otherwise the block is added to the warm
    sub-chain. If the at_end parameter is FALSE the block is always added
1019
    at beginning of the warm sub-chain.
1020 1021 1022 1023
    Thus a warm block can be promoted to the hot sub-chain when its counter
    becomes zero for the first time.
    At the same time  the block at the very beginning of the hot subchain
    might be moved to the beginning of the warm subchain if it stays untouched
1024
    for a too long time (this time is determined by parameter age_threshold).
1025
*/
1026

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1027 1028
/*
  Drop one request for the block. When the last request is dropped:
    - decrement block->hits_left if it is positive;
    - link the block with link_block(), into the hot sub-chain if hits_left
      is zero, at_end is set and warm_blocks exceeds min_warm_blocks,
      otherwise into the warm sub-chain;
    - advance keycache_time and, if the block at used_ins has not been hit
      for more than age_threshold ticks, relink it as a warm block.

  keycache   pointer to a key cache data structure
  block      pointer to the block whose request is unregistered
  at_end     passed on to link_block(); non-zero links at the end
*/
static inline void unreg_request(KEY_CACHE *keycache,
                                 BLOCK_LINK *block, int at_end)
{
  if (! --block->requests)
  {
    my_bool hot;
    if (block->hits_left)
      block->hits_left--;
    hot= !block->hits_left && at_end &&
      keycache->warm_blocks > keycache->min_warm_blocks;
    if (hot)
    {
      if (block->temperature == BLOCK_WARM)
        keycache->warm_blocks--;
      block->temperature= BLOCK_HOT;
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks=%u",
                           keycache->warm_blocks));
    }
    link_block(keycache, block, hot, (my_bool)at_end);
    block->last_hit_time= keycache->keycache_time;
    keycache->keycache_time++;

    /*
      link_block() does not link the block when it hands it over to
      threads waiting for a block. In that case the LRU chain may be
      empty and used_ins NULL, so used_ins must be checked before it
      is dereferenced.
    */
    block= keycache->used_ins;
    if (block &&
        keycache->keycache_time - block->last_hit_time >
        keycache->age_threshold)
    {
      /* The block stayed untouched for too long: relink it as warm */
      unlink_block(keycache, block);
      link_block(keycache, block, 0, 0);
      if (block->temperature != BLOCK_WARM)
      {
        keycache->warm_blocks++;
        block->temperature= BLOCK_WARM;
      }
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks=%u",
                           keycache->warm_blocks));
    }
  }
}

/*
1065
  Remove a reader of the page in block
1066
*/
1067

1068
static inline void remove_reader(BLOCK_LINK *block)
{
  HASH_LINK *page_link= block->hash_link;

  page_link->requests--;
  /* After the last request is gone, signal the condition set on the block */
  if (page_link->requests == 0 && block->condvar)
    keycache_pthread_cond_signal(block->condvar);
}


/*
1076 1077
  Wait until the last reader of the page in block
  signals on its termination
1078
*/
1079

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1080
static inline void wait_for_readers(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  struct st_my_thread_var *self= my_thread_var;

  for (;;)
  {
    /* Done as soon as no requests for the page are registered */
    if (! block->hash_link->requests)
      break;
    /* Publish our condition variable in the block while we are waiting */
    block->condvar= &self->suspend;
    keycache_pthread_cond_wait(&self->suspend, &keycache->cache_lock);
    block->condvar= NULL;
  }
}


/*
1093
  Add a hash link to a bucket in the hash_table
1094
*/
1095

1096 1097 1098
static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link)
{
  HASH_LINK *old_first= *start;

  /* The hash link is inserted at the head of the bucket chain */
  if (old_first)
    old_first->prev= &hash_link->next;
  hash_link->next= old_first;
  hash_link->prev= start;
  *start= hash_link;
}


/*
1107
  Remove a hash link from the hash table
1108
*/
1109

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1110
static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
{
  KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u  pos_ %lu  #requests=%u",
      (uint) hash_link->file,(ulong) hash_link->diskpos, hash_link->requests));
  KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);

  /* Take the hash link out of its bucket chain */
  *hash_link->prev= hash_link->next;
  if (hash_link->next)
    hash_link->next->prev= hash_link->prev;
  hash_link->block= NULL;

  if (keycache->waiting_for_hash_link.last_thread == NULL)
  {
    /* Nobody waits for a hash link: push it onto the free list */
    hash_link->next= keycache->free_hash_list;
    keycache->free_hash_list= hash_link;
  }
  else
  {
    /*
      A free hash link has appeared while threads are waiting for one:
      assign it to the page requested by the first thread in the queue
    */
    KEYCACHE_WQUEUE *waiters= &keycache->waiting_for_hash_link;
    struct st_my_thread_var *final_thread= waiters->last_thread;
    struct st_my_thread_var *current= final_thread->next;
    struct st_my_thread_var *following;
    KEYCACHE_PAGE *wanted= (KEYCACHE_PAGE *) (current->opt_info);

    hash_link->file= wanted->file;
    hash_link->diskpos= wanted->filepos;
    for (;;)
    {
      KEYCACHE_PAGE *page= (KEYCACHE_PAGE *) current->opt_info;
      /* Fetch the successor first: unlinking clears current->next */
      following= current->next;
      /*
        Wake up every thread that asks for the same page as the first
        thread in the queue
      */
      if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
      {
        keycache_pthread_cond_signal(&current->suspend);
        unlink_from_queue(waiters, current);
      }
      if (current == final_thread)
        break;
      current= following;
    }
    /* Put the hash link into the bucket of the page it now stands for */
    link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
                                                 hash_link->diskpos)],
              hash_link);
  }
}

1156

1157
/*
1158
  Get the hash link for a page
1159
*/
1160

1161
static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
                                int file, my_off_t filepos)
{
  reg1 HASH_LINK *hash_link, **start;
  KEYCACHE_PAGE page;
#if defined(KEYCACHE_DEBUG)
  int cnt;
#endif

  KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u  pos: %lu",
                      (uint) file,(ulong) filepos));

  /* The lookup is repeated from scratch after every wait */
  for (;;)
  {
    /*
       Find the bucket in the hash table for the pair (file, filepos);
       start is the head of the bucket list,
       hash_link is the first member of the list
    */
    start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)];
    hash_link= *start;
#if defined(KEYCACHE_DEBUG)
    cnt= 0;
#endif
    /* Walk the bucket chain until the pair (file, filepos) is found */
    while (hash_link &&
           !(hash_link->diskpos == filepos && hash_link->file == file))
    {
      hash_link= hash_link->next;
#if defined(KEYCACHE_DEBUG)
      cnt++;
      if (cnt > keycache->hash_links_used)
      {
        /* The chain is longer than possible: dump it before asserting */
        int i;
        for (i=0, hash_link= *start ;
             i < cnt ; i++, hash_link= hash_link->next)
        {
          KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u  pos: %lu",
              (uint) hash_link->file,(ulong) hash_link->diskpos));
        }
      }
      KEYCACHE_DBUG_ASSERT(cnt <= keycache->hash_links_used);
#endif
    }
    if (hash_link)
      break;

    /* There is no hash link in the hash table for the pair (file, filepos) */
    if (keycache->free_hash_list)
    {
      /* Pop a hash link from the free list */
      hash_link= keycache->free_hash_list;
      keycache->free_hash_list= hash_link->next;
    }
    else if (keycache->hash_links_used < keycache->hash_links)
    {
      /* Take the next never used hash link */
      hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
    }
    else
    {
      /* Wait for a free hash link, then start over */
      struct st_my_thread_var *thread= my_thread_var;
      KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
      page.file= file;
      page.filepos= filepos;
      thread->opt_info= (void *) &page;
      link_into_queue(&keycache->waiting_for_hash_link, thread);
      keycache_pthread_cond_wait(&thread->suspend,
                                 &keycache->cache_lock);
      thread->opt_info= NULL;
      continue;
    }
    /* Assign the new hash link to the page and put it into the bucket */
    hash_link->file= file;
    hash_link->diskpos= filepos;
    link_hash(start, hash_link);
    break;
  }
  /* Register the request for the page */
  hash_link->requests++;

  return hash_link;
}


/*
1241 1242
  Get a block for the file page requested by a keycache read/write operation;
  If the page is not in the cache return a free block, if there is none
1243
  return the lru block after saving its buffer if the page is dirty.
1244

1245 1246 1247
  SYNOPSIS

    find_key_block()
1248
      keycache            pointer to a key cache data structure
1249 1250 1251 1252
      file                handler for the file to read page from
      filepos             position of the page in the file
      init_hits_left      how to initialize the block counter for the page
      wrmode              <-> get for writing
1253
      page_st        out  {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264

  RETURN VALUE
    Pointer to the found block if successful, 0 - otherwise

  NOTES.
    For the page from file positioned at filepos the function checks whether
    the page is in the key cache specified by the first parameter.
    If this is the case it immediately returns the block.
    If not, the function first chooses  a block for this page. If there is
    no not used blocks in the key cache yet, the function takes the block
    at the very beginning of the warm sub-chain. It saves the page in that
1265
    block if it's dirty before returning the pointer to it.
1266 1267 1268
    The function returns in the page_st parameter the following values:
      PAGE_READ         - if page already in the block,
      PAGE_TO_BE_READ   - if it is to be read yet by the current thread
1269
      PAGE_WAIT_TO_BE_READ - if it is to be read by another thread
1270 1271 1272
    If an error occurs THE BLOCK_ERROR bit is set in the block status.
    It might happen that there are no blocks in LRU chain (in warm part) -
    all blocks  are unlinked for some read/write operations. Then the function
1273
    waits until the first of these operations links any block back.
1274 1275
*/

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1276
static BLOCK_LINK *find_key_block(KEY_CACHE *keycache,
1277 1278
                                  File file, my_off_t filepos,
                                  int init_hits_left,
1279 1280 1281 1282
                                  int wrmode, int *page_st)
{
  HASH_LINK *hash_link;
  BLOCK_LINK *block;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1283
  int error= 0;
1284
  int page_status;
1285

1286 1287
  DBUG_ENTER("find_key_block");
  KEYCACHE_THREAD_TRACE("find_key_block:begin");
1288 1289 1290 1291 1292
  DBUG_PRINT("enter", ("fd: %u  pos %lu  wrmode: %lu",
                       (uint) file, (ulong) filepos, (uint) wrmode));
  KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %u  pos: %lu  wrmode: %lu",
                                         (uint) file, (ulong) filepos,
                                         (uint) wrmode));
1293
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1294 1295
  DBUG_EXECUTE("check_keycache2",
               test_key_cache(keycache, "start of find_key_block", 0););
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1296
#endif
1297

1298 1299
restart:
  /* Find the hash link for the requested page (file, filepos) */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1300
  hash_link= get_hash_link(keycache, file, filepos);
1301

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1302 1303
  page_status= -1;
  if ((block= hash_link->block) &&
1304
      block->hash_link == hash_link && (block->status & BLOCK_READ))
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1305
    page_status= PAGE_READ;
1306

1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318
  if (wrmode && keycache->resize_in_flush)
  {
    /* This is a write request during the flush phase of a resize operation */

    if (page_status != PAGE_READ)
    {
      /* We don't need the page in the cache: we are going to write on disk */
      hash_link->requests--;
      unlink_hash(keycache, hash_link);
      return 0;
    }
    if (!(block->status & BLOCK_IN_FLUSH))
1319
    {
1320 1321 1322 1323 1324 1325 1326 1327 1328 1329
      hash_link->requests--;
      /*
        Remove block to invalidate the page in the block buffer
        as we are going to write directly on disk.
        Although we have an exlusive lock for the updated key part
        the control can be yieded by the current thread as we might
        have unfinished readers of other key parts in the block
        buffer. Still we are guaranteed not to have any readers
        of the key part we are writing into until the block is
        removed from the cache as we set the BLOCL_REASSIGNED
1330
        flag (see the code below that handles reading requests).
1331
      */
1332
      free_block(keycache, block);
1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351
      return 0;
    }
    /* Wait intil the page is flushed on disk */
    hash_link->requests--;
    {
      struct st_my_thread_var *thread= my_thread_var;
      add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
      do
      {
        keycache_pthread_cond_wait(&thread->suspend,
                                   &keycache->cache_lock);
      }
      while(thread->next);
    }
    /* Invalidate page in the block if it has not been done yet */
    if (block->status)
      free_block(keycache, block);
    return 0;
  }
1352

1353 1354
  if (page_status == PAGE_READ &&
      (block->status & (BLOCK_IN_SWITCH | BLOCK_REASSIGNED)))
1355 1356
  {
    /* This is a request for a page to be removed from cache */
1357

1358
    KEYCACHE_DBUG_PRINT("find_key_block",
1359
             ("request for old page in block %u",BLOCK_NUMBER(block)));
1360
    /*
1361 1362 1363 1364
       Only reading requests can proceed until the old dirty page is flushed,
       all others are to be suspended, then resubmitted
    */
    if (!wrmode && !(block->status & BLOCK_REASSIGNED))
1365
      reg_requests(keycache, block, 1);
1366 1367 1368
    else
    {
      hash_link->requests--;
1369
      KEYCACHE_DBUG_PRINT("find_key_block",
1370 1371
                          ("request waiting for old page to be saved"));
      {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1372
        struct st_my_thread_var *thread= my_thread_var;
1373 1374 1375 1376 1377
        /* Put the request into the queue of those waiting for the old page */
        add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
        /* Wait until the request can be resubmitted */
        do
        {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1378
          keycache_pthread_cond_wait(&thread->suspend,
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1379
                                     &keycache->cache_lock);
1380 1381 1382
        }
        while(thread->next);
      }
1383
      KEYCACHE_DBUG_PRINT("find_key_block",
1384 1385 1386 1387 1388 1389
                          ("request for old page resubmitted"));
      /* Resubmit the request */
      goto restart;
    }
  }
  else
1390 1391
  {
    /* This is a request for a new page or for a page not to be removed */
1392
    if (! block)
1393 1394
    {
      /* No block is assigned for the page yet */
1395
      if (keycache->blocks_unused)
1396
      {
1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414
        if (keycache->free_block_list)
        {
          /* There is a block in the free list. */
          block= keycache->free_block_list;
          keycache->free_block_list= block->next_used;
          block->next_used= NULL;
        }
        else
        {
          /* There are some never used blocks, take first of them */
          block= &keycache->block_root[keycache->blocks_used];
          block->buffer= ADD_TO_PTR(keycache->block_mem,
                                    ((ulong) keycache->blocks_used*
                                     keycache->key_cache_block_size),
                                    byte*);
          keycache->blocks_used++;
        }
        keycache->blocks_unused--;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1415 1416 1417 1418
        block->status= 0;
        block->length= 0;
        block->offset= keycache->key_cache_block_size;
        block->requests= 1;
1419
        block->temperature= BLOCK_COLD;
1420 1421
        block->hits_left= init_hits_left;
        block->last_hit_time= 0;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1422 1423
        link_to_file_list(keycache, block, file, 0);
        block->hash_link= hash_link;
1424
        hash_link->block= block;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1425
        page_status= PAGE_TO_BE_READ;
1426
        KEYCACHE_DBUG_PRINT("find_key_block",
1427 1428
                            ("got free or never used block %u",
                             BLOCK_NUMBER(block)));
1429 1430
      }
      else
1431 1432
      {
	/* There are no never used blocks, use a block from the LRU chain */
1433

1434
        /*
1435 1436 1437
          Wait until a new block is added to the LRU chain;
          several threads might wait here for the same page,
          all of them must get the same block
1438
        */
1439

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1440
        if (! keycache->used_last)
1441
        {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1442 1443 1444
          struct st_my_thread_var *thread= my_thread_var;
          thread->opt_info= (void *) hash_link;
          link_into_queue(&keycache->waiting_for_block, thread);
1445
          do
1446
          {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1447
            keycache_pthread_cond_wait(&thread->suspend,
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1448
                                       &keycache->cache_lock);
1449 1450
          }
          while (thread->next);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1451
          thread->opt_info= NULL;
1452
        }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1453
        block= hash_link->block;
1454 1455
        if (! block)
        {
1456 1457
          /*
             Take the first block from the LRU chain
1458 1459
             unlinking it from the chain
          */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1460
          block= keycache->used_last->next_used;
1461 1462
          block->hits_left= init_hits_left;
          block->last_hit_time= 0;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1463 1464
          reg_requests(keycache, block,1);
          hash_link->block= block;
1465
        }
1466 1467 1468 1469 1470

        if (block->hash_link != hash_link &&
	    ! (block->status & BLOCK_IN_SWITCH) )
        {
	  /* this is a primary request for a new page */
1471
          block->status|= BLOCK_IN_SWITCH;
1472 1473

          KEYCACHE_DBUG_PRINT("find_key_block",
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1474
                        ("got block %u for new page", BLOCK_NUMBER(block)));
1475

1476
          if (block->status & BLOCK_CHANGED)
1477 1478 1479
          {
	    /* The block contains a dirty page - push it out of the cache */

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1480
            KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty"));
1481

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1482
            keycache_pthread_mutex_unlock(&keycache->cache_lock);
1483 1484 1485
            /*
	      The call is thread safe because only the current
	      thread might change the block->hash_link value
1486
            */
1487 1488 1489 1490 1491
	    error= my_pwrite(block->hash_link->file,
			     block->buffer+block->offset,
			     block->length - block->offset,
			     block->hash_link->diskpos+ block->offset,
			     MYF(MY_NABP | MY_WAIT_IF_FULL));
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1492
            keycache_pthread_mutex_lock(&keycache->cache_lock);
1493
	    keycache->global_cache_write++;
1494
          }
1495

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1496
          block->status|= BLOCK_REASSIGNED;
1497 1498
          if (block->hash_link)
          {
1499 1500 1501 1502 1503
            /*
	      Wait until all pending read requests
	      for this page are executed
	      (we could have avoided this waiting, if we had read
	      a page in the cache in a sweep, without yielding control)
1504
            */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1505
            wait_for_readers(keycache, block);
1506

1507
            /* Remove the hash link for this page from the hash table */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1508
            unlink_hash(keycache, block->hash_link);
1509 1510 1511 1512
            /* All pending requests for this page must be resubmitted */
            if (block->wqueue[COND_FOR_SAVED].last_thread)
              release_queue(&block->wqueue[COND_FOR_SAVED]);
          }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1513 1514 1515 1516 1517 1518 1519
          link_to_file_list(keycache, block, file,
                            (my_bool)(block->hash_link ? 1 : 0));
          block->status= error? BLOCK_ERROR : 0;
          block->length= 0;
          block->offset= keycache->key_cache_block_size;
          block->hash_link= hash_link;
          page_status= PAGE_TO_BE_READ;
1520

1521 1522 1523 1524 1525 1526
          KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
          KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
        }
        else
        {
          /* This is for secondary requests for a new page only */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1527 1528 1529
            page_status= block->hash_link == hash_link &&
                           (block->status & BLOCK_READ) ?
                              PAGE_READ : PAGE_WAIT_TO_BE_READ;
1530 1531
        }
      }
1532
      keycache->global_cache_read++;
1533 1534 1535
    }
    else
    {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1536
      reg_requests(keycache, block, 1);
1537 1538 1539 1540 1541
      page_status = block->hash_link == hash_link &&
                    (block->status & BLOCK_READ) ?
                      PAGE_READ : PAGE_WAIT_TO_BE_READ;
    }
  }
1542

1543 1544
  KEYCACHE_DBUG_ASSERT(page_status != -1);
  *page_st=page_status;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1545
  KEYCACHE_DBUG_PRINT("find_key_block",
1546
                      ("fd: %u  pos %lu  page_status %lu",
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1547
                      (uint) file,(ulong) filepos,(uint) page_status));
1548

1549
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1550 1551
  DBUG_EXECUTE("check_keycache2",
               test_key_cache(keycache, "end of find_key_block",0););
1552 1553 1554 1555
#endif
  KEYCACHE_THREAD_TRACE("find_key_block:end");
  DBUG_RETURN(block);
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1556 1557


1558
/*
1559 1560 1561 1562 1563
  Read into a key cache block buffer from disk.

  SYNOPSIS

    read_block()
1564
      keycache            pointer to a key cache data structure
1565
      block               block to which buffer the data is to be read
1566 1567 1568 1569
      read_length         size of data to be read
      min_length          at least so much data must be read
      primary             <-> the current thread will read the data

1570 1571 1572 1573 1574 1575 1576 1577 1578 1579
  RETURN VALUE
    None

  NOTES.
    The function either reads a page data from file to the block buffer,
    or waits until another thread reads it. What page to read is determined
    by a block parameter - reference to a hash link for this page.
    If an error occurs THE BLOCK_ERROR bit is set in the block status.
    We do not report error when the size of successfully read
    portion is less than read_length, but not less than min_length.
1580
*/
1581

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1582 1583
static void read_block(KEY_CACHE *keycache,
                       BLOCK_LINK *block, uint read_length,
1584 1585 1586
                       uint min_length, my_bool primary)
{
  uint got_length;
1587

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1588
  /* On entry cache_lock is locked */
1589

1590 1591
  KEYCACHE_THREAD_TRACE("read_block");
  if (primary)
1592 1593 1594 1595
  {
    /*
      This code is executed only by threads
      that submitted primary requests
1596
    */
1597 1598

    KEYCACHE_DBUG_PRINT("read_block",
1599
                        ("page to be read by primary request"));
1600

1601
    /* Page is not in buffer yet, is to be read from disk */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1602
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1603 1604
    got_length= my_pread(block->hash_link->file, block->buffer,
                         read_length, block->hash_link->diskpos, MYF(0));
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1605
    keycache_pthread_mutex_lock(&keycache->cache_lock);
1606
    if (got_length < min_length)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1607
      block->status|= BLOCK_ERROR;
1608 1609
    else
    {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1610 1611
      block->status= BLOCK_READ;
      block->length= got_length;
1612
    }
1613
    KEYCACHE_DBUG_PRINT("read_block",
1614 1615 1616 1617 1618
                        ("primary request: new page in cache"));
    /* Signal that all pending requests for this page now can be processed */
    if (block->wqueue[COND_FOR_REQUESTED].last_thread)
      release_queue(&block->wqueue[COND_FOR_REQUESTED]);
  }
1619 1620 1621 1622 1623
  else
  {
    /*
      This code is executed only by threads
      that submitted secondary requests
1624
    */
1625
    KEYCACHE_DBUG_PRINT("read_block",
1626 1627
                      ("secondary request waiting for new page to be read"));
    {
1628
      struct st_my_thread_var *thread= my_thread_var;
1629
      /* Put the request into a queue and wait until it can be processed */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1630
      add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread);
1631 1632
      do
      {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1633
        keycache_pthread_cond_wait(&thread->suspend,
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1634
                                   &keycache->cache_lock);
1635 1636 1637
      }
      while (thread->next);
    }
1638
    KEYCACHE_DBUG_PRINT("read_block",
1639 1640 1641 1642 1643 1644
                        ("secondary request: new page in cache"));
  }
}


/*
1645
  Read a block of data from a cached file into a buffer;
1646 1647 1648 1649

  SYNOPSIS

    key_cache_read()
1650
      keycache            pointer to a key cache data structure
1651 1652 1653
      file                handler for the file for the block of data to be read
      filepos             position of the block of data in the file
      level               determines the weight of the data
1654
      buff                buffer to where the data must be placed
1655
      length              length of the buffer
1656 1657 1658
      block_length        length of the block in the key cache buffer
      return_buffer       return pointer to the key cache buffer with the data

1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669
  RETURN VALUE
    Returns address from where the data is placed if sucessful, 0 - otherwise.

  NOTES.
    The function ensures that a block of data of size length from file
    positioned at filepos is in the buffers for some key cache blocks.
    Then the function either copies the data into the buffer buff, or,
    if return_buffer is TRUE, it just returns the pointer to the key cache
    buffer with the data.
    Filepos must be a multiple of 'block_length', but it doesn't
    have to be a multiple of key_cache_block_size;
1670
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1671

1672
byte *key_cache_read(KEY_CACHE *keycache,
1673 1674
                     File file, my_off_t filepos, int level,
                     byte *buff, uint length,
1675 1676
		     uint block_length __attribute__((unused)),
		     int return_buffer __attribute__((unused)))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1677 1678
{
  int error=0;
1679 1680
  uint offset= 0;
  byte *start= buff;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1681
  DBUG_ENTER("key_cache_read");
1682
  DBUG_PRINT("enter", ("fd: %u  pos: %lu  length: %u",
1683
               (uint) file, (ulong) filepos, length));
1684

1685
  if (keycache->can_be_used)
1686 1687
  {
    /* Key cache is used */
1688
    reg1 BLOCK_LINK *block;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1689
    uint read_length;
1690 1691
    uint status;
    int page_st;
1692

1693
    /* Read data in key_cache_block_size increments */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1694 1695
    do
    {
1696
      keycache_pthread_mutex_lock(&keycache->cache_lock);
1697
      if (!keycache->can_be_used)
1698 1699 1700 1701 1702 1703
      {
	keycache_pthread_mutex_unlock(&keycache->cache_lock);
	goto no_key_cache;
      }
      offset= (uint) (filepos & (keycache->key_cache_block_size-1));
      filepos-= offset;
1704 1705 1706 1707
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

1708 1709 1710 1711 1712
#ifndef THREAD
      if (block_length > keycache->key_cache_block_size || offset)
	return_buffer=0;
#endif

1713
      inc_counter_for_resize_op(keycache);
1714
      keycache->global_cache_r_requests++;
1715
      block=find_key_block(keycache, file, filepos, level, 0, &page_st);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1716
      if (block->status != BLOCK_ERROR && page_st != PAGE_READ)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1717
      {
1718
        /* The requested page is to be read into the block buffer */
1719
        read_block(keycache, block,
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1720
                   keycache->key_cache_block_size, read_length+offset,
1721
                   (my_bool)(page_st == PAGE_TO_BE_READ));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1722
      }
1723 1724
      else if (! (block->status & BLOCK_ERROR) &&
               block->length < read_length + offset)
1725 1726 1727 1728 1729
      {
        /*
           Impossible if nothing goes wrong:
           this could only happen if we are using a file with
           small key blocks and are trying to read outside the file
1730
        */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1731 1732
        my_errno= -1;
        block->status|= BLOCK_ERROR;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1733
      }
1734

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1735
      if (! ((status= block->status) & BLOCK_ERROR))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1736
      {
1737
#ifndef THREAD
1738
        if (! return_buffer)
1739 1740 1741
#endif
        {
#if !defined(SERIALIZED_READ_FROM_CACHE)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1742
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
1743
#endif
1744

1745 1746
          /* Copy data from the cache buffer */
          if (!(read_length & 511))
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1747
            bmove512(buff, block->buffer+offset, read_length);
1748
          else
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1749
            memcpy(buff, block->buffer+offset, (size_t) read_length);
1750 1751

#if !defined(SERIALIZED_READ_FROM_CACHE)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1752
          keycache_pthread_mutex_lock(&keycache->cache_lock);
1753 1754
#endif
        }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1755
      }
1756

1757
      remove_reader(block);
1758 1759 1760
      /*
         Link the block into the LRU chain
         if it's the last submitted request for the block
1761
      */
1762
      unreg_request(keycache, block, 1);
1763

1764 1765
      dec_counter_for_resize_op(keycache);

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1766
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
1767

1768 1769
      if (status & BLOCK_ERROR)
        DBUG_RETURN((byte *) 0);
1770

1771
#ifndef THREAD
1772
      /* This is only true if we where able to read everything in one block */
1773
      if (return_buffer)
1774
	return (block->buffer);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1775
#endif
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1776
      buff+= read_length;
1777
      filepos+= read_length+offset;
1778

bk@work.mysql.com's avatar
bk@work.mysql.com committed
1779
    } while ((length-= read_length));
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1780
    DBUG_RETURN(start);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1781
  }
1782

1783 1784 1785 1786 1787 1788
no_key_cache:					/* Key cache is not used */

  /* We can't use mutex here as the key cache may not be initialized */
  keycache->global_cache_r_requests++;
  keycache->global_cache_read++;
  if (my_pread(file, (byte*) buff, length, filepos+offset, MYF(MY_NABP)))
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1789
    error= 1;
1790
  DBUG_RETURN(error ? (byte*) 0 : start);
1791
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1792 1793


igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1794 1795 1796 1797
/*
  Insert a block of file data from a buffer into key cache

  SYNOPSIS
1798
    key_cache_insert()
1799 1800 1801 1802 1803 1804 1805 1806 1807 1808
    keycache            pointer to a key cache data structure
    file                handler for the file to insert data from
    filepos             position of the block of data in the file to insert
    level               determines the weight of the data
    buff                buffer to read data from
    length              length of the data in the buffer

  NOTES
    This is used by MyISAM to move all blocks from a index file to the key
    cache
1809

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1810
  RETURN VALUE
1811
    0 if a success, 1 - otherwise.
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1812 1813
*/

1814
int key_cache_insert(KEY_CACHE *keycache,
1815 1816
                     File file, my_off_t filepos, int level,
                     byte *buff, uint length)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1817 1818
{
  DBUG_ENTER("key_cache_insert");
1819
  DBUG_PRINT("enter", ("fd: %u  pos: %lu  length: %u",
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1820 1821
               (uint) file,(ulong) filepos, length));

1822
  if (keycache->can_be_used)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1823 1824 1825 1826 1827
  {
    /* Key cache is used */
    reg1 BLOCK_LINK *block;
    uint read_length;
    int page_st;
1828
    int error;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1829 1830 1831

    do
    {
1832 1833
      uint offset;
      keycache_pthread_mutex_lock(&keycache->cache_lock);
1834
      if (!keycache->can_be_used)
1835 1836 1837 1838 1839 1840 1841
      {
	keycache_pthread_mutex_unlock(&keycache->cache_lock);
	DBUG_RETURN(0);
      }
      offset= (uint) (filepos & (keycache->key_cache_block_size-1));
      /* Read data into key cache from buff in key_cache_block_size incr. */
      filepos-= offset;
1842 1843 1844
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);
1845

1846
      inc_counter_for_resize_op(keycache);
1847
      keycache->global_cache_r_requests++;
1848
      block= find_key_block(keycache, file, filepos, level, 0, &page_st);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1849 1850 1851 1852
      if (block->status != BLOCK_ERROR && page_st != PAGE_READ)
      {
        /* The requested page is to be read into the block buffer */
#if !defined(SERIALIZED_READ_FROM_CACHE)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1853
        keycache_pthread_mutex_unlock(&keycache->cache_lock);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1854 1855 1856 1857 1858 1859 1860 1861 1862
#endif

        /* Copy data from buff */
        if (!(read_length & 511))
          bmove512(block->buffer+offset, buff, read_length);
        else
          memcpy(block->buffer+offset, buff, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1863
        keycache_pthread_mutex_lock(&keycache->cache_lock);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1864 1865 1866 1867 1868 1869 1870 1871 1872 1873
#endif
        block->status= BLOCK_READ;
        block->length= read_length+offset;
      }

      remove_reader(block);
      /*
         Link the block into the LRU chain
         if it's the last submitted request for the block
      */
1874
      unreg_request(keycache, block, 1);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1875

1876
      error= (block->status & BLOCK_ERROR);
1877

1878
      dec_counter_for_resize_op(keycache);
1879

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1880
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1881

1882
      if (error)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1883 1884
        DBUG_RETURN(1);

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1885
      buff+= read_length;
1886
      filepos+= read_length+offset;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1887 1888 1889 1890 1891 1892 1893

    } while ((length-= read_length));
  }
  DBUG_RETURN(0);
}


1894
/*
1895 1896
  Write a buffer into a cached file.

1897 1898 1899
  SYNOPSIS

    key_cache_write()
1900
      keycache            pointer to a key cache data structure
1901 1902 1903
      file                handler for the file to write data to
      filepos             position in the file to write data to
      level               determines the weight of the data
1904
      buff                buffer with the data
1905 1906
      length              length of the buffer
      dont_write          if is 0 then all dirty pages involved in writing
1907 1908
                          should have been flushed from key cache

1909 1910 1911 1912 1913 1914 1915
  RETURN VALUE
    0 if a success, 1 - otherwise.

  NOTES.
    The function copies the data of size length from buff into buffers
    for key cache blocks that are  assigned to contain the portion of
    the file starting with position filepos.
1916
    It ensures that this data is flushed to the file if dont_write is FALSE.
1917 1918
    Filepos must be a multiple of 'block_length', but it doesn't
    have to be a multiple of key_cache_block_size;
1919
*/
1920

1921
int key_cache_write(KEY_CACHE *keycache,
1922 1923
                    File file, my_off_t filepos, int level,
                    byte *buff, uint length,
1924 1925
                    uint block_length  __attribute__((unused)),
                    int dont_write)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1926
{
1927
  reg1 BLOCK_LINK *block;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1928
  int error=0;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1929
  DBUG_ENTER("key_cache_write");
1930
  DBUG_PRINT("enter",
1931
	     ("fd: %u  pos: %lu  length: %u  block_length: %u  key_block_length: %u",
1932 1933
	      (uint) file, (ulong) filepos, length, block_length,
	      keycache ? keycache->key_cache_block_size : 0));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1934 1935

  if (!dont_write)
1936 1937
  {
    /* Force writing from buff into disk */
1938
    keycache->global_cache_write++;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1939
    if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1940
      DBUG_RETURN(1);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1941
  }
1942

bk@work.mysql.com's avatar
bk@work.mysql.com committed
1943
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1944 1945
  DBUG_EXECUTE("check_keycache",
               test_key_cache(keycache, "start of key_cache_write", 1););
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1946
#endif
1947

1948
  if (keycache->can_be_used)
1949 1950
  {
    /* Key cache is used */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1951
    uint read_length;
1952
    int page_st;
1953

bk@work.mysql.com's avatar
bk@work.mysql.com committed
1954 1955
    do
    {
1956 1957
      uint offset;
      keycache_pthread_mutex_lock(&keycache->cache_lock);
1958
      if (!keycache->can_be_used)
1959 1960 1961 1962 1963 1964 1965
      {
	keycache_pthread_mutex_unlock(&keycache->cache_lock);
	goto no_key_cache;
      }
      offset= (uint) (filepos & (keycache->key_cache_block_size-1));
      /* Write data in key_cache_block_size increments */
      filepos-= offset;
1966 1967 1968
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);
1969

1970
      inc_counter_for_resize_op(keycache);
1971
      keycache->global_cache_w_requests++;
1972
      block= find_key_block(keycache, file, filepos, level, 1, &page_st);
1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988
      if (!block)
      {
        /* It happens only for requests submitted during resize operation */
        dec_counter_for_resize_op(keycache);
	keycache_pthread_mutex_unlock(&keycache->cache_lock);
	if (dont_write)
        {
          keycache->global_cache_w_requests++;
          keycache->global_cache_write++;
          if (my_pwrite(file, (byte*) buff, length, filepos,
		        MYF(MY_NABP | MY_WAIT_IF_FULL)))
            error=1;
	}
        goto next_block;
      }

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1989
      if (block->status != BLOCK_ERROR && page_st != PAGE_READ &&
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
1990 1991 1992 1993
          (offset || read_length < keycache->key_cache_block_size))
        read_block(keycache, block,
                   offset + read_length >= keycache->key_cache_block_size?
                   offset : keycache->key_cache_block_size,
1994
                   offset,(my_bool)(page_st == PAGE_TO_BE_READ));
1995

1996
      if (!dont_write)
1997 1998 1999
      {
	/* buff has been written to disk at start */
        if ((block->status & BLOCK_CHANGED) &&
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2000 2001
            (!offset && read_length >= keycache->key_cache_block_size))
             link_to_file_list(keycache, block, block->hash_link->file, 1);
2002 2003
      }
      else if (! (block->status & BLOCK_CHANGED))
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2004
        link_to_changed_list(keycache, block);
2005

serg@serg.mylan's avatar
serg@serg.mylan committed
2006
      set_if_smaller(block->offset, offset);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2007
      set_if_bigger(block->length, read_length+offset);
2008

2009
      if (! (block->status & BLOCK_ERROR))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2010
      {
2011
        if (!(read_length & 511))
2012
	  bmove512(block->buffer+offset, buff, read_length);
2013
        else
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2014
          memcpy(block->buffer+offset, buff, (size_t) read_length);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2015
      }
2016 2017 2018

      block->status|=BLOCK_READ;

2019 2020
      /* Unregister the request */
      block->hash_link->requests--;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2021
      unreg_request(keycache, block, 1);
2022

2023 2024
      if (block->status & BLOCK_ERROR)
      {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2025
        keycache_pthread_mutex_unlock(&keycache->cache_lock);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2026
        error= 1;
2027 2028
        break;
      }
2029

2030
      dec_counter_for_resize_op(keycache);
2031

2032
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
2033

2034
    next_block:
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2035
      buff+= read_length;
2036
      filepos+= read_length+offset;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2037
      offset= 0;
2038

bk@work.mysql.com's avatar
bk@work.mysql.com committed
2039
    } while ((length-= read_length));
2040
    goto end;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2041
  }
2042 2043 2044 2045

no_key_cache:
  /* Key cache is not used */
  if (dont_write)
2046
  {
2047 2048 2049 2050 2051
    keycache->global_cache_w_requests++;
    keycache->global_cache_write++;
    if (my_pwrite(file, (byte*) buff, length, filepos,
		  MYF(MY_NABP | MY_WAIT_IF_FULL)))
      error=1;
2052
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2053

2054
end:
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2055
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2056 2057
  DBUG_EXECUTE("exec",
               test_key_cache(keycache, "end of key_cache_write", 1););
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2058
#endif
2059 2060
  DBUG_RETURN(error);
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2061 2062


2063 2064 2065
/*
  Free block: remove reference to it from hash table,
  remove it from the chain file of dirty/clean blocks
2066
  and add it to the free list.
2067 2068
*/

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2069
static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2070
{
2071
  KEYCACHE_THREAD_TRACE("free block");
2072 2073 2074
  KEYCACHE_DBUG_PRINT("free_block",
                      ("block %u to be freed",BLOCK_NUMBER(block)));
  if (block->hash_link)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2075
  {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2076 2077 2078
    block->status|= BLOCK_REASSIGNED;
    wait_for_readers(keycache, block);
    unlink_hash(keycache, block->hash_link);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2079
  }
2080

2081
  unlink_changed(block);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2082 2083 2084
  block->status= 0;
  block->length= 0;
  block->offset= keycache->key_cache_block_size;
2085
  KEYCACHE_THREAD_TRACE("free block");
2086
  KEYCACHE_DBUG_PRINT("free_block",
2087
                      ("block is freed"));
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2088 2089
  unreg_request(keycache, block, 0);
  block->hash_link= NULL;
2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100

  /* Remove the free block from the LRU ring. */
  unlink_block(keycache, block);
  if (block->temperature == BLOCK_WARM)
    keycache->warm_blocks--;
  block->temperature= BLOCK_COLD;
  /* Insert the free block in the free list. */
  block->next_used= keycache->free_block_list;
  keycache->free_block_list= block;
  /* Keep track of the number of currently unused blocks. */
  keycache->blocks_unused++;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2101 2102 2103
}


2104
/*
  Comparison callback: order two blocks by the file position
  (hash_link->diskpos) of the pages they hold.
  Returns -1, 1 or 0 when *a is positioned before, after or at
  the same position as *b.
*/

static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
{
  if ((*a)->hash_link->diskpos < (*b)->hash_link->diskpos)
    return -1;
  if ((*a)->hash_link->diskpos > (*b)->hash_link->diskpos)
    return 1;
  return 0;
}

2110

2111 2112 2113
/*
  Flush a portion of changed blocks to disk,
  free used blocks if requested
2114
*/
2115

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2116 2117
static int flush_cached_blocks(KEY_CACHE *keycache,
                               File file, BLOCK_LINK **cache,
2118 2119
                               BLOCK_LINK **end,
                               enum flush_type type)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2120
{
2121
  int error;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2122 2123
  int last_errno= 0;
  uint count= end-cache;
2124

2125
  /* Don't lock the cache during the flush */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2126
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
2127 2128 2129
  /*
     As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH
     we are guarunteed no thread will change them
2130
  */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2131
  qsort((byte*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
2132

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2133
  keycache_pthread_mutex_lock(&keycache->cache_lock);
2134
  for ( ; cache != end ; cache++)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2135
  {
2136
    BLOCK_LINK *block= *cache;
2137 2138

    KEYCACHE_DBUG_PRINT("flush_cached_blocks",
2139
                        ("block %u to be flushed", BLOCK_NUMBER(block)));
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2140
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
2141 2142 2143 2144
    error= my_pwrite(file,
		     block->buffer+block->offset,
		     block->length - block->offset,
                     block->hash_link->diskpos+ block->offset,
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2145
                     MYF(MY_NABP | MY_WAIT_IF_FULL));
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2146
    keycache_pthread_mutex_lock(&keycache->cache_lock);
2147
    keycache->global_cache_write++;
2148
    if (error)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2149
    {
2150
      block->status|= BLOCK_ERROR;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2151
      if (!last_errno)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2152
        last_errno= errno ? errno : -1;
2153
    }
2154
    /*
2155 2156 2157 2158 2159
      Let to proceed for possible waiting requests to write to the block page.
      It might happen only during an operation to resize the key cache.
    */
    if (block->wqueue[COND_FOR_SAVED].last_thread)
      release_queue(&block->wqueue[COND_FOR_SAVED]);
2160 2161 2162
    /* type will never be FLUSH_IGNORE_CHANGED here */
    if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
    {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2163
      keycache->blocks_changed--;
2164
      keycache->global_blocks_changed--;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2165
      free_block(keycache, block);
2166
    }
2167
    else
2168
    {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2169 2170 2171
      block->status&= ~BLOCK_IN_FLUSH;
      link_to_file_list(keycache, block, file, 1);
      unreg_request(keycache, block, 1);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2172
    }
2173

bk@work.mysql.com's avatar
bk@work.mysql.com committed
2174 2175 2176 2177 2178
  }
  return last_errno;
}


2179
/*
2180
  flush all key blocks for a file to disk, but don't do any mutex locks
2181

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2182
    flush_key_blocks_int()
2183
      keycache            pointer to a key cache data structure
2184 2185
      file                handler for the file to flush to
      flush_type          type of the flush
2186

2187 2188 2189 2190 2191 2192 2193 2194 2195 2196
  NOTES
    This function doesn't do any mutex locks because it needs to be called both
    from flush_key_blocks and flush_all_key_blocks (the later one does the
    mutex lock in the resize_key_cache() function).

  RETURN
    0   ok
    1  error
*/

2197
static int flush_key_blocks_int(KEY_CACHE *keycache,
2198
				File file, enum flush_type type)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2199
{
2200
  BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2201
  int last_errno= 0;
2202
  DBUG_ENTER("flush_key_blocks_int");
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2203
  DBUG_PRINT("enter",("file: %d  blocks_used: %d  blocks_changed: %d",
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2204
              file, keycache->blocks_used, keycache->blocks_changed));
2205

2206
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2207 2208
    DBUG_EXECUTE("check_keycache",
                 test_key_cache(keycache, "start of flush_key_blocks", 0););
2209
#endif
2210

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2211 2212
  cache= cache_buff;
  if (keycache->disk_blocks > 0 &&
2213
      (!my_disable_flush_key_blocks || type != FLUSH_KEEP))
2214 2215
  {
    /* Key cache exists and flush is not disabled */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2216 2217
    int error= 0;
    uint count= 0;
2218
    BLOCK_LINK **pos,**end;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2219
    BLOCK_LINK *first_in_switch= NULL;
2220 2221 2222 2223
    BLOCK_LINK *block, *next;
#if defined(KEYCACHE_DEBUG)
    uint cnt=0;
#endif
2224

bk@work.mysql.com's avatar
bk@work.mysql.com committed
2225 2226
    if (type != FLUSH_IGNORE_CHANGED)
    {
2227
      /*
2228 2229 2230
         Count how many key blocks we have to cache to be able
         to flush all dirty pages with minimum seek moves
      */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2231
      for (block= keycache->changed_blocks[FILE_HASH(file)] ;
2232
           block ;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2233
           block= block->next_changed)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2234
      {
2235
        if (block->hash_link->file == file)
2236
        {
2237
          count++;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2238
          KEYCACHE_DBUG_ASSERT(count<= keycache->blocks_used);
2239
        }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2240
      }
2241
      /* Allocate a new buffer only if its bigger than the one we have */
2242
      if (count > FLUSH_CACHE &&
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2243 2244
          !(cache= (BLOCK_LINK**) my_malloc(sizeof(BLOCK_LINK*)*count,
                                            MYF(0))))
2245
      {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2246 2247
        cache= cache_buff;
        count= FLUSH_CACHE;
2248
      }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2249
    }
2250

2251 2252
    /* Retrieve the blocks and write them to a buffer to be flushed */
restart:
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2253 2254
    end= (pos= cache)+count;
    for (block= keycache->changed_blocks[FILE_HASH(file)] ;
2255
         block ;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2256
         block= next)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2257
    {
2258 2259
#if defined(KEYCACHE_DEBUG)
      cnt++;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2260
      KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
2261
#endif
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2262
      next= block->next_changed;
2263
      if (block->hash_link->file == file)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2264
      {
2265
        /*
2266 2267 2268 2269 2270
           Mark the block with BLOCK_IN_FLUSH in order not to let
           other threads to use it for new pages and interfere with
           our sequence ot flushing dirty file pages
        */
        block->status|= BLOCK_IN_FLUSH;
2271

2272
        if (! (block->status & BLOCK_IN_SWITCH))
2273 2274 2275 2276 2277
        {
	  /*
	    We care only for the blocks for which flushing was not
	    initiated by other threads as a result of page swapping
          */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2278
          reg_requests(keycache, block, 1);
2279 2280 2281
          if (type != FLUSH_IGNORE_CHANGED)
          {
	    /* It's not a temporary file */
2282
            if (pos == end)
2283 2284 2285 2286
            {
	      /*
		This happens only if there is not enough
		memory for the big block
2287
              */
2288
              if ((error= flush_cached_blocks(keycache, file, cache,
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2289
                                              end,type)))
2290 2291 2292 2293 2294
                last_errno=error;
              /*
		Restart the scan as some other thread might have changed
		the changed blocks chain: the blocks that were in switch
		state before the flush started have to be excluded
2295 2296 2297
              */
              goto restart;
            }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2298
            *pos++= block;
2299 2300 2301 2302
          }
          else
          {
            /* It's a temporary file */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2303
            keycache->blocks_changed--;
2304
	    keycache->global_blocks_changed--;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2305
            free_block(keycache, block);
2306 2307 2308
          }
        }
        else
2309 2310
        {
	  /* Link the block into a list of blocks 'in switch' */
2311
          unlink_changed(block);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2312
          link_changed(block, &first_in_switch);
2313
        }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2314 2315 2316 2317
      }
    }
    if (pos != cache)
    {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2318 2319
      if ((error= flush_cached_blocks(keycache, file, cache, pos, type)))
        last_errno= error;
2320 2321 2322 2323 2324
    }
    /* Wait until list of blocks in switch is empty */
    while (first_in_switch)
    {
#if defined(KEYCACHE_DEBUG)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2325
      cnt= 0;
2326
#endif
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2327
      block= first_in_switch;
2328
      {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2329
        struct st_my_thread_var *thread= my_thread_var;
2330 2331 2332
        add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
        do
        {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2333
          keycache_pthread_cond_wait(&thread->suspend,
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2334
                                     &keycache->cache_lock);
2335 2336 2337 2338 2339
        }
        while (thread->next);
      }
#if defined(KEYCACHE_DEBUG)
      cnt++;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2340
      KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
2341
#endif
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2342 2343
    }
    /* The following happens very seldom */
2344
    if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2345
    {
2346 2347 2348
#if defined(KEYCACHE_DEBUG)
      cnt=0;
#endif
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2349
      for (block= keycache->file_blocks[FILE_HASH(file)] ;
2350
           block ;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2351
           block= next)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2352
      {
2353 2354
#if defined(KEYCACHE_DEBUG)
        cnt++;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2355
        KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
2356
#endif
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2357
        next= block->next_changed;
2358 2359 2360 2361
        if (block->hash_link->file == file &&
            (! (block->status & BLOCK_CHANGED)
             || type == FLUSH_IGNORE_CHANGED))
        {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2362 2363
          reg_requests(keycache, block, 1);
          free_block(keycache, block);
2364
        }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2365 2366
      }
    }
2367
  }
2368

bk@work.mysql.com's avatar
bk@work.mysql.com committed
2369
#ifndef DBUG_OFF
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2370 2371
  DBUG_EXECUTE("check_keycache",
               test_key_cache(keycache, "end of flush_key_blocks", 0););
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2372 2373
#endif
  if (cache != cache_buff)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2374
    my_free((gptr) cache, MYF(0));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2375
  if (last_errno)
2376
    errno=last_errno;                /* Return first error */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2377
  DBUG_RETURN(last_errno != 0);
2378 2379 2380
}


2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393
/*
  Flush all blocks for a file to disk

  SYNOPSIS

    flush_key_blocks()
      keycache            pointer to a key cache data structure
      file                handler for the file to flush to
      flush_type          type of the flush

  RETURN
    0   ok
    1  error
2394
*/
2395

2396
int flush_key_blocks(KEY_CACHE *keycache,
2397 2398 2399 2400
                     File file, enum flush_type type)
{
  int res;
  DBUG_ENTER("flush_key_blocks");
2401
  DBUG_PRINT("enter", ("keycache: 0x%lx", keycache));
2402

2403
  if (keycache->disk_blocks <= 0)
2404
    DBUG_RETURN(0);
2405 2406 2407 2408 2409
  keycache_pthread_mutex_lock(&keycache->cache_lock);
  inc_counter_for_resize_op(keycache);
  res= flush_key_blocks_int(keycache, file, type);
  dec_counter_for_resize_op(keycache);
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
2410 2411 2412 2413
  DBUG_RETURN(res);
}


2414 2415
/*
  Flush all blocks in the key cache to disk
2416
*/
2417

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2418
static int flush_all_key_blocks(KEY_CACHE *keycache)
2419 2420 2421 2422
{
#if defined(KEYCACHE_DEBUG)
  uint cnt=0;
#endif
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2423
  while (keycache->blocks_changed > 0)
2424 2425
  {
    BLOCK_LINK *block;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2426
    for (block= keycache->used_last->next_used ; ; block=block->next_used)
2427 2428 2429 2430 2431
    {
      if (block->hash_link)
      {
#if defined(KEYCACHE_DEBUG)
        cnt++;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2432
        KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
2433
#endif
2434 2435
        if (flush_key_blocks_int(keycache, block->hash_link->file,
				 FLUSH_RELEASE))
2436 2437 2438
          return 1;
        break;
      }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2439
      if (block == keycache->used_last)
2440 2441 2442 2443 2444
        break;
    }
  }
  return 0;
}
2445 2446


2447 2448
#ifndef DBUG_OFF
/*
2449
  Test if disk-cache is ok
2450
*/
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2451
static void test_key_cache(KEY_CACHE *keycache __attribute__((unused)),
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2452
                           const char *where __attribute__((unused)),
2453 2454 2455
                           my_bool lock __attribute__((unused)))
{
  /* TODO */
2456
}
2457
#endif
2458

2459 2460 2461 2462 2463 2464
#if defined(KEYCACHE_TIMEOUT)

#define KEYCACHE_DUMP_FILE  "keycache_dump.txt"
#define MAX_QUEUE_LEN  100


igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
2465
/*
  Write a textual snapshot of the key cache to KEYCACHE_DUMP_FILE.

  SYNOPSIS
    keycache_dump()
      keycache            pointer to a key cache data structure

  NOTES
    The dump lists the id of the calling thread, the two queues of waiting
    threads, every used block with the threads queued on it, and the LRU
    chain. At most MAX_QUEUE_LEN entries are printed per queue.
    Nothing is written if the dump file cannot be opened.
*/
static void keycache_dump(KEY_CACHE *keycache)
{
  FILE *keycache_dump_file= fopen(KEYCACHE_DUMP_FILE, "w");
  struct st_my_thread_var *thread_var= my_thread_var;
  struct st_my_thread_var *last;
  struct st_my_thread_var *thread;
  BLOCK_LINK *block;
  HASH_LINK *hash_link;
  KEYCACHE_PAGE *page;
  uint i;
  uint queue_len;                       /* entries printed for one queue */

  if (!keycache_dump_file)
    return;                             /* Nowhere to write the dump to */

  /* Identify the thread that produces the dump */
  fprintf(keycache_dump_file, "thread:%u\n", thread_var->id);

  /*
    NOTE(review): waiting_for_hash_link and waiting_for_block are referenced
    here as plain names, not through 'keycache'. Verify where these queues
    are declared.
  */
  queue_len= 0;
  thread= last= waiting_for_hash_link.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
  if (thread)
  {
    /* last_thread is the tail of a ring; the first entry is its successor */
    do
    {
      thread= thread->next;
      page= (KEYCACHE_PAGE *) thread->opt_info;
      fprintf(keycache_dump_file,
              "thread:%u, (file,filepos)=(%u,%lu)\n",
              thread->id,(uint) page->file,(ulong) page->filepos);
      if (++queue_len == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);
  }

  queue_len= 0;
  thread= last= waiting_for_block.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for block\n");
  if (thread)
  {
    do
    {
      thread= thread->next;
      hash_link= (HASH_LINK *) thread->opt_info;
      fprintf(keycache_dump_file,
        "thread:%u hash_link:%u (file,filepos)=(%u,%lu)\n",
        thread->id, (uint) HASH_LINK_NUMBER(hash_link),
        (uint) hash_link->file,(ulong) hash_link->diskpos);
      if (++queue_len == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);
  }

  for (i= 0 ; i < keycache->blocks_used ; i++)
  {
    int j;
    block= &keycache->block_root[i];
    hash_link= block->hash_link;
    fprintf(keycache_dump_file,
            "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
            i, (int) (hash_link ? HASH_LINK_NUMBER(hash_link) : -1),
            block->status, block->requests, block->condvar ? 1 : 0);
    for (j= 0 ; j < 2 ; j++)
    {
      KEYCACHE_WQUEUE *wqueue= &block->wqueue[j];
      thread= last= wqueue->last_thread;
      fprintf(keycache_dump_file, "queue #%d\n", j);
      if (thread)
      {
        /*
          The length limit uses its own counter: the block index 'i'
          must not be advanced by the number of queued threads.
        */
        queue_len= 0;
        do
        {
          thread= thread->next;
          fprintf(keycache_dump_file,
                  "thread:%u\n", thread->id);
          if (++queue_len == MAX_QUEUE_LEN)
            break;
        }
        while (thread != last);
      }
    }
  }
  fprintf(keycache_dump_file, "LRU chain:");
  block= keycache->used_last;
  if (block)
  {
    /* Walk the ring once, starting with the successor of used_last */
    do
    {
      block= block->next_used;
      fprintf(keycache_dump_file,
              "block:%u, ", BLOCK_NUMBER(block));
    }
    while (block != keycache->used_last);
  }
  fprintf(keycache_dump_file, "\n");

  fclose(keycache_dump_file);
}

2556
#endif /* defined(KEYCACHE_TIMEOUT) */
2557

2558
#if defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)
2559 2560


2561
static int keycache_pthread_cond_wait(pthread_cond_t *cond,
2562 2563 2564 2565 2566 2567 2568 2569 2570
                                      pthread_mutex_t *mutex)
{
  int rc;
  struct timeval  now;            /* time when we started waiting        */
  struct timespec timeout;        /* timeout value for the wait function */
  struct timezone tz;
#if defined(KEYCACHE_DEBUG)
  int cnt=0;
#endif
2571 2572

  /* Get current time */
2573 2574
  gettimeofday(&now, &tz);
  /* Prepare timeout value */
2575 2576
  timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
  timeout.tv_nsec= now.tv_usec * 1000; /* timeval uses microseconds.         */
2577 2578 2579 2580 2581 2582 2583 2584 2585
                                        /* timespec uses nanoseconds.         */
                                        /* 1 nanosecond = 1000 micro seconds. */
  KEYCACHE_THREAD_TRACE_END("started waiting");
#if defined(KEYCACHE_DEBUG)
  cnt++;
  if (cnt % 100 == 0)
    fprintf(keycache_debug_log, "waiting...\n");
    fflush(keycache_debug_log);
#endif
2586
  rc= pthread_cond_timedwait(cond, mutex, &timeout);
2587 2588 2589
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
#if defined(KEYCACHE_DEBUG)
  if (rc == ETIMEDOUT)
2590
  {
2591 2592 2593 2594 2595
    fprintf(keycache_debug_log,"aborted by keycache timeout\n");
    fclose(keycache_debug_log);
    abort();
  }
#endif
2596

2597 2598
  if (rc == ETIMEDOUT)
    keycache_dump();
2599

2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613
#if defined(KEYCACHE_DEBUG)
  KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
#else
  assert(rc != ETIMEDOUT);
#endif
  return rc;
}
#else
#if defined(KEYCACHE_DEBUG)
/*
  Debug wrapper around pthread_cond_wait(): emits a thread trace record
  before and after the wait and returns the result of the wait.
*/
static int keycache_pthread_cond_wait(pthread_cond_t *cond,
                                      pthread_mutex_t *mutex)
{
  int wait_result;

  KEYCACHE_THREAD_TRACE_END("started waiting");
  wait_result= pthread_cond_wait(cond, mutex);
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
  return wait_result;
}
#endif
#endif /* defined(KEYCACHE_TIMEOUT) && !defined(__WIN__) */
2620

2621
#if defined(KEYCACHE_DEBUG)
2622 2623


2624
/*
  Debug wrapper around pthread_mutex_lock(): the trace record is emitted
  after the lock call has returned. Returns the result of the lock call.
*/
static int keycache_pthread_mutex_lock(pthread_mutex_t *mutex)
{
  int lock_result= pthread_mutex_lock(mutex);

  KEYCACHE_THREAD_TRACE_BEGIN("");
  return lock_result;
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2631 2632


2633 2634 2635 2636 2637
/*
  Debug wrapper around pthread_mutex_unlock(): the trace record is emitted
  before the mutex is released. The result of the unlock call is ignored.
*/
static void keycache_pthread_mutex_unlock(pthread_mutex_t *mutex)
{
  KEYCACHE_THREAD_TRACE_END("");
  (void) pthread_mutex_unlock(mutex);
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2638 2639


2640
/*
  Debug wrapper around pthread_cond_signal(): emits a trace record and
  returns the result of the signal call.
*/
static int keycache_pthread_cond_signal(pthread_cond_t *cond)
{
  KEYCACHE_THREAD_TRACE("signal");
  return pthread_cond_signal(cond);
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2647 2648


2649 2650 2651 2652
/*
  Debug wrapper around pthread_cond_broadcast(): emits a trace record and
  returns the result of the broadcast call.
*/
static int keycache_pthread_cond_broadcast(pthread_cond_t *cond)
{
  KEYCACHE_THREAD_TRACE("signal");
  return pthread_cond_broadcast(cond);
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2656

2657
#if defined(KEYCACHE_DEBUG_LOG)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2658 2659


2660 2661 2662 2663 2664
/*
  Write one printf-style formatted line to the key cache debug log.
  A newline is appended after the message. Nothing happens when the log
  is not open.
*/
static void keycache_debug_print(const char * fmt,...)
{
  va_list args;

  if (!keycache_debug_log)
    return;

  va_start(args,fmt);
  VOID(vfprintf(keycache_debug_log, fmt, args));
  VOID(fputc('\n',keycache_debug_log));
  va_end(args);
}
#endif /* defined(KEYCACHE_DEBUG_LOG) */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2672

2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683
#if defined(KEYCACHE_DEBUG_LOG)


/*
  Close the key cache debug log if it is open.

  The handle is reset afterwards so that later checks of
  keycache_debug_log do not use a closed stream and a second call
  is harmless.
*/
void keycache_debug_log_close(void)
{
  if (keycache_debug_log)
  {
    fclose(keycache_debug_log);
    keycache_debug_log= NULL;
  }
}
#endif /* defined(KEYCACHE_DEBUG_LOG) */

#endif /* defined(KEYCACHE_DEBUG) */