Commit f275c656 authored by Bradley C. Kuszmaul, committed by Yoni Fogel

[t:3218] Merge the #3218 changes back onto the main line. The tests ran on...

[t:3218] Merge the #3218 changes back onto the main line.  The tests ran on pointy (43 minutes elapsed, by the way).  I did this merge (and added a comment):
{{{
svn merge -r27967:28141 https://svn.tokutek.com/tokudb/toku/tokudb.3218
}}}
Refs #3218.


git-svn-id: file:///svn/toku/tokudb@28175 c7de825b-a66e-492c-adef-691d508d4ae1
parent d3361396
@@ -9,17 +9,12 @@
// It's not very fast at allocating or freeing.
// The previous implementation used next_fit, but we now use first_fit, since we are moving blocks around to reduce file size.
struct blockpair {
u_int64_t offset;
u_int64_t size;
};
struct block_allocator {
u_int64_t reserve_at_beginning; // How much to reserve at the beginning
u_int64_t alignment; // Block alignment
u_int64_t n_blocks; // How many blocks
u_int64_t blocks_array_size; // How big is the blocks_array. Must be >= n_blocks.
struct blockpair *blocks_array; // These blocks are sorted by address.
struct block_allocator_blockpair *blocks_array; // These blocks are sorted by address.
u_int64_t n_bytes_in_use; // including the reserve_at_beginning
};
@@ -77,42 +72,100 @@ destroy_block_allocator (BLOCK_ALLOCATOR *bap) {
}
static void
grow_blocks_array (BLOCK_ALLOCATOR ba) {
if (ba->n_blocks >= ba->blocks_array_size) {
ba->blocks_array_size *= 2;
grow_blocks_array_by (BLOCK_ALLOCATOR ba, u_int64_t n_to_add) {
if (ba->n_blocks + n_to_add > ba->blocks_array_size) {
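// Grow geometrically: take the larger of the size we need now and double the current
// capacity (e.g., n_blocks=8, n_to_add=3, blocks_array_size=8 grows to 16).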
u_int64_t new_size = ba->n_blocks + n_to_add;
u_int64_t at_least = ba->blocks_array_size * 2;
if (at_least > new_size) {
new_size = at_least;
}
ba->blocks_array_size = new_size;
XREALLOC_N(ba->blocks_array_size, ba->blocks_array);
}
}
static void
grow_blocks_array (BLOCK_ALLOCATOR ba) {
grow_blocks_array_by(ba, 1);
}
static void
merge_blockpairs_into (u_int64_t d, struct block_allocator_blockpair dst[/*d*/],
u_int64_t s, struct block_allocator_blockpair src[/*s*/])
// Effect: Merge dst[d] and src[s] into dst[d+s], merging in place.
// Initially dst and src hold sorted arrays (sorted by increasing offset).
// Finally dst contains all d+s elements sorted in order.
// dst must be large enough.
// Requires no overlaps.
{
u_int64_t tail = d+s;
while (d>0 && s>0) {
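// Work from the tails: whichever of dst[d-1] and src[s-1] has the larger offset moves into dst[tail-1].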
struct block_allocator_blockpair *dp = &dst[d-1];
struct block_allocator_blockpair *sp = &src[s-1];
struct block_allocator_blockpair *tp = &dst[tail-1];
assert(tail>0);
if (dp->offset > sp->offset) {
*tp = *dp;
d--;
tail--;
} else {
*tp = *sp;
s--;
tail--;
}
}
while (d>0) {
struct block_allocator_blockpair *dp = &dst[d-1];
struct block_allocator_blockpair *tp = &dst[tail-1];
*tp = *dp;
d--;
tail--;
}
while (s>0) {
struct block_allocator_blockpair *sp = &src[s-1];
struct block_allocator_blockpair *tp = &dst[tail-1];
*tp = *sp;
s--;
tail--;
}
}
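A quick sanity check of the in-place merge above; this is only a sketch (the offsets and sizes are made up), and because merge_blockpairs_into is static it would have to live in the same compilation unit as block_allocator.c:
{{{
static void
test_merge_blockpairs_into (void) {
    // dst has room for all four pairs, but only the first two are valid on entry.
    struct block_allocator_blockpair dst[4] = {{.offset = 0,   .size = 10},
                                               {.offset = 100, .size = 10}};
    struct block_allocator_blockpair src[2] = {{.offset = 50,  .size = 10},
                                               {.offset = 200, .size = 10}};
    merge_blockpairs_into(2, dst, 2, src);
    // The merge fills dst from the tail, so it ends up holding all four pairs
    // sorted by increasing offset; src is left untouched.
    assert(dst[0].offset ==   0 && dst[1].offset ==  50);
    assert(dst[2].offset == 100 && dst[3].offset == 200);
}
}}}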
static int
compare_blockpairs (const void *av, const void *bv) {
const struct block_allocator_blockpair *a = av;
const struct block_allocator_blockpair *b = bv;
if (a->offset < b->offset) return -1;
if (a->offset > b->offset) return +1;
return 0;
}
void
block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t offset) {
assert(offset%ba->alignment == 0);
u_int64_t i;
block_allocator_alloc_blocks_at (BLOCK_ALLOCATOR ba, u_int64_t n_blocks, struct block_allocator_blockpair *pairs)
{
VALIDATE(ba);
assert(offset >= ba->reserve_at_beginning);
grow_blocks_array(ba);
// Just do a linear search for the block
ba->n_bytes_in_use += size;
for (i=0; i<ba->n_blocks; i++) {
if (ba->blocks_array[i].offset > offset) {
// allocate it in that slot
// Don't do error checking, since we require that the blocks don't overlap.
// Slide everything over
memmove(ba->blocks_array+i+1, ba->blocks_array+i, (ba->n_blocks - i)*sizeof(struct blockpair));
ba->blocks_array[i].offset = offset;
ba->blocks_array[i].size = size;
ba->n_blocks++;
VALIDATE(ba);
return;
}
qsort(pairs, n_blocks, sizeof(*pairs), compare_blockpairs);
for (u_int64_t i=0; i<n_blocks; i++) {
assert(pairs[i].offset >= ba->reserve_at_beginning);
assert(pairs[i].offset%ba->alignment == 0);
ba->n_bytes_in_use += pairs[i].size;
}
// Goes at the end
ba->blocks_array[ba->n_blocks].offset = offset;
ba->blocks_array[ba->n_blocks].size = size;
ba->n_blocks++;
grow_blocks_array_by(ba, n_blocks);
merge_blockpairs_into(ba->n_blocks, ba->blocks_array,
n_blocks, pairs);
ba->n_blocks += n_blocks;
VALIDATE(ba);
}
void
block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t offset) {
struct block_allocator_blockpair p = {.size = size, .offset=offset};
// Just do a linear search for the block.
// This data structure is a sorted array (no gaps or anything), so the search isn't really making this any slower than the insertion.
// To speed up the insertion when opening a file, we provide the block_allocator_alloc_blocks_at function.
block_allocator_alloc_blocks_at(ba, 1, &p);
}
static inline u_int64_t
align (u_int64_t value, BLOCK_ALLOCATOR ba)
// Effect: align a value by rounding up.
@@ -137,8 +190,8 @@ block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offs
u_int64_t end_of_reserve = align(ba->reserve_at_beginning, ba);
if (end_of_reserve + size <= ba->blocks_array[0].offset ) {
// Check to see if the space immediately after the reserve is big enough to hold the new block.
struct blockpair *bp = &ba->blocks_array[0];
memmove(bp+1, bp, (ba->n_blocks)*sizeof(struct blockpair));
struct block_allocator_blockpair *bp = &ba->blocks_array[0];
memmove(bp+1, bp, (ba->n_blocks)*sizeof(*bp));
bp[0].offset = end_of_reserve;
bp[0].size = size;
ba->n_blocks++;
@@ -149,13 +202,13 @@ block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offs
}
for (u_int64_t blocknum = 0; blocknum +1 < ba->n_blocks; blocknum ++) {
// Consider the space after blocknum
struct blockpair *bp = &ba->blocks_array[blocknum];
struct block_allocator_blockpair *bp = &ba->blocks_array[blocknum];
u_int64_t this_offset = bp[0].offset;
u_int64_t this_size = bp[0].size;
u_int64_t answer_offset = align(this_offset + this_size, ba);
if (answer_offset + size > bp[1].offset) continue; // The block we want doesn't fit after this block.
// It fits, so allocate it here.
memmove(bp+2, bp+1, (ba->n_blocks - blocknum -1)*sizeof(struct blockpair));
memmove(bp+2, bp+1, (ba->n_blocks - blocknum -1)*sizeof(*bp));
bp[1].offset = answer_offset;
bp[1].size = size;
ba->n_blocks++;
@@ -164,7 +217,8 @@ block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offs
return;
}
// It didn't fit anywhere, so fit it on the end.
struct blockpair *bp = &ba->blocks_array[ba->n_blocks];
assert(ba->n_blocks < ba->blocks_array_size);
struct block_allocator_blockpair *bp = &ba->blocks_array[ba->n_blocks];
u_int64_t answer_offset = align(bp[-1].offset+bp[-1].size, ba);
bp->offset = answer_offset;
bp->size = size;
@@ -206,7 +260,7 @@ block_allocator_free_block (BLOCK_ALLOCATOR ba, u_int64_t offset) {
int64_t bn = find_block(ba, offset);
assert(bn>=0); // we require that there is a block with that offset. Might as well abort if no such block exists.
ba->n_bytes_in_use -= ba->blocks_array[bn].size;
memmove(&ba->blocks_array[bn], &ba->blocks_array[bn+1], (ba->n_blocks-bn-1) * sizeof(struct blockpair));
memmove(&ba->blocks_array[bn], &ba->blocks_array[bn+1], (ba->n_blocks-bn-1) * sizeof(struct block_allocator_blockpair));
ba->n_blocks--;
VALIDATE(ba);
}
@@ -222,7 +276,7 @@ u_int64_t
block_allocator_allocated_limit (BLOCK_ALLOCATOR ba) {
if (ba->n_blocks==0) return ba->reserve_at_beginning;
else {
struct blockpair *last = &ba->blocks_array[ba->n_blocks-1];
struct block_allocator_blockpair *last = &ba->blocks_array[ba->n_blocks-1];
return last->offset + last->size;
}
}
@@ -260,7 +314,7 @@ block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION
if (ba->n_blocks > 0) {
//Deal with space before block 0 and after reserve:
{
struct blockpair *bp = &ba->blocks_array[0];
struct block_allocator_blockpair *bp = &ba->blocks_array[0];
assert(bp->offset >= align(ba->reserve_at_beginning, ba));
uint64_t free_space = bp->offset - align(ba->reserve_at_beginning, ba);
if (free_space > 0) {
@@ -275,7 +329,7 @@ block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION
//Deal with space between blocks:
for (u_int64_t blocknum = 0; blocknum +1 < ba->n_blocks; blocknum ++) {
// Consider the space after blocknum
struct blockpair *bp = &ba->blocks_array[blocknum];
struct block_allocator_blockpair *bp = &ba->blocks_array[blocknum];
uint64_t this_offset = bp[0].offset;
uint64_t this_size = bp[0].size;
uint64_t end_of_this_block = align(this_offset+this_size, ba);
@@ -292,7 +346,7 @@ block_allocator_get_unused_statistics(BLOCK_ALLOCATOR ba, TOKU_DB_FRAGMENTATION
//Deal with space after last block
{
struct blockpair *bp = &ba->blocks_array[ba->n_blocks-1];
struct block_allocator_blockpair *bp = &ba->blocks_array[ba->n_blocks-1];
uint64_t this_offset = bp[0].offset;
uint64_t this_size = bp[0].size;
uint64_t end_of_this_block = align(this_offset+this_size, ba);
......
@@ -62,13 +62,24 @@ void
block_allocator_alloc_block_at (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t offset);
// Effect: Allocate a block of the specified size at a particular offset.
// Aborts if anything goes wrong.
// The performance of this function may be as bad as Theta(N), where N is the number of blocks currently in use.
// Usage note: To allocate several blocks (e.g., when opening a BRT), use block_allocator_alloc_blocks_at().
// Requires: The resulting block may not overlap any other allocated block.
// And the offset must be a multiple of the block alignment.
// Parameters:
// ba (IN/OUT): The block allocator. (Modifies ba.)
// size (IN): The size of the block.
// offset (IN): The location of the block.
//
struct block_allocator_blockpair {
u_int64_t offset;
u_int64_t size;
};
void
block_allocator_alloc_blocks_at (BLOCK_ALLOCATOR ba, u_int64_t n_blocks, struct block_allocator_blockpair *pairs);
// Effect: Take pairs in any order, and add them all, as if we did block_allocator_alloc_block_at() on each pair.
// This should run in time O(N + M log M) where N is the number of blocks in ba, and M is the number of new blocks.
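As a usage illustration only (not part of this header), something like the following is the intended pattern when opening a file; `ba` is assumed to be a BLOCK_ALLOCATOR that was already created with a suitable reserve and alignment, and the offsets/sizes below are placeholders that must not overlap and must be multiples of the alignment:
{{{
// Batch path: hand all the known blocks over at once; the allocator sorts the
// pairs and merges them into its sorted array, O(N + M log M) instead of M
// separate O(N) insertions.
struct block_allocator_blockpair pairs[3] = {
    {.offset = 3*4096, .size = 4096},
    {.offset = 1*4096, .size = 2048},   // any order is fine; the call sorts them
    {.offset = 8*4096, .size = 4096},
};
block_allocator_alloc_blocks_at(ba, 3, pairs);

// Single-block path: one O(N) insertion at a caller-chosen offset.
block_allocator_alloc_block_at(ba, 4096, 16*4096);
}}}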
void
block_allocator_alloc_block (BLOCK_ALLOCATOR ba, u_int64_t size, u_int64_t *offset);
......
@@ -785,14 +785,20 @@ static void
blocktable_note_translation (BLOCK_ALLOCATOR allocator, struct translation *t) {
//This is where the space for them will be reserved (in addition to normal blocks).
//See RESERVED_BLOCKNUMS
int64_t i;
for (i=0; i<t->smallest_never_used_blocknum.b; i++) {
// Previously this added blocks one at a time. Now we make an array and pass it in so it can be sorted and merged. See #3218.
struct block_allocator_blockpair *MALLOC_N(t->smallest_never_used_blocknum.b, pairs);
u_int64_t n_pairs = 0;
for (int64_t i=0; i<t->smallest_never_used_blocknum.b; i++) {
struct block_translation_pair pair = t->block_translation[i];
if (pair.size > 0) {
if (pair.size > 0) {
assert(pair.u.diskoff != diskoff_unused);
block_allocator_alloc_block_at(allocator, pair.size, pair.u.diskoff);
}
pairs[n_pairs++] = (struct block_allocator_blockpair){.size = pair.size,
.offset = pair.u.diskoff};
}
}
block_allocator_alloc_blocks_at(allocator, n_pairs, pairs);
toku_free(pairs);
}
......