Commit 43e55513 authored by Leif Walsh's avatar Leif Walsh Committed by Yoni Fogel

[t:4635] merging fractal tree and handlerton code to main

git-svn-id: file:///svn/toku/tokudb@41503 c7de825b-a66e-492c-adef-691d508d4ae1
parent b9a7c166
......@@ -85,7 +85,7 @@ fastcheckonlyfailydb: fastbuildtests
clean: $(patsubst %,%.dir.clean,$(SRCDIRS)) cleanlib
cleanlib:
rm -rf lib/*.$(SOEXT) lib/*.$(AEXT) lib/*.bundle
rm -rf lib/*.$(SOEXT) $(filter-out lib/liblzma_gcc_dbg.$(AEXT) lib/liblzma_gcc_opt.$(AEXT) lib/liblzma_icc_opt.$(AEXT),$(wildcard lib/*.$(AEXT))) lib/*.bundle lib/*.olist
# This does not work, and probably hasn't worked since revision ~2000
# install:
......
......@@ -429,6 +429,8 @@ static void print_db_struct (void) {
"int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION)",
"int (*get_readpagesize)(DB*,u_int32_t*)",
"int (*set_readpagesize)(DB*,u_int32_t)",
"int (*get_compression_method)(DB*,TOKU_COMPRESSION_METHOD*)",
"int (*set_compression_method)(DB*,TOKU_COMPRESSION_METHOD)",
"int (*set_indexer)(DB*, DB_INDEXER*)",
"void (*get_indexer)(DB*, DB_INDEXER**)",
"int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going)",
......@@ -554,6 +556,17 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) {
printf(" u_int64_t bt_verify_time_sec; /* Time of last verification, in seconds */\n");
printf("} DB_BTREE_STAT64;\n");
// compression methods
printf("typedef enum toku_compression_method {\n");
printf(" TOKU_NO_COMPRESSION = 0,\n"); // "identity" compression
printf(" TOKU_ZLIB_METHOD = 8,\n"); // RFC 1950 says use 8 for zlib. It reserves 15 to allow more bytes.
printf(" TOKU_QUICKLZ_METHOD = 9,\n"); // We use 9 for QUICKLZ (the QLZ compression level is stored in the high-order nibble). I couldn't find any standard for any other numbers, so I just use 9. -Bradley
printf(" TOKU_LZMA_METHOD = 10,\n"); // We use 10 for LZMA. (Note the compression level is stored in the high-order nibble).
printf(" TOKU_FAST_COMPRESSION_METHOD = 1,\n"); // friendlier names
printf(" TOKU_SMALL_COMPRESSION_METHOD = 2,\n");
printf(" TOKU_DEFAULT_COMPRESSION_METHOD = TOKU_FAST_COMPRESSION_METHOD,\n"); // default is quicklz
printf("} TOKU_COMPRESSION_METHOD;\n");
//bulk loader
printf("typedef struct __toku_loader DB_LOADER;\n");
printf("struct __toku_loader_internal;\n");
......
......@@ -43,6 +43,15 @@ typedef struct __toku_db_btree_stat64 {
u_int64_t bt_modify_time_sec; /* Time of last serialization, in seconds */
u_int64_t bt_verify_time_sec; /* Time of last verification, in seconds */
} DB_BTREE_STAT64;
typedef enum toku_compression_method {
TOKU_NO_COMPRESSION = 0,
TOKU_ZLIB_METHOD = 8,
TOKU_QUICKLZ_METHOD = 9,
TOKU_LZMA_METHOD = 10,
TOKU_FAST_COMPRESSION_METHOD = 1,
TOKU_SMALL_COMPRESSION_METHOD = 2,
TOKU_DEFAULT_COMPRESSION_METHOD = TOKU_FAST_COMPRESSION_METHOD,
} TOKU_COMPRESSION_METHOD;
typedef struct __toku_loader DB_LOADER;
struct __toku_loader_internal;
struct __toku_loader {
......@@ -301,6 +310,8 @@ struct __toku_db {
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*get_readpagesize)(DB*,u_int32_t*);
int (*set_readpagesize)(DB*,u_int32_t);
int (*get_compression_method)(DB*,TOKU_COMPRESSION_METHOD*);
int (*set_compression_method)(DB*,TOKU_COMPRESSION_METHOD);
int (*set_indexer)(DB*, DB_INDEXER*);
void (*get_indexer)(DB*, DB_INDEXER**);
int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going);
......
/**
* \file api/lzma.h
* \brief The public API of liblzma data compression library
*
* liblzma is a public domain general-purpose data compression library with
* a zlib-like API. The native file format is .xz, but also the old .lzma
* format and raw (no headers) streams are supported. Multiple compression
* algorithms (filters) are supported. Currently LZMA2 is the primary filter.
*
* liblzma is part of XZ Utils <http://tukaani.org/xz/>. XZ Utils includes
* a gzip-like command line tool named xz and some other tools. XZ Utils
* is developed and maintained by Lasse Collin.
*
* Major parts of liblzma are based on Igor Pavlov's public domain LZMA SDK
* <http://7-zip.org/sdk.html>.
*
* The SHA-256 implementation is based on the public domain code found from
* 7-Zip <http://7-zip.org/>, which has a modified version of the public
* domain SHA-256 code found from Crypto++ <http://www.cryptopp.com/>.
* The SHA-256 code in Crypto++ was written by Kevin Springle and Wei Dai.
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#ifndef LZMA_H
#define LZMA_H
/*****************************
* Required standard headers *
*****************************/
/*
* liblzma API headers need some standard types and macros. To allow
* including lzma.h without requiring the application to include other
* headers first, lzma.h includes the required standard headers unless
* they already seem to be included or if LZMA_MANUAL_HEADERS
* has been defined.
*
* Here's what types and macros are needed and from which headers:
* - stddef.h: size_t, NULL
* - stdint.h: uint8_t, uint32_t, uint64_t, UINT32_C(n), UINT64_C(n),
* UINT32_MAX, UINT64_MAX
*
* However, inttypes.h is a little more portable than stdint.h, although
* inttypes.h declares some unneeded things compared to plain stdint.h.
*
* The hacks below aren't perfect, specifically they assume that inttypes.h
* exists and that it typedefs at least uint8_t, uint32_t, and uint64_t,
* and that, in case of incomplete inttypes.h, unsigned int is 32-bit.
* If the application already takes care of setting up all the types and
* macros properly (for example by using gnulib's stdint.h or inttypes.h),
* we try to detect that the macros are already defined and don't include
* inttypes.h here again. However, you may define LZMA_MANUAL_HEADERS to
* force this file to never include any system headers.
*
* Some could argue that liblzma API should provide all the required types,
* for example lzma_uint64, LZMA_UINT64_C(n), and LZMA_UINT64_MAX. This was
* seen unnecessary mess, since most systems already provide all the necessary
* types and macros in the standard headers.
*
* Note that liblzma API still has lzma_bool, because using stdbool.h would
* break C89 and C++ programs on many systems. sizeof(bool) in C99 isn't
* necessarily the same as sizeof(bool) in C++.
*/
#ifndef LZMA_MANUAL_HEADERS
/*
* I suppose this works portably also in C++. Note that in C++,
* we need to get size_t into the global namespace.
*/
# include <stddef.h>
/*
* Skip inttypes.h if we already have all the required macros. If we
* have the macros, we assume that we have the matching typedefs too.
*/
# if !defined(UINT32_C) || !defined(UINT64_C) \
|| !defined(UINT32_MAX) || !defined(UINT64_MAX)
/*
* MSVC has no C99 support, and thus it cannot be used to
* compile liblzma. The liblzma API has to still be usable
* from MSVC, so we need to define the required standard
* integer types here.
*/
# if defined(_WIN32) && defined(_MSC_VER)
typedef unsigned __int8 uint8_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
# else
/* Use the standard inttypes.h. */
# ifdef __cplusplus
/*
* C99 sections 7.18.2 and 7.18.4 specify that
* in C++ implementations define the limit
* and constant macros only if specifically
* requested. Note that if you want the
* format macros (PRIu64 etc.) too, you need
* to define __STDC_FORMAT_MACROS before
* including lzma.h, since re-including
* inttypes.h with __STDC_FORMAT_MACROS
* defined doesn't necessarily work.
*/
# ifndef __STDC_LIMIT_MACROS
# define __STDC_LIMIT_MACROS 1
# endif
# ifndef __STDC_CONSTANT_MACROS
# define __STDC_CONSTANT_MACROS 1
# endif
# endif
# include <inttypes.h>
# endif
/*
* Some old systems have only the typedefs in inttypes.h, and
* lack all the macros. For those systems, we need a few more
* hacks. We assume that unsigned int is 32-bit and unsigned
* long is either 32-bit or 64-bit. If these hacks aren't
* enough, the application has to setup the types manually
* before including lzma.h.
*/
# ifndef UINT32_C
# if defined(_WIN32) && defined(_MSC_VER)
# define UINT32_C(n) n ## UI32
# else
# define UINT32_C(n) n ## U
# endif
# endif
# ifndef UINT64_C
# if defined(_WIN32) && defined(_MSC_VER)
# define UINT64_C(n) n ## UI64
# else
/* Get ULONG_MAX. */
# include <limits.h>
# if ULONG_MAX == 4294967295UL
# define UINT64_C(n) n ## ULL
# else
# define UINT64_C(n) n ## UL
# endif
# endif
# endif
# ifndef UINT32_MAX
# define UINT32_MAX (UINT32_C(4294967295))
# endif
# ifndef UINT64_MAX
# define UINT64_MAX (UINT64_C(18446744073709551615))
# endif
# endif
#endif /* ifndef LZMA_MANUAL_HEADERS */
/******************
* LZMA_API macro *
******************/
/*
* Some systems require (or at least recommend) that the functions and
* function pointers are declared specially in the headers. LZMA_API_IMPORT
* is for importing symbols and LZMA_API_CALL is to specify calling
* convention.
*
* By default it is assumed that the application will link dynamically
* against liblzma. #define LZMA_API_STATIC in your application if you
* want to link against static liblzma. If you don't care about portability
* to operating systems like Windows, or at least don't care about linking
* against static liblzma on them, don't worry about LZMA_API_STATIC. That
* is, most developers will never need to use LZMA_API_STATIC.
*
* Cygwin is a special case on Windows. We rely on GCC doing the right thing
* and thus don't use dllimport and don't specify the calling convention.
*/
#ifndef LZMA_API_IMPORT
# if !defined(LZMA_API_STATIC) && defined(_WIN32) && !defined(__CYGWIN__)
# define LZMA_API_IMPORT __declspec(dllimport)
# else
# define LZMA_API_IMPORT
# endif
#endif
#ifndef LZMA_API_CALL
# if defined(_WIN32) && !defined(__CYGWIN__)
# define LZMA_API_CALL __cdecl
# else
# define LZMA_API_CALL
# endif
#endif
#ifndef LZMA_API
# define LZMA_API(type) LZMA_API_IMPORT type LZMA_API_CALL
#endif
/***********
* nothrow *
***********/
/*
* None of the functions in liblzma may throw an exception. Even
* the functions that use callback functions won't throw exceptions,
* because liblzma would break if a callback function threw an exception.
*/
#ifndef lzma_nothrow
# if defined(__cplusplus)
# define lzma_nothrow throw()
# elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
# define lzma_nothrow __attribute__((__nothrow__))
# else
# define lzma_nothrow
# endif
#endif
/********************
* GNU C extensions *
********************/
/*
* GNU C extensions are used conditionally in the public API. It doesn't
* break anything if these are sometimes enabled and sometimes not, only
* affects warnings and optimizations.
*/
/* GCC 3 or newer: map the helper macros onto the GNU extensions. */
#if __GNUC__ >= 3
# ifndef lzma_attribute
# define lzma_attribute(attr) __attribute__(attr)
# endif
# ifndef lzma_restrict
# define lzma_restrict __restrict__
# endif
/*
 * warn_unused_result was added in GCC 3.4. On GCC 3.0-3.3 the macro is
 * defined as empty here, which makes the generic definition further down
 * a no-op. On newer GCC it is left undefined so that the generic
 * definition supplies the real attribute.
 */
# ifndef lzma_attr_warn_unused_result
# if __GNUC__ == 3 && __GNUC_MINOR__ < 4
# define lzma_attr_warn_unused_result
# endif
# endif
#else
/* Not GCC 3 or newer: attributes expand to nothing. */
# ifndef lzma_attribute
# define lzma_attribute(attr)
# endif
/* Use the plain restrict keyword only when the compiler reports C99. */
# ifndef lzma_restrict
# if __STDC_VERSION__ >= 199901L
# define lzma_restrict restrict
# else
# define lzma_restrict
# endif
# endif
#endif
/*
 * Attribute shorthands used in the declarations. Each one can be
 * pre-defined by the application; otherwise it is built on
 * lzma_attribute() and therefore expands to nothing whenever
 * lzma_attribute() does.
 */
#ifndef lzma_attr_pure
# define lzma_attr_pure lzma_attribute((__pure__))
#endif
#ifndef lzma_attr_const
# define lzma_attr_const lzma_attribute((__const__))
#endif
#ifndef lzma_attr_warn_unused_result
# define lzma_attr_warn_unused_result \
lzma_attribute((__warn_unused_result__))
#endif
/**************
* Subheaders *
**************/
#ifdef __cplusplus
extern "C" {
#endif
/*
* Subheaders check that this is defined. It is to prevent including
* them directly from applications.
*/
#define LZMA_H_INTERNAL 1
/* Basic features */
#include "lzma/version.h"
#include "lzma/base.h"
#include "lzma/vli.h"
#include "lzma/check.h"
/* Filters */
#include "lzma/filter.h"
#include "lzma/subblock.h"
#include "lzma/bcj.h"
#include "lzma/delta.h"
#include "lzma/lzma.h"
/* Container formats */
#include "lzma/container.h"
/* Advanced features */
#include "lzma/stream_flags.h"
#include "lzma/block.h"
#include "lzma/index.h"
#include "lzma/index_hash.h"
/*
* All subheaders included. Undefine LZMA_H_INTERNAL to prevent applications
* re-including the subheaders.
*/
#undef LZMA_H_INTERNAL
#ifdef __cplusplus
}
#endif
#endif /* ifndef LZMA_H */
This diff is collapsed.
/**
* \file lzma/bcj.h
* \brief Branch/Call/Jump conversion filters
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/* Filter IDs for lzma_filter.id */
#define LZMA_FILTER_X86 LZMA_VLI_C(0x04)
/**<
* Filter for x86 binaries
*/
#define LZMA_FILTER_POWERPC LZMA_VLI_C(0x05)
/**<
* Filter for Big endian PowerPC binaries
*/
#define LZMA_FILTER_IA64 LZMA_VLI_C(0x06)
/**<
* Filter for IA64 (Itanium) binaries.
*/
#define LZMA_FILTER_ARM LZMA_VLI_C(0x07)
/**<
* Filter for ARM binaries.
*/
#define LZMA_FILTER_ARMTHUMB LZMA_VLI_C(0x08)
/**<
* Filter for ARMThumb binaries.
*/
#define LZMA_FILTER_SPARC LZMA_VLI_C(0x09)
/**<
* Filter for SPARC binaries.
*/
/**
* \brief Options for BCJ filters
*
* The BCJ filters never change the size of the data. Specifying options
* for them is optional: if pointer to options is NULL, default value is
* used. You probably never need to specify options to BCJ filters, so just
* set the options pointer to NULL and be happy.
*
* If options with non-default values have been specified when encoding,
* the same options must also be specified when decoding.
*
* \note At the moment, none of the BCJ filters support
* LZMA_SYNC_FLUSH. If LZMA_SYNC_FLUSH is specified,
* LZMA_OPTIONS_ERROR will be returned. If there is need,
* partial support for LZMA_SYNC_FLUSH can be added in future.
* Partial means that flushing would be possible only at
* offsets that are multiple of 2, 4, or 16 depending on
* the filter, except x86 which cannot be made to support
* LZMA_SYNC_FLUSH predictably.
*/
typedef struct {
/**
* \brief Start offset for conversions
*
* This setting is useful only when the same filter is used
* _separately_ for multiple sections of the same executable file,
* and the sections contain cross-section branch/call/jump
* instructions. In that case it is beneficial to set the start
* offset of the non-first sections so that the relative addresses
* of the cross-section branch/call/jump instructions will use the
* same absolute addresses as in the first section.
*
* When the pointer to options is NULL, the default value (zero)
* is used.
*/
uint32_t start_offset;
} lzma_options_bcj;
This diff is collapsed.
/**
* \file lzma/check.h
* \brief Integrity checks
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Type of the integrity check (Check ID)
*
* The .xz format supports multiple types of checks that are calculated
* from the uncompressed data. They vary in both speed and ability to
* detect errors.
*/
typedef enum {
LZMA_CHECK_NONE = 0,
/**<
* No Check is calculated.
*
* Size of the Check field: 0 bytes
*/
LZMA_CHECK_CRC32 = 1,
/**<
* CRC32 using the polynomial from the IEEE 802.3 standard
*
* Size of the Check field: 4 bytes
*/
LZMA_CHECK_CRC64 = 4,
/**<
* CRC64 using the polynomial from the ECMA-182 standard
*
* Size of the Check field: 8 bytes
*/
LZMA_CHECK_SHA256 = 10
/**<
* SHA-256
*
* Size of the Check field: 32 bytes
*/
} lzma_check;
/**
* \brief Maximum valid Check ID
*
* The .xz file format specification specifies 16 Check IDs (0-15). Some
* of them are only reserved, that is, no actual Check algorithm has been
* assigned. When decoding, liblzma still accepts unknown Check IDs for
* future compatibility. If a valid but unsupported Check ID is detected,
* liblzma can indicate a warning; see the flags LZMA_TELL_NO_CHECK,
* LZMA_TELL_UNSUPPORTED_CHECK, and LZMA_TELL_ANY_CHECK in container.h.
*/
#define LZMA_CHECK_ID_MAX 15
/**
* \brief Test if the given Check ID is supported
*
* Return true if the given Check ID is supported by this liblzma build.
* Otherwise false is returned. It is safe to call this with a value that
* is not in the range [0, 15]; in that case the return value is always false.
*
* You can assume that LZMA_CHECK_NONE and LZMA_CHECK_CRC32 are always
* supported (even if liblzma is built with limited features).
*/
extern LZMA_API(lzma_bool) lzma_check_is_supported(lzma_check check)
lzma_nothrow lzma_attr_const;
/**
* \brief Get the size of the Check field with the given Check ID
*
* Although not all Check IDs have a check algorithm associated, the size of
* every Check is already frozen. This function returns the size (in bytes) of
* the Check field with the specified Check ID. The values are:
* { 0, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64 }
*
* If the argument is not in the range [0, 15], UINT32_MAX is returned.
*/
extern LZMA_API(uint32_t) lzma_check_size(lzma_check check)
lzma_nothrow lzma_attr_const;
/**
* \brief Maximum size of a Check field
*/
#define LZMA_CHECK_SIZE_MAX 64
/**
* \brief Calculate CRC32
*
* Calculate CRC32 using the polynomial from the IEEE 802.3 standard.
*
* \param buf Pointer to the input buffer
* \param size Size of the input buffer
* \param crc Previously returned CRC value. This is used to
* calculate the CRC of a big buffer in smaller chunks.
* Set to zero when starting a new calculation.
*
* \return Updated CRC value, which can be passed to this function
* again to continue CRC calculation.
*/
extern LZMA_API(uint32_t) lzma_crc32(
const uint8_t *buf, size_t size, uint32_t crc)
lzma_nothrow lzma_attr_pure;
/**
* \brief Calculate CRC64
*
* Calculate CRC64 using the polynomial from the ECMA-182 standard.
*
* This function is used similarly to lzma_crc32(). See its documentation.
*/
extern LZMA_API(uint64_t) lzma_crc64(
const uint8_t *buf, size_t size, uint64_t crc)
lzma_nothrow lzma_attr_pure;
/*
* SHA-256 functions are currently not exported to public API.
* Contact Lasse Collin if you think it should be.
*/
/**
* \brief Get the type of the integrity check
*
* This function can be called only immediately after lzma_code() has
* returned LZMA_NO_CHECK, LZMA_UNSUPPORTED_CHECK, or LZMA_GET_CHECK.
* Calling this function in any other situation has undefined behavior.
*/
extern LZMA_API(lzma_check) lzma_get_check(const lzma_stream *strm)
lzma_nothrow;
This diff is collapsed.
/**
* \file lzma/delta.h
* \brief Delta filter
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Filter ID
*
* Filter ID of the Delta filter. This is used as lzma_filter.id.
*/
#define LZMA_FILTER_DELTA LZMA_VLI_C(0x03)
/**
* \brief Type of the delta calculation
*
* Currently only byte-wise delta is supported. Other possible types could
* be, for example, delta of 16/32/64-bit little/big endian integers, but
* these are not currently planned since byte-wise delta is almost as good.
*/
typedef enum {
LZMA_DELTA_TYPE_BYTE
} lzma_delta_type;
/**
* \brief Options for the Delta filter
*
* These options are needed by both encoder and decoder.
*/
typedef struct {
/** For now, this must always be LZMA_DELTA_TYPE_BYTE. */
lzma_delta_type type;
/**
* \brief Delta distance
*
* With the only currently supported type, LZMA_DELTA_TYPE_BYTE,
* the distance is as bytes.
*
* Examples:
* - 16-bit stereo audio: distance = 4 bytes
* - 24-bit RGB image data: distance = 3 bytes
*/
uint32_t dist;
/* NOTE(review): presumably the smallest and largest accepted dist values -- confirm. */
# define LZMA_DELTA_DIST_MIN 1
# define LZMA_DELTA_DIST_MAX 256
/*
* Reserved space to allow possible future extensions without
* breaking the ABI. You should not touch these, because the names
* of these variables may change. These are not, and never will be,
* used when type is LZMA_DELTA_TYPE_BYTE, so it is safe to leave
* these uninitialized.
*/
uint32_t reserved_int1;
uint32_t reserved_int2;
uint32_t reserved_int3;
uint32_t reserved_int4;
void *reserved_ptr1;
void *reserved_ptr2;
} lzma_options_delta;
This diff is collapsed.
/**
* \file lzma/hardware.h
* \brief Hardware information
*
* Since liblzma can consume a lot of system resources, it also provides
* ways to limit the resource usage. Applications linking against liblzma
* need to make the actual decisions about how many resources to let liblzma use.
* To ease making these decisions, liblzma provides functions to find out
* the relevant capabilities of the underlying hardware. Currently there
* is only a function to find out the amount of RAM, but in the future there
* will be also a function to detect how many concurrent threads the system
* can run.
*
* \note On some operating systems, these functions may temporarily
* load a shared library or open file descriptor(s) to find out
* the requested hardware information. Unless the application
* assumes that specific file descriptors are not touched by
* other threads, this should have no effect on thread safety.
* Possible operations involving file descriptors will restart
* the syscalls if they return EINTR.
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Get the total amount of physical memory (RAM) in bytes
*
* This function may be useful when determining a reasonable memory
* usage limit for decompressing or how much memory it is OK to use
* for compressing.
*
* \return On success, the total amount of physical memory in bytes
* is returned. If the amount of RAM cannot be determined,
* zero is returned. This can happen if an error occurs
* or if there is no code in liblzma to detect the amount
* of RAM on the specific operating system.
*/
extern LZMA_API(uint64_t) lzma_physmem(void) lzma_nothrow;
This diff is collapsed.
/**
* \file lzma/index_hash.h
* \brief Validates Index by using a hash function
*
* Hashing makes it possible to use constant amount of memory to validate
* Index of arbitrary size.
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Opaque data type to hold the Index hash
*/
typedef struct lzma_index_hash_s lzma_index_hash;
/**
* \brief Allocate and initialize a new lzma_index_hash structure
*
* If index_hash is NULL, a new lzma_index_hash structure is allocated,
* initialized, and a pointer to it returned. If allocation fails, NULL
* is returned.
*
* If index_hash is non-NULL, it is reinitialized and the same pointer
* returned. In this case, return value cannot be NULL or a different
* pointer than the index_hash that was given as an argument.
*/
extern LZMA_API(lzma_index_hash *) lzma_index_hash_init(
lzma_index_hash *index_hash, lzma_allocator *allocator)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Deallocate lzma_index_hash structure
*/
extern LZMA_API(void) lzma_index_hash_end(
lzma_index_hash *index_hash, lzma_allocator *allocator)
lzma_nothrow;
/**
* \brief Add a new Record to an Index hash
*
* \param index_hash Pointer to a lzma_index_hash structure
* \param unpadded_size Unpadded Size of a Block
* \param uncompressed_size Uncompressed Size of a Block
*
* \return - LZMA_OK
* - LZMA_DATA_ERROR: Compressed or uncompressed size of the
* Stream or size of the Index field would grow too big.
* - LZMA_PROG_ERROR: Invalid arguments or this function is being
* used when lzma_index_hash_decode() has already been used.
*/
extern LZMA_API(lzma_ret) lzma_index_hash_append(lzma_index_hash *index_hash,
lzma_vli unpadded_size, lzma_vli uncompressed_size)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Decode and validate the Index field
*
* After telling the sizes of all Blocks with lzma_index_hash_append(),
* the actual Index field is decoded with this function. Specifically,
* once decoding of the Index field has been started, no more Records
* can be added using lzma_index_hash_append().
*
* This function doesn't use lzma_stream structure to pass the input data.
* Instead, the input buffer is specified using three arguments. This is
* because it matches better the internal APIs of liblzma.
*
* \param index_hash Pointer to a lzma_index_hash structure
* \param in Pointer to the beginning of the input buffer
* \param in_pos in[*in_pos] is the next byte to process
* \param in_size in[in_size] is the first byte not to process
*
* \return - LZMA_OK: So far good, but more input is needed.
* - LZMA_STREAM_END: Index decoded successfully and it matches
* the Records given with lzma_index_hash_append().
* - LZMA_DATA_ERROR: Index is corrupt or doesn't match the
* information given with lzma_index_hash_append().
* - LZMA_BUF_ERROR: Cannot progress because *in_pos >= in_size.
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_index_hash_decode(lzma_index_hash *index_hash,
const uint8_t *in, size_t *in_pos, size_t in_size)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Get the size of the Index field as bytes
*
* This is needed to verify the Backward Size field in the Stream Footer.
*/
extern LZMA_API(lzma_vli) lzma_index_hash_size(
const lzma_index_hash *index_hash)
lzma_nothrow lzma_attr_pure;
This diff is collapsed.
/**
* \file lzma/stream_flags.h
* \brief .xz Stream Header and Stream Footer encoder and decoder
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Size of Stream Header and Stream Footer
*
* Stream Header and Stream Footer have the same size and they are not
* going to change even if a newer version of the .xz file format is
* developed in future.
*/
#define LZMA_STREAM_HEADER_SIZE 12
/**
* \brief Options for encoding/decoding Stream Header and Stream Footer
*/
typedef struct {
/**
* \brief Stream Flags format version
*
* To prevent API and ABI breakages if new features are needed in
* Stream Header or Stream Footer, a version number is used to
* indicate which fields in this structure are in use. For now,
* version must always be zero. With non-zero version, the
* lzma_stream_header_encode() and lzma_stream_footer_encode()
* will return LZMA_OPTIONS_ERROR.
*
* lzma_stream_header_decode() and lzma_stream_footer_decode()
* will always set this to the lowest value that supports all the
* features indicated by the Stream Flags field. The application
* must check that the version number set by the decoding functions
* is supported by the application. Otherwise it is possible that
* the application will decode the Stream incorrectly.
*/
uint32_t version;
/**
* \brief Backward Size
*
* Backward Size must be a multiple of four bytes. In this Stream
* format version, Backward Size is the size of the Index field.
*
* Backward Size isn't actually part of the Stream Flags field, but
* it is convenient to include in this structure anyway. Backward
* Size is present only in the Stream Footer. There is no need to
* initialize backward_size when encoding Stream Header.
*
* lzma_stream_header_decode() always sets backward_size to
* LZMA_VLI_UNKNOWN so that it is convenient to use
* lzma_stream_flags_compare() when both Stream Header and Stream
* Footer have been decoded.
*/
lzma_vli backward_size;
# define LZMA_BACKWARD_SIZE_MIN 4
# define LZMA_BACKWARD_SIZE_MAX (LZMA_VLI_C(1) << 34)
/**
* \brief Check ID
*
* This indicates the type of the integrity check calculated from
* uncompressed data.
*/
lzma_check check;
/*
* Reserved space to allow possible future extensions without
* breaking the ABI. You should not touch these, because the
* names of these variables may change.
*
* (We will never be able to use all of these since Stream Flags
* is just two bytes plus Backward Size of four bytes. But it's
* nice to have the proper types when they are needed.)
*/
lzma_reserved_enum reserved_enum1;
lzma_reserved_enum reserved_enum2;
lzma_reserved_enum reserved_enum3;
lzma_reserved_enum reserved_enum4;
lzma_reserved_enum reserved_enum5;
lzma_reserved_enum reserved_enum6;
lzma_bool reserved_bool1;
lzma_bool reserved_bool2;
lzma_bool reserved_bool3;
lzma_bool reserved_bool4;
lzma_bool reserved_bool5;
lzma_bool reserved_bool6;
lzma_bool reserved_bool7;
lzma_bool reserved_bool8;
uint32_t reserved_int1;
uint32_t reserved_int2;
uint32_t reserved_int3;
uint32_t reserved_int4;
} lzma_stream_flags;
/**
* \brief Encode Stream Header
*
* \param options Stream Header options to be encoded.
* options->backward_size is ignored and doesn't
* need to be initialized.
* \param out Beginning of the output buffer of
* LZMA_STREAM_HEADER_SIZE bytes.
*
* \return - LZMA_OK: Encoding was successful.
* - LZMA_OPTIONS_ERROR: options->version is not supported by
* this liblzma version.
* - LZMA_PROG_ERROR: Invalid options.
*/
extern LZMA_API(lzma_ret) lzma_stream_header_encode(
const lzma_stream_flags *options, uint8_t *out)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Encode Stream Footer
*
* \param options Stream Footer options to be encoded.
* \param out Beginning of the output buffer of
* LZMA_STREAM_HEADER_SIZE bytes.
*
* \return - LZMA_OK: Encoding was successful.
* - LZMA_OPTIONS_ERROR: options->version is not supported by
* this liblzma version.
* - LZMA_PROG_ERROR: Invalid options.
*/
extern LZMA_API(lzma_ret) lzma_stream_footer_encode(
const lzma_stream_flags *options, uint8_t *out)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Decode Stream Header
*
* \param options Target for the decoded Stream Header options.
* \param in Beginning of the input buffer of
* LZMA_STREAM_HEADER_SIZE bytes.
*
* options->backward_size is always set to LZMA_VLI_UNKNOWN. This is to
* help comparing Stream Flags from Stream Header and Stream Footer with
* lzma_stream_flags_compare().
*
* \return - LZMA_OK: Decoding was successful.
* - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given
* buffer cannot be Stream Header.
* - LZMA_DATA_ERROR: CRC32 doesn't match, thus the header
* is corrupt.
* - LZMA_OPTIONS_ERROR: Unsupported options are present
* in the header.
*
* \note When decoding .xz files that contain multiple Streams, it may
* make sense to print "file format not recognized" only if
* decoding of the Stream Header of the _first_ Stream gives
* LZMA_FORMAT_ERROR. If non-first Stream Header gives
* LZMA_FORMAT_ERROR, the message used for LZMA_DATA_ERROR is
* probably more appropriate.
*
* For example, Stream decoder in liblzma uses LZMA_DATA_ERROR if
* LZMA_FORMAT_ERROR is returned by lzma_stream_header_decode()
* when decoding non-first Stream.
*/
extern LZMA_API(lzma_ret) lzma_stream_header_decode(
lzma_stream_flags *options, const uint8_t *in)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Decode Stream Footer
*
* \param options Target for the decoded Stream Footer options.
* \param in Beginning of the input buffer of
* LZMA_STREAM_HEADER_SIZE bytes.
*
* \return - LZMA_OK: Decoding was successful.
* - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given
* buffer cannot be Stream Footer.
* - LZMA_DATA_ERROR: CRC32 doesn't match, thus the Stream Footer
* is corrupt.
* - LZMA_OPTIONS_ERROR: Unsupported options are present
* in Stream Footer.
*
* \note If Stream Header was already decoded successfully, but
* decoding Stream Footer returns LZMA_FORMAT_ERROR, the
* application should probably report some other error message
* than "file format not recognized", since the file more likely
* is corrupt (possibly truncated). Stream decoder in liblzma
* uses LZMA_DATA_ERROR in this situation.
*/
extern LZMA_API(lzma_ret) lzma_stream_footer_decode(
lzma_stream_flags *options, const uint8_t *in)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Compare two lzma_stream_flags structures
*
* backward_size values are compared only if both are not
* LZMA_VLI_UNKNOWN.
*
* \param a Pointer to the first lzma_stream_flags structure
* \param b Pointer to the second lzma_stream_flags structure
*
* \return - LZMA_OK: Both are equal. If either had backward_size set
* to LZMA_VLI_UNKNOWN, backward_size values were not
* compared or validated.
* - LZMA_DATA_ERROR: The structures differ.
* - LZMA_OPTIONS_ERROR: version in either structure is greater
* than the maximum supported version (currently zero).
* - LZMA_PROG_ERROR: Invalid value, e.g. invalid check or
* backward_size.
*/
extern LZMA_API(lzma_ret) lzma_stream_flags_compare(
const lzma_stream_flags *a, const lzma_stream_flags *b)
lzma_nothrow lzma_attr_pure;
/**
* \file lzma/subblock.h
* \brief Subblock filter
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Filter ID
*
* Filter ID of the Subblock filter. This is used as lzma_filter.id.
*/
#define LZMA_FILTER_SUBBLOCK LZMA_VLI_C(0x01)
/**
* \brief Subfilter mode
*
* See lzma_options_subblock.subfilter_mode for details.
*/
typedef enum {
LZMA_SUBFILTER_NONE,
/**<
* No Subfilter is in use.
*/
LZMA_SUBFILTER_SET,
/**<
* New Subfilter has been requested to be initialized.
*/
LZMA_SUBFILTER_RUN,
/**<
* Subfilter is active.
*/
LZMA_SUBFILTER_FINISH
/**<
* Subfilter has been requested to be finished.
*/
} lzma_subfilter_mode;
/**
* \brief Options for the Subblock filter
*
* Specifying options for the Subblock filter is optional: if the pointer
* options is NULL, no subfilters are allowed and the default value is used
* for subblock_data_size.
*/
typedef struct {
/* Options for encoder and decoder */
/**
* \brief Allowing subfilters
*
* If this is true, subfilters are allowed.
*
* In the encoder, if this is set to false, subfilter_mode and
* subfilter_options are completely ignored.
*/
lzma_bool allow_subfilters;
/* Options for encoder only */
/**
* \brief Alignment
*
* The Subblock filter encapsulates the input data into Subblocks.
* Each Subblock has a header which takes a few bytes of space.
* When the output of the Subblock encoder is fed to another filter
* that takes advantage of the alignment of the input data (e.g. LZMA),
* the Subblock filter can add padding to keep the actual data parts
* in the Subblocks aligned correctly.
*
* The alignment should be a positive integer. Subblock filter will
* add enough padding between Subblocks so that this is true for
* every payload byte:
* input_offset % alignment == output_offset % alignment
*
* The Subblock filter assumes that the first output byte will be
* written to a position in the output stream that is properly
* aligned. This requirement is automatically met when the start
* offset of the Stream or Block is correctly told to Block or
* Stream encoder.
*/
uint32_t alignment;
# define LZMA_SUBBLOCK_ALIGNMENT_MIN 1
# define LZMA_SUBBLOCK_ALIGNMENT_MAX 32
# define LZMA_SUBBLOCK_ALIGNMENT_DEFAULT 4
/**
* \brief Size of the Subblock Data part of each Subblock
*
* This value is re-read every time a new Subblock is started.
*
* Bigger values
* - save a few bytes of space;
* - increase latency in the encoder (but no effect for decoding);
* - decrease memory locality (increased cache pollution) in the
* encoder (no effect in decoding).
*/
uint32_t subblock_data_size;
# define LZMA_SUBBLOCK_DATA_SIZE_MIN 1
# define LZMA_SUBBLOCK_DATA_SIZE_MAX (UINT32_C(1) << 28)
# define LZMA_SUBBLOCK_DATA_SIZE_DEFAULT 4096
/**
* \brief Run-length encoder remote control
*
* The Subblock filter has an internal run-length encoder (RLE). It
* can be useful when the data includes byte sequences that repeat
* very many times. The RLE can be used also when a Subfilter is
* in use; the RLE will be applied to the output of the Subfilter.
*
* Note that in contrast to traditional RLE, this RLE is intended to
* be used only when there's a lot of data to be repeated. If the
* input data has e.g. 500 bytes of NULs now and then, this RLE
* is probably useless, because plain LZMA should provide better
* results.
*
* For the above reasons, it was decided to keep the implementation
* of the RLE very simple. When the rle variable is non-zero,
* subblock_data_size must be a multiple of rle. Once the Subblock
* encoder has got subblock_data_size bytes of input, it will check
* if the whole buffer of the last subblock_data_size can be
* represented with repeats of chunks having size of rle bytes.
*
* If there are consecutive identical buffers of subblock_data_size
* bytes, they will be encoded using a single repeat entry if
* possible.
*
* If need arises, more advanced RLE can be implemented later
* without breaking API or ABI.
*/
uint32_t rle;
# define LZMA_SUBBLOCK_RLE_OFF 0
# define LZMA_SUBBLOCK_RLE_MIN 1
# define LZMA_SUBBLOCK_RLE_MAX 256
/**
* \brief Subfilter remote control
*
* When the Subblock filter is initialized, this variable must be
* LZMA_SUBFILTER_NONE or LZMA_SUBFILTER_SET.
*
* When subfilter_mode is LZMA_SUBFILTER_NONE, the application may
* put Subfilter options to subfilter_options structure, and then
* set subfilter_mode to LZMA_SUBFILTER_SET. No new input data will
* be read until the Subfilter has been enabled. Once the Subfilter
* has been enabled, liblzma will set subfilter_mode to
* LZMA_SUBFILTER_RUN.
*
* When subfilter_mode is LZMA_SUBFILTER_RUN, the application may
* set subfilter_mode to LZMA_SUBFILTER_FINISH. All the input
* currently available will be encoded before unsetting the
* Subfilter. Application must not change the amount of available
* input until the Subfilter has finished. Once the Subfilter has
* finished, liblzma will set subfilter_mode to LZMA_SUBFILTER_NONE.
*
* If the intent is to have Subfilter enabled to the very end of
* the data, it is not needed to separately disable Subfilter with
* LZMA_SUBFILTER_FINISH. Using LZMA_FINISH as the second argument
* of lzma_code() will make the Subblock encoder disable the
* Subfilter once all the data has been run through the Subfilter.
*
* After the first call with LZMA_SYNC_FLUSH or LZMA_FINISH, the
* application must not change subfilter_mode until LZMA_STREAM_END.
* Setting LZMA_SUBFILTER_SET/LZMA_SUBFILTER_FINISH and
* LZMA_SYNC_FLUSH/LZMA_FINISH _at the same time_ is fine.
*
* \note This variable is ignored if allow_subfilters is false.
*/
lzma_subfilter_mode subfilter_mode;
/**
* \brief Subfilter and its options
*
* When no Subfilter is used, the data is copied as is into Subblocks.
* Setting a Subfilter allows encoding some parts of the data with
* an additional filter. It is possible to use many different Subfilters
* in the same Block, although only one can be used at once.
*
* \note This variable is ignored if allow_subfilters is false.
*/
lzma_filter subfilter_options;
} lzma_options_subblock;
/**
* \file lzma/version.h
* \brief Version number
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/*
* Version number split into components
*/
#define LZMA_VERSION_MAJOR 4
#define LZMA_VERSION_MINOR 999
#define LZMA_VERSION_PATCH 9
#define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_BETA
#ifndef LZMA_VERSION_COMMIT
# define LZMA_VERSION_COMMIT ""
#endif
/*
* Map symbolic stability levels to integers.
*/
#define LZMA_VERSION_STABILITY_ALPHA 0
#define LZMA_VERSION_STABILITY_BETA 1
#define LZMA_VERSION_STABILITY_STABLE 2
/**
* \brief Compile-time version number
*
* The version number is of format xyyyzzzs where
* - x = major
* - yyy = minor
* - zzz = revision
* - s indicates stability: 0 = alpha, 1 = beta, 2 = stable
*
* The same xyyyzzz triplet is never reused with different stability levels.
* For example, if 5.1.0alpha has been released, there will never be 5.1.0beta
* or 5.1.0 stable.
*
* \note The version number of liblzma has nothing to do with
* the version number of Igor Pavlov's LZMA SDK.
*/
#define LZMA_VERSION (LZMA_VERSION_MAJOR * UINT32_C(10000000) \
+ LZMA_VERSION_MINOR * UINT32_C(10000) \
+ LZMA_VERSION_PATCH * UINT32_C(10) \
+ LZMA_VERSION_STABILITY)
/*
* Macros to construct the compile-time version string
*/
#if LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_ALPHA
# define LZMA_VERSION_STABILITY_STRING "alpha"
#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_BETA
# define LZMA_VERSION_STABILITY_STRING "beta"
#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_STABLE
# define LZMA_VERSION_STABILITY_STRING ""
#else
# error Incorrect LZMA_VERSION_STABILITY
#endif
#define LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) \
#major "." #minor "." #patch stability commit
#define LZMA_VERSION_STRING_C(major, minor, patch, stability, commit) \
LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit)
/**
* \brief Compile-time version as a string
*
* This can be for example "4.999.5alpha", "4.999.8beta", or "5.0.0" (stable
* versions don't have any "stable" suffix). In future, a snapshot built
* from source code repository may include an additional suffix, for example
* "4.999.8beta-21-g1d92". The commit ID won't be available in numeric form
* in LZMA_VERSION macro.
*/
#define LZMA_VERSION_STRING LZMA_VERSION_STRING_C( \
LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, \
LZMA_VERSION_PATCH, LZMA_VERSION_STABILITY_STRING, \
LZMA_VERSION_COMMIT)
/* #ifndef is needed for use with windres (MinGW or Cygwin). */
#ifndef LZMA_H_INTERNAL_RC
/**
* \brief Run-time version number as an integer
*
* Return the value of LZMA_VERSION macro at the compile time of liblzma.
* This allows the application to compare if it was built against the same,
* older, or newer version of liblzma that is currently running.
*/
extern LZMA_API(uint32_t) lzma_version_number(void)
lzma_nothrow lzma_attr_const;
/**
* \brief Run-time version as a string
*
* This function may be useful if you want to display which version of
* liblzma your application is currently using.
*/
extern LZMA_API(const char *) lzma_version_string(void)
lzma_nothrow lzma_attr_const;
#endif
/**
* \file lzma/vli.h
* \brief Variable-length integer handling
*
* In the .xz format, most integers are encoded in a variable-length
* representation, which is sometimes called little endian base-128 encoding.
* This saves space when smaller values are more likely than bigger values.
*
* The encoding scheme encodes seven bits to every byte, using minimum
* number of bytes required to represent the given value. Encodings that use
* non-minimum number of bytes are invalid, thus every integer has exactly
* one encoded representation. The maximum number of bits in a VLI is 63,
* thus the vli argument must be at maximum of UINT64_MAX / 2. You should
* use LZMA_VLI_MAX for clarity.
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Maximum supported value of variable-length integer
*/
#define LZMA_VLI_MAX (UINT64_MAX / 2)
/**
* \brief VLI value to denote that the value is unknown
*/
#define LZMA_VLI_UNKNOWN UINT64_MAX
/**
* \brief Maximum supported length of variable length integers
*/
#define LZMA_VLI_BYTES_MAX 9
/**
* \brief VLI constant suffix
*/
#define LZMA_VLI_C(n) UINT64_C(n)
/**
* \brief Variable-length integer type
*
* This will always be an unsigned integer. Valid VLI values are in the range
* [0, LZMA_VLI_MAX]. Unknown value is indicated with LZMA_VLI_UNKNOWN,
* which is the maximum value of the underlying integer type.
*
* In the future, even if lzma_vli is defined as something other than uint64_t,
* it is guaranteed that 2 * LZMA_VLI_MAX will not overflow lzma_vli.
* This simplifies integer overflow detection.
*/
typedef uint64_t lzma_vli;
/**
* \brief Simple macro to validate variable-length integer
*
* This is useful to test that application has given acceptable values
* for example in the uncompressed_size and compressed_size variables.
*
* \return True if the integer is representable as VLI or if it
* indicates unknown value.
*/
#define lzma_vli_is_valid(vli) \
((vli) <= LZMA_VLI_MAX || (vli) == LZMA_VLI_UNKNOWN)
/**
* \brief Encode a variable-length integer
*
* This function has two modes: single-call and multi-call. Single-call mode
* encodes the whole integer at once; it is an error if the output buffer is
* too small. Multi-call mode saves the position in *vli_pos, and thus it is
* possible to continue encoding if the buffer becomes full before the whole
* integer has been encoded.
*
* \param vli Integer to be encoded
* \param vli_pos How many VLI-encoded bytes have already been written
* out. When starting to encode a new integer, *vli_pos
* must be set to zero. To use single-call encoding,
* set vli_pos to NULL.
* \param out Beginning of the output buffer
* \param out_pos The next byte will be written to out[*out_pos].
* \param out_size Size of the out buffer; the first byte into
* which no data is written to is out[out_size].
*
* \return Slightly different return values are used in multi-call and
* single-call modes.
*
* Single-call (vli_pos == NULL):
* - LZMA_OK: Integer successfully encoded.
* - LZMA_PROG_ERROR: Arguments are not sane. This can be due
* to too little output space; single-call mode doesn't use
* LZMA_BUF_ERROR, since the application should have checked
* the encoded size with lzma_vli_size().
*
* Multi-call (vli_pos != NULL):
* - LZMA_OK: So far all OK, but the integer is not
* completely written out yet.
* - LZMA_STREAM_END: Integer successfully encoded.
* - LZMA_BUF_ERROR: No output space was provided.
* - LZMA_PROG_ERROR: Arguments are not sane.
*/
extern LZMA_API(lzma_ret) lzma_vli_encode(lzma_vli vli,
size_t *lzma_restrict vli_pos, uint8_t *lzma_restrict out,
size_t *lzma_restrict out_pos, size_t out_size) lzma_nothrow;
/**
* \brief Decode a variable-length integer
*
* Like lzma_vli_encode(), this function has single-call and multi-call modes.
*
* \param vli Pointer to decoded integer. The decoder will
* initialize it to zero when *vli_pos == 0, so
* application isn't required to initialize *vli.
* \param vli_pos How many bytes have already been decoded. When
* starting to decode a new integer, *vli_pos must
* be initialized to zero. To use single-call decoding,
* set this to NULL.
* \param in Beginning of the input buffer
* \param in_pos The next byte will be read from in[*in_pos].
* \param in_size Size of the input buffer; the first byte that
* won't be read is in[in_size].
*
* \return Slightly different return values are used in multi-call and
* single-call modes.
*
* Single-call (vli_pos == NULL):
* - LZMA_OK: Integer successfully decoded.
* - LZMA_DATA_ERROR: Integer is corrupt. This includes hitting
* the end of the input buffer before the whole integer was
* decoded; providing no input at all will use LZMA_DATA_ERROR.
* - LZMA_PROG_ERROR: Arguments are not sane.
*
* Multi-call (vli_pos != NULL):
* - LZMA_OK: So far all OK, but the integer is not
* completely decoded yet.
* - LZMA_STREAM_END: Integer successfully decoded.
* - LZMA_DATA_ERROR: Integer is corrupt.
* - LZMA_BUF_ERROR: No input was provided.
* - LZMA_PROG_ERROR: Arguments are not sane.
*/
extern LZMA_API(lzma_ret) lzma_vli_decode(lzma_vli *lzma_restrict vli,
size_t *lzma_restrict vli_pos, const uint8_t *lzma_restrict in,
size_t *lzma_restrict in_pos, size_t in_size) lzma_nothrow;
/**
* \brief Get the number of bytes required to encode a VLI
*
* \param vli Integer whose encoded size is to be calculated
*
* \return Number of bytes on success (1-9). If vli isn't valid,
* zero is returned.
*/
extern LZMA_API(uint32_t) lzma_vli_size(lzma_vli vli)
lzma_nothrow lzma_attr_pure;
......@@ -119,10 +119,10 @@ NEWBRT_O_FILES += brtloader.$(OEXT) quicklz.$(OEXT) compress.$(OEXT)
brtloader.$(OEXT): $(DEPEND_COMPILE)
$(NEWBRT_O_FILES): VISIBILITY=
$(NEWBRT_O_FILES): $(LZMA_H)
$(NEWBRT_SO): DISABLE_WARNING += 10237 # Do not complain about -lcilkrts being linked in dynamically, static library not available
$(NEWBRT_SO): $(NEWBRT_O_FILES)
echo $(patsubst %,newbrt/%,$(NEWBRT_O_FILES)) > ../lib/newbrt.olist
$(TOKULINKER) $(SHARED) $(SYMBOLS) $(GCOV_FLAGS) $(SKIP_WARNING) $(NEWBRT_O_FILES) -o$(NEWBRT_SO) $(LINUX_NOSTDLIB) $(LCILKRTS)
$(NEWBRT_SO): $(NEWBRT_O_FILES) $(LZMA_A)
$(TOKULINKER) $(SHARED) $(SYMBOLS) $(GCOV_FLAGS) $(SKIP_WARNING) $(NEWBRT_O_FILES) $(LZMA_A) -o$(NEWBRT_SO) $(LINUX_NOSTDLIB) $(LCILKRTS)
$(NEWBRT_A): $(NEWBRT_O_FILES)
log_code.$(OEXT): log_header.h wbuf.h log-internal.h rbuf.h
......@@ -145,7 +145,7 @@ logformat$(BINSUF): logformat.c $(LIBPORTABILITY_SO)
ifeq ($(PROF),1)
libs: $(NEWBRT_A)
else
libs: $(NEWBRT_SO)
libs: $(NEWBRT_SO) $(NEWBRT_A)
endif
bins: $(BINS)
......@@ -176,13 +176,14 @@ clean: clean-local clean-tests
clean-tests:
cd tests;$(MAKE) clean
clean-local:
$(RM) $(TOKUROOT)lib/libnewbrt.$(AEXT) $(TOKUROOT)lib/libnewbrt.$(SOEXT)
rm -rf $(NEWBRT)
rm -rf test_oexcl.c.tmp *.brt
rm -rf log_code.c log_header.h log_print.c logformat
brtdump_static$(BINSUF): DEPEND_LINK = -lpthread -ldl -lz
brtdump_static$(BINSUF): brtdump.$(OEXT) $(NEWBRT_A) $(LIBPORTABILITY_A) $(DEPEND_COMPILE) $(DEPEND_LINK)
$(CC) $< $(NEWBRT_A) $(LIBPORTABILITY_A) $(BIN_FROM_O_FLAGS_NOLIB) $(ALWAYS_LINK) $(LINK_MUST_BE_LAST)
brtdump_static$(BINSUF): brtdump.$(OEXT) $(NEWBRT_A) $(LZMA_A) $(LIBPORTABILITY_A) $(DEPEND_COMPILE) $(DEPEND_LINK)
$(CC) $< $(NEWBRT_A) $(LZMA_A) $(LIBPORTABILITY_A) $(BIN_FROM_O_FLAGS_NOLIB) $(ALWAYS_LINK) $(LINK_MUST_BE_LAST)
# After doing (cd ../src/tests;make test_log5.recover), run these. The files should have no differences.
testdump: brtdump$(BINSUF)
......
......@@ -27,6 +27,7 @@
#include "block_table.h"
#include "c_dialects.h"
#include "mempool.h"
#include "compress.h"
// Uncomment the following to use quicklz
......@@ -411,6 +412,7 @@ struct brt_header {
uint32_t count_of_optimize_in_progress; // the number of hot optimize operations currently in progress on this tree
uint32_t count_of_optimize_in_progress_read_from_disk; // the number of hot optimize operations in progress on this tree at the time of the last crash (this field is in-memory only)
MSN msn_at_start_of_last_completed_optimize; // all messages before this msn have been applied to leaf nodes
enum toku_compression_method compression_method;
};
struct brt {
......
......@@ -348,9 +348,9 @@ serialize_brtnode_partition(BRTNODE node, int i, struct sub_block *sb) {
// into a newly allocated buffer sb->compressed_ptr
//
static void
compress_brtnode_sub_block(struct sub_block *sb) {
compress_brtnode_sub_block(struct sub_block *sb, enum toku_compression_method method) {
assert(sb->compressed_ptr == NULL);
set_compressed_size_bound(sb);
set_compressed_size_bound(sb, method);
// add 8 extra bytes, 4 for compressed size, 4 for decompressed size
sb->compressed_ptr = toku_xmalloc(sb->compressed_size_bound + 8);
//
......@@ -371,7 +371,8 @@ compress_brtnode_sub_block(struct sub_block *sb) {
sb->compressed_size = compress_nocrc_sub_block(
sb,
(char *)sb->compressed_ptr + 8,
sb->compressed_size_bound
sb->compressed_size_bound,
method
);
u_int32_t* extra = (u_int32_t *)(sb->compressed_ptr);
......@@ -691,13 +692,13 @@ static void
serialize_and_compress_partition(BRTNODE node, int childnum, SUB_BLOCK sb)
{
serialize_brtnode_partition(node, childnum, sb);
compress_brtnode_sub_block(sb);
compress_brtnode_sub_block(sb, node->h->compression_method);
}
void
toku_create_compressed_partition_from_available(
BRTNODE node,
int childnum,
BRTNODE node,
int childnum,
SUB_BLOCK sb
)
{
......@@ -773,7 +774,7 @@ toku_serialize_brtnode_to_memory (BRTNODE node,
// This does NOT include the header
//
serialize_brtnode_info(node, &sb_node_info);
compress_brtnode_sub_block(&sb_node_info);
compress_brtnode_sub_block(&sb_node_info, node->h->compression_method);
// now we have compressed each of our pieces into individual sub_blocks,
// we can put the header and all the subblocks into a single buffer
......@@ -1838,6 +1839,8 @@ serialize_brt_header_min_size (u_int32_t version) {
switch(version) {
case BRT_LAYOUT_VERSION_19:
size += 1; // compression method
case BRT_LAYOUT_VERSION_18:
size += sizeof(uint64_t); // time_of_last_optimize_begin
size += sizeof(uint64_t); // time_of_last_optimize_end
......@@ -1923,6 +1926,7 @@ int toku_serialize_brt_header_to_wbuf (struct wbuf *wbuf, struct brt_header *h,
wbuf_ulonglong(wbuf, h->time_of_last_optimize_end);
wbuf_int(wbuf, h->count_of_optimize_in_progress);
wbuf_MSN(wbuf, h->msn_at_start_of_last_completed_optimize);
wbuf_char(wbuf, (unsigned char) h->compression_method);
u_int32_t checksum = x1764_finish(&wbuf->checksum);
wbuf_int(wbuf, checksum);
lazy_assert(wbuf->ndone == wbuf->size);
......@@ -2204,6 +2208,17 @@ deserialize_brtheader (int fd, struct rbuf *rb, struct brt_header **brth) {
h->count_of_optimize_in_progress_read_from_disk = h->count_of_optimize_in_progress;
h->msn_at_start_of_last_completed_optimize = rbuf_msn(&rc);
}
if (h->layout_version >= BRT_LAYOUT_VERSION_19) {
unsigned char method = rbuf_char(&rc);
h->compression_method = (enum toku_compression_method) method;
} else {
// we hard coded zlib until 5.2, then quicklz in 5.2
if (h->layout_version < BRT_LAYOUT_VERSION_18) {
h->compression_method = TOKU_ZLIB_METHOD;
} else {
h->compression_method = TOKU_QUICKLZ_METHOD;
}
}
(void)rbuf_int(&rc); //Read in checksum and ignore (already verified).
if (rc.ndone!=rc.size) {ret = EINVAL; goto died1;}
......@@ -2257,6 +2272,7 @@ deserialize_brtheader_versioned (int fd, struct rbuf *rb, struct brt_header **br
case BRT_LAYOUT_VERSION_14:
h->basementnodesize = 128*1024; // basement nodes added in v15
//fall through on purpose
case BRT_LAYOUT_VERSION_19:
case BRT_LAYOUT_VERSION_18:
case BRT_LAYOUT_VERSION_17: // version 17 never released to customers
case BRT_LAYOUT_VERSION_16: // version 16 never released to customers
......@@ -2527,10 +2543,11 @@ static int
serialize_uncompressed_block_to_memory(char * uncompressed_buf,
int n_sub_blocks,
struct sub_block sub_block[/*n_sub_blocks*/],
enum toku_compression_method method,
/*out*/ size_t *n_bytes_to_write,
/*out*/ char **bytes_to_write) {
// allocate space for the compressed uncompressed_buf
size_t compressed_len = get_sum_compressed_size_bound(n_sub_blocks, sub_block);
size_t compressed_len = get_sum_compressed_size_bound(n_sub_blocks, sub_block, method);
size_t sub_block_header_len = sub_block_header_size(n_sub_blocks);
size_t header_len = node_header_overhead + sub_block_header_len + sizeof (uint32_t); // node + sub_block + checksum
char *XMALLOC_N(header_len + compressed_len, compressed_buf);
......@@ -2546,7 +2563,7 @@ serialize_uncompressed_block_to_memory(char * uncompressed_buf,
// compress all of the sub blocks
char *uncompressed_ptr = uncompressed_buf + node_header_overhead;
char *compressed_ptr = compressed_buf + header_len;
compressed_len = compress_all_sub_blocks(n_sub_blocks, sub_block, uncompressed_ptr, compressed_ptr, num_cores, brt_pool);
compressed_len = compress_all_sub_blocks(n_sub_blocks, sub_block, uncompressed_ptr, compressed_ptr, num_cores, brt_pool, method);
//if (0) printf("Block %" PRId64 " Size before compressing %u, after compression %"PRIu64"\n", blocknum.b, calculated_size-node_header_overhead, (uint64_t) compressed_len);
......@@ -2576,6 +2593,7 @@ serialize_uncompressed_block_to_memory(char * uncompressed_buf,
static int
toku_serialize_rollback_log_to_memory (ROLLBACK_LOG_NODE log,
int UU(n_workitems), int UU(n_threads),
enum toku_compression_method method,
/*out*/ size_t *n_bytes_to_write,
/*out*/ char **bytes_to_write) {
// get the size of the serialized node
......@@ -2600,7 +2618,7 @@ toku_serialize_rollback_log_to_memory (ROLLBACK_LOG_NODE log,
serialize_rollback_log_node_to_buf(log, buf, calculated_size, n_sub_blocks, sub_block);
//Compress and malloc buffer to write
int result = serialize_uncompressed_block_to_memory(buf, n_sub_blocks, sub_block,
int result = serialize_uncompressed_block_to_memory(buf, n_sub_blocks, sub_block, method,
n_bytes_to_write, bytes_to_write);
toku_free(buf);
return result;
......@@ -2613,7 +2631,7 @@ toku_serialize_rollback_log_to (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE log
size_t n_to_write;
char *compressed_buf;
{
int r = toku_serialize_rollback_log_to_memory(log, n_workitems, n_threads, &n_to_write, &compressed_buf);
int r = toku_serialize_rollback_log_to_memory(log, n_workitems, n_threads, h->compression_method, &n_to_write, &compressed_buf);
if (r!=0) return r;
}
......
......@@ -3349,6 +3349,7 @@ brt_init_header (BRT t, TOKUTXN txn) {
//Assign blocknum for root block, also dirty the header
toku_allocate_blocknum(t->h->blocktable, &root, t->h);
t->h->root_blocknum = root;
t->h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
toku_list_init(&t->h->live_brts);
toku_list_init(&t->h->zombie_brts);
......@@ -3391,6 +3392,20 @@ brt_alloc_init_header(BRT t, TOKUTXN txn) {
return r;
}
// Effect: Store METHOD as the compression method in the header of brt T.
// Returns: 0 always; METHOD is stored as given, without validation.
int
toku_brt_set_compression_method(BRT t, enum toku_compression_method method)
{
    int r = 0;
    t->h->compression_method = method;
    return r;
}
// Effect: Copy the compression method stored in the header of brt T into *METHODP.
// Returns: 0 always.
int
toku_brt_get_compression_method(BRT t, enum toku_compression_method *methodp)
{
    enum toku_compression_method current = t->h->compression_method;
    *methodp = current;
    return 0;
}
int toku_read_brt_header_and_store_in_cachefile (BRT brt, CACHEFILE cf, LSN max_acceptable_lsn, struct brt_header **header, BOOL* was_open)
// If the cachefile already has the header, then just get it.
// If the cachefile has not been initialized, then don't modify anything.
......@@ -6857,6 +6872,7 @@ toku_brt_header_init(struct brt_header *h,
h->root_blocknum = root_blocknum_on_disk;
h->flags = 0;
h->root_xid_that_created = root_xid_that_created;
h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
}
#include <valgrind/helgrind.h>
......
......@@ -14,6 +14,7 @@
#include "log.h"
#include "brt-search.h"
#include "c_dialects.h"
#include "compress.h"
C_BEGIN
......@@ -49,6 +50,8 @@ int toku_brt_get_nodesize(BRT, unsigned int *nodesize) __attribute__ ((warn_unus
void toku_brt_get_maximum_advised_key_value_lengths(unsigned int *klimit, unsigned int *vlimit);
int toku_brt_set_basementnodesize(BRT, unsigned int basementnodesize) __attribute__ ((warn_unused_result));
int toku_brt_get_basementnodesize(BRT, unsigned int *basementnodesize) __attribute__ ((warn_unused_result));
int toku_brt_set_compression_method(BRT, enum toku_compression_method) __attribute__ ((warn_unused_result));
int toku_brt_get_compression_method(BRT, enum toku_compression_method *) __attribute__((warn_unused_result));
int toku_brt_set_bt_compare(BRT, brt_compare_func) __attribute__ ((warn_unused_result));
brt_compare_func toku_brt_get_bt_compare (BRT brt);
......
......@@ -23,6 +23,7 @@ enum brt_layout_version_e {
// ALERT ALERT ALERT: version 16 never released to customers, internal and beta use only
BRT_LAYOUT_VERSION_17 = 17, // Dr. No: Add STAT64INFO_S to brt_header
BRT_LAYOUT_VERSION_18 = 18, // Dr. No: Add HOT info to brt_header
BRT_LAYOUT_VERSION_19 = 19, // Doofenshmirtz: Add compression method, msn_for_upgrade, TODO
BRT_NEXT_VERSION, // the version after the current version
BRT_LAYOUT_VERSION = BRT_NEXT_VERSION-1, // A hack so I don't have to change this line.
BRT_LAYOUT_MIN_SUPPORTED_VERSION = BRT_LAYOUT_VERSION_13, // Minimum version supported
......
......@@ -1848,6 +1848,7 @@ static int cachetable_put_internal(
);
assert(p);
nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex);
//note_hash_count(count);
return 0;
}
......@@ -2750,6 +2751,7 @@ int toku_cachetable_get_and_pin_nonblocking (
if (!nb_mutex_writers(&p->value_nb_mutex) &&
(!may_modify_value || resolve_checkpointing_fast(p)))
{
//cachetable_hit++;
nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex);
if (may_modify_value && p->checkpoint_pending) {
write_locked_pair_for_checkpoint(ct, p);
......
......@@ -3,22 +3,42 @@
#ident "$Id$"
#include <zlib.h>
#include <lzma.h>
#include "compress.h"
#include "memory.h"
#include "quicklz.h"
#include "toku_assert.h"
static inline enum toku_compression_method
normalize_compression_method(enum toku_compression_method method)
// Effect: Translate the "friendly" aliases into the concrete method they stand for:
//  TOKU_FAST_COMPRESSION_METHOD  becomes TOKU_QUICKLZ_METHOD,
//  TOKU_SMALL_COMPRESSION_METHOD becomes TOKU_LZMA_METHOD.
//  Every other value is returned unchanged.
{
    if (method == TOKU_FAST_COMPRESSION_METHOD) {
        return TOKU_QUICKLZ_METHOD;
    }
    if (method == TOKU_SMALL_COMPRESSION_METHOD) {
        return TOKU_LZMA_METHOD;
    }
    // Not an alias: hand it back as-is.
    return method;
}
size_t toku_compress_bound (enum toku_compression_method a, size_t size)
// See compress.h for the specification of this function.
{
a = normalize_compression_method(a);
switch (a) {
case TOKU_NO_COMPRESSION:
return size + 1;
case TOKU_LZMA_METHOD:
return 1+lzma_stream_buffer_bound(size); // We need one extra for the rfc1950-style header byte (bits 0-3 are TOKU_LZMA_METHOD (10), bits 4-7 are the compression level)
case TOKU_QUICKLZ_METHOD:
return size+400 + 1; // quicklz manual says 400 bytes is enough. We need one more byte for the rfc1950-style header byte. bits 0-3 are 9, bits 4-7 are the QLZ_COMPRESSION_LEVEL.
case TOKU_ZLIB_METHOD:
return compressBound (size);
default:
break;
}
// fall through for bad enum (thus compiler can warn us if we didn't use all the enums
assert(0); return 0;
......@@ -32,6 +52,7 @@ void toku_compress (enum toku_compression_method a,
const Bytef *source, uLong sourceLen)
// See compress.h for the specification of this function.
{
a = normalize_compression_method(a);
assert(sourceLen < (1LL << 32));
switch (a) {
case TOKU_NO_COMPRESSION:
......@@ -60,7 +81,29 @@ void toku_compress (enum toku_compression_method a,
// Fill in that first byte
dest[0] = TOKU_QUICKLZ_METHOD + (QLZ_COMPRESSION_LEVEL << 4);
return;
}}
}
case TOKU_LZMA_METHOD: {
const int lzma_compression_level = 2;
if (sourceLen==0) {
// lzma version 4.999 requires at least one byte, so we'll do it ourselves.
assert(1<=*destLen);
*destLen = 1;
} else {
size_t out_pos = 1;
lzma_ret r = lzma_easy_buffer_encode(lzma_compression_level, LZMA_CHECK_CRC32, NULL,
source, sourceLen,
dest, &out_pos, *destLen);
assert(out_pos < *destLen);
assert(r==LZMA_OK);
*destLen = out_pos;
}
dest[0] = TOKU_LZMA_METHOD + (lzma_compression_level << 4);
return;
}
default:
break;
}
// default fall through to error.
assert(0);
}
......@@ -92,6 +135,24 @@ void toku_decompress (Bytef *dest, uLongf destLen,
assert(destLen==0);
}
return;
case TOKU_LZMA_METHOD: {
if (sourceLen>1) {
uint64_t memlimit = UINT64_MAX;
size_t out_pos = 0;
size_t in_pos = 1;
lzma_ret r = lzma_stream_buffer_decode(&memlimit, // memlimit, use UINT64_MAX to disable this check
0, // flags
NULL, // allocator
source, &in_pos, sourceLen,
dest, &out_pos, destLen);
assert(r==LZMA_OK);
assert(out_pos == destLen);
} else {
// length 1 means there is no data, so do nothing.
assert(destLen==0);
}
return;
}
}
// default fall through to error.
assert(0);
......
......@@ -5,16 +5,11 @@
#ident "$Id$"
#include <zlib.h>
#include <db.h>
// The following provides an abstraction of quicklz and zlib.
// We offer two compression methods: ZLIB and QUICKLZ.
// The resulting byte string includes enough information for us to decompress it. That is, we can tell whether it's z-compressed or qz-compressed.
enum toku_compression_method {
TOKU_NO_COMPRESSION = 0, // "identity" compression
TOKU_ZLIB_METHOD = 8, // RFC 1950 says use 8 for zlib. It reserves 15 to allow more bytes.
TOKU_QUICKLZ_METHOD = 9 // We use 9 for QUICKLZ with compression level = 3. I couldn't find any standard for any other numbers, so I just use 9. -Bradley
};
// The following provides an abstraction of quicklz and zlib.
// We offer three compression methods: ZLIB, QUICKLZ, and LZMA, as well as a "no compression" option. These options are declared in make_tdb.c.
// The resulting byte string includes enough information for us to decompress it. That is, we can tell whether it's z-compressed or qz-compressed or xz-compressed.
size_t toku_compress_bound (enum toku_compression_method a, size_t size);
// Effect: Return the number of bytes needed to compress a buffer of size SIZE using compression method A.
......
......@@ -37,24 +37,17 @@ sub_block_header_size(int n_sub_blocks) {
return sizeof (u_int32_t) + n_sub_blocks * sizeof (struct stored_sub_block);
}
// Allow the makefile to optionally configure for no compression
#ifdef TOKU_CONFIG_NO_COMPRESSION
static enum toku_compression_method toku_compress_method = TOKU_NO_COMPRESSION;
#else
static enum toku_compression_method toku_compress_method = TOKU_QUICKLZ_METHOD;
#endif
void
set_compressed_size_bound(struct sub_block *se) {
se->compressed_size_bound = toku_compress_bound(toku_compress_method, se->uncompressed_size);
set_compressed_size_bound(struct sub_block *se, enum toku_compression_method method) {
se->compressed_size_bound = toku_compress_bound(method, se->uncompressed_size);
}
// get the sum of the sub block compressed sizes
size_t
get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[]) {
get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[], enum toku_compression_method method) {
size_t compressed_size_bound = 0;
for (int i = 0; i < n_sub_blocks; i++) {
sub_block[i].compressed_size_bound = toku_compress_bound(toku_compress_method, sub_block[i].uncompressed_size);
sub_block[i].compressed_size_bound = toku_compress_bound(method, sub_block[i].uncompressed_size);
compressed_size_bound += sub_block[i].compressed_size_bound;
}
return compressed_size_bound;
......@@ -147,23 +140,11 @@ get_sub_block_index(int n_sub_blocks, struct sub_block sub_block[], size_t offse
#include "workset.h"
void
compress_work_init(struct compress_work *w, struct sub_block *sub_block) {
compress_work_init(struct compress_work *w, enum toku_compression_method method, struct sub_block *sub_block) {
w->method = method;
w->sub_block = sub_block;
}
// Effect: store A in the file-scope variable toku_compress_method.
// Only the three methods named in the switch are accepted; any other value
// leaves the switch without returning and trips assert(0).
void toku_set_default_compression_method (enum toku_compression_method a) {
switch (a) {
case TOKU_NO_COMPRESSION:
case TOKU_ZLIB_METHOD:
case TOKU_QUICKLZ_METHOD:
toku_compress_method = a;
return;
}
// fall through to error
assert(0);
}
//
// takes the uncompressed contents of sub_block
// and compresses them into sb_compressed_ptr
......@@ -172,28 +153,30 @@ void toku_set_default_compression_method (enum toku_compression_method a) {
//
u_int32_t
compress_nocrc_sub_block(
struct sub_block *sub_block,
void* sb_compressed_ptr,
u_int32_t cs_bound
)
struct sub_block *sub_block,
void* sb_compressed_ptr,
u_int32_t cs_bound,
enum toku_compression_method method
)
{
// compress it
Bytef *uncompressed_ptr = (Bytef *) sub_block->uncompressed_ptr;
Bytef *compressed_ptr = (Bytef *) sb_compressed_ptr;
uLongf uncompressed_len = sub_block->uncompressed_size;
uLongf real_compressed_len = cs_bound;
toku_compress(toku_compress_method,
toku_compress(method,
compressed_ptr, &real_compressed_len,
uncompressed_ptr, uncompressed_len);
return real_compressed_len;
return real_compressed_len;
}
void
compress_sub_block(struct sub_block *sub_block) {
compress_sub_block(struct sub_block *sub_block, enum toku_compression_method method) {
sub_block->compressed_size = compress_nocrc_sub_block(
sub_block,
sub_block->compressed_ptr,
sub_block->compressed_size_bound
sub_block,
sub_block->compressed_ptr,
sub_block->compressed_size_bound,
method
);
// checksum it
sub_block->xsum = x1764_memory(sub_block->compressed_ptr, sub_block->compressed_size);
......@@ -206,14 +189,14 @@ compress_worker(void *arg) {
struct compress_work *w = (struct compress_work *) workset_get(ws);
if (w == NULL)
break;
compress_sub_block(w->sub_block);
compress_sub_block(w->sub_block, w->method);
}
workset_release_ref(ws);
return arg;
}
size_t
compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool) {
compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool, enum toku_compression_method method) {
char *compressed_base_ptr = compressed_ptr;
size_t compressed_len;
......@@ -223,7 +206,7 @@ compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *un
// single sub-block
sub_block[0].uncompressed_ptr = uncompressed_ptr;
sub_block[0].compressed_ptr = compressed_ptr;
compress_sub_block(&sub_block[0]);
compress_sub_block(&sub_block[0], method);
compressed_len = sub_block[0].compressed_size;
} else {
// multiple sub-blocks
......@@ -241,7 +224,7 @@ compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *un
for (int i = 0; i < n_sub_blocks; i++) {
sub_block[i].uncompressed_ptr = uncompressed_ptr;
sub_block[i].compressed_ptr = compressed_ptr;
compress_work_init(&work[i], &sub_block[i]);
compress_work_init(&work[i], method, &sub_block[i]);
workset_put_locked(&ws, &work[i].base);
uncompressed_ptr += sub_block[i].uncompressed_size;
compressed_ptr += sub_block[i].compressed_size_bound;
......
......@@ -12,9 +12,6 @@
extern "C" {
#endif
void toku_set_default_compression_method (enum toku_compression_method a);
// Effect: for the following functions, set the default compression method.
static const int max_sub_blocks = 8;
static const int target_sub_block_size = 512*1024;
static const int max_basement_nodes = 32;
......@@ -46,11 +43,11 @@ size_t
sub_block_header_size(int n_sub_blocks);
void
set_compressed_size_bound(struct sub_block *se);
set_compressed_size_bound(struct sub_block *se, enum toku_compression_method method);
// get the sum of the sub block compressed sizes
size_t
get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[]);
get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[], enum toku_compression_method method);
// get the sum of the sub block uncompressed sizes
size_t
......@@ -76,27 +73,29 @@ get_sub_block_index(int n_sub_blocks, struct sub_block sub_block[], size_t offse
// One unit of work for compress_worker: which sub block to compress and
// which compression method to use on it.
struct compress_work {
struct work base; // the member handed to workset_put_locked; presumably the workset linkage (confirm in workset.h)
enum toku_compression_method method; // method passed to compress_sub_block for this item
struct sub_block *sub_block; // the sub block this work item compresses
};
void
compress_work_init(struct compress_work *w, struct sub_block *sub_block);
compress_work_init(struct compress_work *w, enum toku_compression_method method, struct sub_block *sub_block);
u_int32_t
compress_nocrc_sub_block(
struct sub_block *sub_block,
void* sb_compressed_ptr,
u_int32_t cs_bound
struct sub_block *sub_block,
void* sb_compressed_ptr,
u_int32_t cs_bound,
enum toku_compression_method method
);
void
compress_sub_block(struct sub_block *sub_block);
compress_sub_block(struct sub_block *sub_block, enum toku_compression_method method);
void *
compress_worker(void *arg);
size_t
compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool);
compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool, enum toku_compression_method method);
struct decompress_work {
struct work base;
......
......@@ -22,6 +22,7 @@ static void test_compress_buf_method (unsigned char *buf, int i, enum toku_compr
// Run test_compress_buf_method on BUF with argument I once per method:
// ZLIB, then QUICKLZ, then LZMA.
static void test_compress_buf (unsigned char *buf, int i) {
test_compress_buf_method(buf, i, TOKU_ZLIB_METHOD);
test_compress_buf_method(buf, i, TOKU_QUICKLZ_METHOD);
test_compress_buf_method(buf, i, TOKU_LZMA_METHOD);
}
static void test_compress_i (int i) {
......
......@@ -8,6 +8,7 @@
#include <errno.h>
#include <string.h>
#include "compress.h"
#include "sub_block.h"
int verbose;
......@@ -25,7 +26,7 @@ set_uint8_at_offset(void *vp, size_t offset, uint8_t newv) {
}
static void
test_sub_block_checksum(void *buf, int total_size, int my_max_sub_blocks, int n_cores, struct toku_thread_pool *pool) {
test_sub_block_checksum(void *buf, int total_size, int my_max_sub_blocks, int n_cores, struct toku_thread_pool *pool, enum toku_compression_method method) {
if (verbose)
printf("%s:%d %d %d\n", __FUNCTION__, __LINE__, total_size, my_max_sub_blocks);
......@@ -40,11 +41,11 @@ test_sub_block_checksum(void *buf, int total_size, int my_max_sub_blocks, int n_
struct sub_block sub_blocks[n_sub_blocks];
set_all_sub_block_sizes(total_size, sub_block_size, n_sub_blocks, sub_blocks);
size_t cbuf_size_bound = get_sum_compressed_size_bound(n_sub_blocks, sub_blocks);
size_t cbuf_size_bound = get_sum_compressed_size_bound(n_sub_blocks, sub_blocks, method);
void *cbuf = toku_malloc(cbuf_size_bound);
assert(cbuf);
size_t cbuf_size = compress_all_sub_blocks(n_sub_blocks, sub_blocks, buf, cbuf, n_cores, pool);
size_t cbuf_size = compress_all_sub_blocks(n_sub_blocks, sub_blocks, buf, cbuf, n_cores, pool, method);
assert(cbuf_size <= cbuf_size_bound);
void *ubuf = toku_malloc(total_size);
......@@ -92,16 +93,16 @@ set_random(void *buf, int total_size) {
}
static void
run_test(int total_size, int n_cores, struct toku_thread_pool *pool) {
run_test(int total_size, int n_cores, struct toku_thread_pool *pool, enum toku_compression_method method) {
void *buf = toku_malloc(total_size);
assert(buf);
for (int my_max_sub_blocks = 1; my_max_sub_blocks <= max_sub_blocks; my_max_sub_blocks++) {
memset(buf, 0, total_size);
test_sub_block_checksum(buf, total_size, my_max_sub_blocks, n_cores, pool);
test_sub_block_checksum(buf, total_size, my_max_sub_blocks, n_cores, pool, method);
set_random(buf, total_size);
test_sub_block_checksum(buf, total_size, my_max_sub_blocks, n_cores, pool);
test_sub_block_checksum(buf, total_size, my_max_sub_blocks, n_cores, pool, method);
}
toku_free(buf);
......@@ -141,7 +142,10 @@ test_main (int argc, const char *argv[]) {
for (int total_size = 256*1024; total_size <= 4*1024*1024; total_size *= 2) {
for (int size = total_size - e; size <= total_size + e; size++) {
run_test(size, n_cores, pool);
run_test(size, n_cores, pool, TOKU_NO_COMPRESSION);
run_test(size, n_cores, pool, TOKU_ZLIB_METHOD);
run_test(size, n_cores, pool, TOKU_QUICKLZ_METHOD);
run_test(size, n_cores, pool, TOKU_LZMA_METHOD);
}
}
......
......@@ -13,7 +13,7 @@
int verbose;
static void
test_sub_block_compression(void *buf, int total_size, int my_max_sub_blocks, int n_cores) {
test_sub_block_compression(void *buf, int total_size, int my_max_sub_blocks, int n_cores, enum toku_compression_method method) {
if (verbose)
printf("%s:%d %d %d\n", __FUNCTION__, __LINE__, total_size, my_max_sub_blocks);
......@@ -28,11 +28,11 @@ test_sub_block_compression(void *buf, int total_size, int my_max_sub_blocks, int
struct sub_block sub_blocks[n_sub_blocks];
set_all_sub_block_sizes(total_size, sub_block_size, n_sub_blocks, sub_blocks);
size_t cbuf_size_bound = get_sum_compressed_size_bound(n_sub_blocks, sub_blocks);
size_t cbuf_size_bound = get_sum_compressed_size_bound(n_sub_blocks, sub_blocks, method);
void *cbuf = toku_malloc(cbuf_size_bound);
assert(cbuf);
size_t cbuf_size = compress_all_sub_blocks(n_sub_blocks, sub_blocks, buf, cbuf, n_cores, NULL);
size_t cbuf_size = compress_all_sub_blocks(n_sub_blocks, sub_blocks, buf, cbuf, n_cores, NULL, method);
assert(cbuf_size <= cbuf_size_bound);
void *ubuf = toku_malloc(total_size);
......@@ -55,16 +55,16 @@ set_random(void *buf, int total_size) {
}
static void
run_test(int total_size, int n_cores) {
run_test(int total_size, int n_cores, enum toku_compression_method method) {
void *buf = toku_malloc(total_size);
assert(buf);
for (int my_max_sub_blocks = 1; my_max_sub_blocks <= max_sub_blocks; my_max_sub_blocks++) {
memset(buf, 0, total_size);
test_sub_block_compression(buf, total_size, my_max_sub_blocks, n_cores);
test_sub_block_compression(buf, total_size, my_max_sub_blocks, n_cores, method);
set_random(buf, total_size);
test_sub_block_compression(buf, total_size, my_max_sub_blocks, n_cores);
test_sub_block_compression(buf, total_size, my_max_sub_blocks, n_cores, method);
}
toku_free(buf);
......@@ -96,7 +96,10 @@ test_main (int argc, const char *argv[]) {
for (int total_size = 256*1024; total_size <= 4*1024*1024; total_size *= 2) {
for (int size = total_size - e; size <= total_size + e; size++) {
run_test(size, n_cores);
run_test(size, n_cores, TOKU_NO_COMPRESSION);
run_test(size, n_cores, TOKU_ZLIB_METHOD);
run_test(size, n_cores, TOKU_QUICKLZ_METHOD);
run_test(size, n_cores, TOKU_LZMA_METHOD);
}
}
......
......@@ -80,8 +80,7 @@ export.def: export.map Makefile
buildlocktrees: $(LOCKTREE) $(RANGETREE) ;
$(LIBRARY): $(OBJS) $(LOCKTREE) $(RANGETREE) $(NEWBRT) $(DEPEND_COMPILE) $(DEPEND_LINK)
NEWBRT_OFILES = $(patsubst %,../%,$(shell cat ../lib/newbrt.olist))
$(LIBRARY): LINK_FILES=$(OBJS) $(LOCKTREE) $(RANGETREE) $(NEWBRT_OFILES)
$(LIBRARY): LINK_FILES=$(OBJS) $(LOCKTREE) $(RANGETREE) ../lib/libnewbrt.a $(LZMA_A)
ifeq ($(CC),icc)
ifeq ($(HAVE_CILK),1)
ifeq (0,0)
......
This diff is collapsed.
......@@ -579,6 +579,20 @@ toku_db_get_readpagesize(DB *db, u_int32_t *readpagesize_ptr) {
return r;
}
// Set the compression method on DB by forwarding to
// toku_brt_set_compression_method on db->i->brt.
// Returns whatever that call returns.
static int
toku_db_set_compression_method(DB *db, enum toku_compression_method compression_method) {
// NOTE(review): presumably returns early with an error when the db is in a panicked state -- confirm against the macro definition.
HANDLE_PANICKED_DB(db);
int r = toku_brt_set_compression_method(db->i->brt, compression_method);
return r;
}
// Fetch the compression method of DB by forwarding to
// toku_brt_get_compression_method on db->i->brt, which receives
// compression_method_ptr as its output argument.
// Returns whatever that call returns.
static int
toku_db_get_compression_method(DB *db, enum toku_compression_method *compression_method_ptr) {
// NOTE(review): presumably returns early with an error when the db is in a panicked state -- confirm against the macro definition.
HANDLE_PANICKED_DB(db);
int r = toku_brt_get_compression_method(db->i->brt, compression_method_ptr);
return r;
}
static int
toku_db_stat64(DB * db, DB_TXN *txn, DB_BTREE_STAT64 *s) {
HANDLE_PANICKED_DB(db);
......@@ -834,6 +848,16 @@ locked_db_get_readpagesize(DB *db, u_int32_t *readpagesize_ptr) {
toku_ydb_lock(); int r = toku_db_get_readpagesize(db, readpagesize_ptr); toku_ydb_unlock(); return r;
}
// Wrapper that calls toku_db_set_compression_method between
// toku_ydb_lock() and toku_ydb_unlock(), and returns its result.
static int
locked_db_set_compression_method(DB *db, enum toku_compression_method compression_method) {
toku_ydb_lock(); int r = toku_db_set_compression_method(db, compression_method); toku_ydb_unlock(); return r;
}
// Wrapper that calls toku_db_get_compression_method between
// toku_ydb_lock() and toku_ydb_unlock(), and returns its result.
static int
locked_db_get_compression_method(DB *db, enum toku_compression_method *compression_method_ptr) {
toku_ydb_lock(); int r = toku_db_get_compression_method(db, compression_method_ptr); toku_ydb_unlock(); return r;
}
// TODO 2216 delete this
static int
locked_db_fd(DB * UU(db), int * UU(fdp)) {
......@@ -1033,6 +1057,8 @@ toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags) {
SDB(get_pagesize);
SDB(set_readpagesize);
SDB(get_readpagesize);
SDB(set_compression_method);
SDB(get_compression_method);
SDB(set_flags);
SDB(get_flags);
SDB(fd);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment