Commit 3ed34102 authored by Leif Walsh's avatar Leif Walsh Committed by Yoni Fogel

[t:4635] merging fractal tree and handlerton code to main

git-svn-id: file:///svn/toku/tokudb@41503 c7de825b-a66e-492c-adef-691d508d4ae1
parent 7c6f71ce
......@@ -85,7 +85,7 @@ fastcheckonlyfailydb: fastbuildtests
clean: $(patsubst %,%.dir.clean,$(SRCDIRS)) cleanlib
cleanlib:
rm -rf lib/*.$(SOEXT) lib/*.$(AEXT) lib/*.bundle
rm -rf lib/*.$(SOEXT) $(filter-out lib/liblzma_gcc_dbg.$(AEXT) lib/liblzma_gcc_opt.$(AEXT) lib/liblzma_icc_opt.$(AEXT),$(wildcard lib/*.$(AEXT))) lib/*.bundle lib/*.olist
# This does not work, and probably hasn't worked since revision ~2000
# install:
......
......@@ -429,6 +429,8 @@ static void print_db_struct (void) {
"int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION)",
"int (*get_readpagesize)(DB*,u_int32_t*)",
"int (*set_readpagesize)(DB*,u_int32_t)",
"int (*get_compression_method)(DB*,TOKU_COMPRESSION_METHOD*)",
"int (*set_compression_method)(DB*,TOKU_COMPRESSION_METHOD)",
"int (*set_indexer)(DB*, DB_INDEXER*)",
"void (*get_indexer)(DB*, DB_INDEXER**)",
"int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going)",
......@@ -554,6 +556,17 @@ int main (int argc, char *const argv[] __attribute__((__unused__))) {
printf(" u_int64_t bt_verify_time_sec; /* Time of last verification, in seconds */\n");
printf("} DB_BTREE_STAT64;\n");
// compression methods
printf("typedef enum toku_compression_method {\n");
printf(" TOKU_NO_COMPRESSION = 0,\n"); // "identity" compression
printf(" TOKU_ZLIB_METHOD = 8,\n"); // RFC 1950 says use 8 for zlib. It reserves 15 to allow more bytes.
printf(" TOKU_QUICKLZ_METHOD = 9,\n"); // We use 9 for QUICKLZ (the QLZ compression level is stored int he high-order nibble). I couldn't find any standard for any other numbers, so I just use 9. -Bradley
printf(" TOKU_LZMA_METHOD = 10,\n"); // We use 10 for LZMA. (Note the compression level is stored in the high-order nibble).
printf(" TOKU_FAST_COMPRESSION_METHOD = 1,\n"); // friendlier names
printf(" TOKU_SMALL_COMPRESSION_METHOD = 2,\n");
printf(" TOKU_DEFAULT_COMPRESSION_METHOD = TOKU_FAST_COMPRESSION_METHOD,\n"); // default is quicklz
printf("} TOKU_COMPRESSION_METHOD;\n");
//bulk loader
printf("typedef struct __toku_loader DB_LOADER;\n");
printf("struct __toku_loader_internal;\n");
......
......@@ -43,6 +43,15 @@ typedef struct __toku_db_btree_stat64 {
u_int64_t bt_modify_time_sec; /* Time of last serialization, in seconds */
u_int64_t bt_verify_time_sec; /* Time of last verification, in seconds */
} DB_BTREE_STAT64;
typedef enum toku_compression_method {
TOKU_NO_COMPRESSION = 0,
TOKU_ZLIB_METHOD = 8,
TOKU_QUICKLZ_METHOD = 9,
TOKU_LZMA_METHOD = 10,
TOKU_FAST_COMPRESSION_METHOD = 1,
TOKU_SMALL_COMPRESSION_METHOD = 2,
TOKU_DEFAULT_COMPRESSION_METHOD = TOKU_FAST_COMPRESSION_METHOD,
} TOKU_COMPRESSION_METHOD;
typedef struct __toku_loader DB_LOADER;
struct __toku_loader_internal;
struct __toku_loader {
......@@ -301,6 +310,8 @@ struct __toku_db {
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*get_readpagesize)(DB*,u_int32_t*);
int (*set_readpagesize)(DB*,u_int32_t);
int (*get_compression_method)(DB*,TOKU_COMPRESSION_METHOD*);
int (*set_compression_method)(DB*,TOKU_COMPRESSION_METHOD);
int (*set_indexer)(DB*, DB_INDEXER*);
void (*get_indexer)(DB*, DB_INDEXER**);
int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going);
......
/**
* \file api/lzma.h
* \brief The public API of liblzma data compression library
*
* liblzma is a public domain general-purpose data compression library with
* a zlib-like API. The native file format is .xz, but also the old .lzma
* format and raw (no headers) streams are supported. Multiple compression
* algorithms (filters) are supported. Currently LZMA2 is the primary filter.
*
* liblzma is part of XZ Utils <http://tukaani.org/xz/>. XZ Utils includes
* a gzip-like command line tool named xz and some other tools. XZ Utils
* is developed and maintained by Lasse Collin.
*
* Major parts of liblzma are based on Igor Pavlov's public domain LZMA SDK
* <http://7-zip.org/sdk.html>.
*
* The SHA-256 implementation is based on the public domain code found from
* 7-Zip <http://7-zip.org/>, which has a modified version of the public
* domain SHA-256 code found from Crypto++ <http://www.cryptopp.com/>.
* The SHA-256 code in Crypto++ was written by Kevin Springle and Wei Dai.
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
#ifndef LZMA_H
#define LZMA_H
/*****************************
* Required standard headers *
*****************************/
/*
* liblzma API headers need some standard types and macros. To allow
* including lzma.h without requiring the application to include other
* headers first, lzma.h includes the required standard headers unless
* they already seem to be included already or if LZMA_MANUAL_HEADERS
* has been defined.
*
* Here's what types and macros are needed and from which headers:
* - stddef.h: size_t, NULL
* - stdint.h: uint8_t, uint32_t, uint64_t, UINT32_C(n), uint64_C(n),
* UINT32_MAX, UINT64_MAX
*
* However, inttypes.h is a little more portable than stdint.h, although
* inttypes.h declares some unneeded things compared to plain stdint.h.
*
* The hacks below aren't perfect, specifically they assume that inttypes.h
* exists and that it typedefs at least uint8_t, uint32_t, and uint64_t,
* and that, in case of incomplete inttypes.h, unsigned int is 32-bit.
* If the application already takes care of setting up all the types and
* macros properly (for example by using gnulib's stdint.h or inttypes.h),
* we try to detect that the macros are already defined and don't include
* inttypes.h here again. However, you may define LZMA_MANUAL_HEADERS to
* force this file to never include any system headers.
*
* Some could argue that liblzma API should provide all the required types,
* for example lzma_uint64, LZMA_UINT64_C(n), and LZMA_UINT64_MAX. This was
* seen unnecessary mess, since most systems already provide all the necessary
* types and macros in the standard headers.
*
* Note that liblzma API still has lzma_bool, because using stdbool.h would
* break C89 and C++ programs on many systems. sizeof(bool) in C99 isn't
* necessarily the same as sizeof(bool) in C++.
*/
#ifndef LZMA_MANUAL_HEADERS
/*
* I suppose this works portably also in C++. Note that in C++,
* we need to get size_t into the global namespace.
*/
# include <stddef.h>
/*
* Skip inttypes.h if we already have all the required macros. If we
* have the macros, we assume that we have the matching typedefs too.
*/
# if !defined(UINT32_C) || !defined(UINT64_C) \
|| !defined(UINT32_MAX) || !defined(UINT64_MAX)
/*
* MSVC has no C99 support, and thus it cannot be used to
* compile liblzma. The liblzma API has to still be usable
* from MSVC, so we need to define the required standard
* integer types here.
*/
# if defined(_WIN32) && defined(_MSC_VER)
typedef unsigned __int8 uint8_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
# else
/* Use the standard inttypes.h. */
# ifdef __cplusplus
/*
* C99 sections 7.18.2 and 7.18.4 specify that
* in C++ implementations define the limit
* and constant macros only if specifically
* requested. Note that if you want the
* format macros (PRIu64 etc.) too, you need
* to define __STDC_FORMAT_MACROS before
* including lzma.h, since re-including
* inttypes.h with __STDC_FORMAT_MACROS
* defined doesn't necessarily work.
*/
# ifndef __STDC_LIMIT_MACROS
# define __STDC_LIMIT_MACROS 1
# endif
# ifndef __STDC_CONSTANT_MACROS
# define __STDC_CONSTANT_MACROS 1
# endif
# endif
# include <inttypes.h>
# endif
/*
* Some old systems have only the typedefs in inttypes.h, and
* lack all the macros. For those systems, we need a few more
* hacks. We assume that unsigned int is 32-bit and unsigned
* long is either 32-bit or 64-bit. If these hacks aren't
* enough, the application has to setup the types manually
* before including lzma.h.
*/
# ifndef UINT32_C
# if defined(_WIN32) && defined(_MSC_VER)
# define UINT32_C(n) n ## UI32
# else
# define UINT32_C(n) n ## U
# endif
# endif
# ifndef UINT64_C
# if defined(_WIN32) && defined(_MSC_VER)
# define UINT64_C(n) n ## UI64
# else
/* Get ULONG_MAX. */
# include <limits.h>
# if ULONG_MAX == 4294967295UL
# define UINT64_C(n) n ## ULL
# else
# define UINT64_C(n) n ## UL
# endif
# endif
# endif
# ifndef UINT32_MAX
# define UINT32_MAX (UINT32_C(4294967295))
# endif
# ifndef UINT64_MAX
# define UINT64_MAX (UINT64_C(18446744073709551615))
# endif
# endif
#endif /* ifdef LZMA_MANUAL_HEADERS */
/******************
* LZMA_API macro *
******************/
/*
* Some systems require (or at least recommend) that the functions and
* function pointers are declared specially in the headers. LZMA_API_IMPORT
* is for importing symbols and LZMA_API_CALL is to specify calling
* convention.
*
* By default it is assumed that the application will link dynamically
* against liblzma. #define LZMA_API_STATIC in your application if you
* want to link against static liblzma. If you don't care about portability
* to operating systems like Windows, or at least don't care about linking
* against static liblzma on them, don't worry about LZMA_API_STATIC. That
* is, most developers will never need to use LZMA_API_STATIC.
*
* Cygwin is a special case on Windows. We rely on GCC doing the right thing
* and thus don't use dllimport and don't specify the calling convention.
*/
#ifndef LZMA_API_IMPORT
# if !defined(LZMA_API_STATIC) && defined(_WIN32) && !defined(__CYGWIN__)
# define LZMA_API_IMPORT __declspec(dllimport)
# else
# define LZMA_API_IMPORT
# endif
#endif
#ifndef LZMA_API_CALL
# if defined(_WIN32) && !defined(__CYGWIN__)
# define LZMA_API_CALL __cdecl
# else
# define LZMA_API_CALL
# endif
#endif
#ifndef LZMA_API
# define LZMA_API(type) LZMA_API_IMPORT type LZMA_API_CALL
#endif
/***********
* nothrow *
***********/
/*
* None of the functions in liblzma may throw an exception. Even
* the functions that use callback functions won't throw exceptions,
* because liblzma would break if a callback function threw an exception.
*/
#ifndef lzma_nothrow
# if defined(__cplusplus)
# define lzma_nothrow throw()
# elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
# define lzma_nothrow __attribute__((__nothrow__))
# else
# define lzma_nothrow
# endif
#endif
/********************
* GNU C extensions *
********************/
/*
* GNU C extensions are used conditionally in the public API. It doesn't
* break anything if these are sometimes enabled and sometimes not, only
* affects warnings and optimizations.
*/
#if __GNUC__ >= 3
# ifndef lzma_attribute
# define lzma_attribute(attr) __attribute__(attr)
# endif
# ifndef lzma_restrict
# define lzma_restrict __restrict__
# endif
/* warn_unused_result was added in GCC 3.4. */
# ifndef lzma_attr_warn_unused_result
# if __GNUC__ == 3 && __GNUC_MINOR__ < 4
# define lzma_attr_warn_unused_result
# endif
# endif
#else
# ifndef lzma_attribute
# define lzma_attribute(attr)
# endif
# ifndef lzma_restrict
# if __STDC_VERSION__ >= 199901L
# define lzma_restrict restrict
# else
# define lzma_restrict
# endif
# endif
#endif
#ifndef lzma_attr_pure
# define lzma_attr_pure lzma_attribute((__pure__))
#endif
#ifndef lzma_attr_const
# define lzma_attr_const lzma_attribute((__const__))
#endif
#ifndef lzma_attr_warn_unused_result
# define lzma_attr_warn_unused_result \
lzma_attribute((__warn_unused_result__))
#endif
/**************
* Subheaders *
**************/
#ifdef __cplusplus
extern "C" {
#endif
/*
* Subheaders check that this is defined. It is to prevent including
* them directly from applications.
*/
#define LZMA_H_INTERNAL 1
/* Basic features */
#include "lzma/version.h"
#include "lzma/base.h"
#include "lzma/vli.h"
#include "lzma/check.h"
/* Filters */
#include "lzma/filter.h"
#include "lzma/subblock.h"
#include "lzma/bcj.h"
#include "lzma/delta.h"
#include "lzma/lzma.h"
/* Container formats */
#include "lzma/container.h"
/* Advanced features */
#include "lzma/stream_flags.h"
#include "lzma/block.h"
#include "lzma/index.h"
#include "lzma/index_hash.h"
/*
* All subheaders included. Undefine LZMA_H_INTERNAL to prevent applications
* re-including the subheaders.
*/
#undef LZMA_H_INTERNAL
#ifdef __cplusplus
}
#endif
#endif /* ifndef LZMA_H */
/**
* \file lzma/base.h
* \brief Data types and functions used in many places in liblzma API
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Boolean
*
* This is here because C89 doesn't have stdbool.h. To set a value for
* variables having type lzma_bool, you can use
* - C99's `true' and `false' from stdbool.h;
* - C++'s internal `true' and `false'; or
* - integers one (true) and zero (false).
*/
typedef unsigned char lzma_bool;
/**
* \brief Type of reserved enumeration variable in structures
*
* To avoid breaking library ABI when new features are added, several
* structures contain extra variables that may be used in future. Since
* sizeof(enum) can be different than sizeof(int), and sizeof(enum) may
* even vary depending on the range of enumeration constants, we specify
* a separate type to be used for reserved enumeration variables. All
* enumeration constants in liblzma API will be non-negative and less
* than 128, which should guarantee that the ABI won't break even when
* new constants are added to existing enumerations.
*/
typedef enum {
LZMA_RESERVED_ENUM = 0
} lzma_reserved_enum;
/**
* \brief Return values used by several functions in liblzma
*
* Check the descriptions of specific functions to find out which return
* values they can return. With some functions the return values may have
* more specific meanings than described here; those differences are
* described per-function basis.
*/
typedef enum {
LZMA_OK = 0,
/**<
* \brief Operation completed successfully
*/
LZMA_STREAM_END = 1,
/**<
* \brief End of stream was reached
*
* In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or
* LZMA_FINISH was finished. In decoder, this indicates
* that all the data was successfully decoded.
*
* In all cases, when LZMA_STREAM_END is returned, the last
* output bytes should be picked from strm->next_out.
*/
LZMA_NO_CHECK = 2,
/**<
* \brief Input stream has no integrity check
*
* This return value can be returned only if the
* LZMA_TELL_NO_CHECK flag was used when initializing
* the decoder. LZMA_NO_CHECK is just a warning, and
* the decoding can be continued normally.
*
* It is possible to call lzma_get_check() immediatelly after
* lzma_code has returned LZMA_NO_CHECK. The result will
* naturally be LZMA_CHECK_NONE, but the possibility to call
* lzma_get_check() may be convenient in some applications.
*/
LZMA_UNSUPPORTED_CHECK = 3,
/**<
* \brief Cannot calculate the integrity check
*
* The usage of this return value is different in encoders
* and decoders.
*
* Encoders can return this value only from the initialization
* function. If initialization fails with this value, the
* encoding cannot be done, because there's no way to produce
* output with the correct integrity check.
*
* Decoders can return this value only from lzma_code() and
* only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when
* initializing the decoder. The decoding can still be
* continued normally even if the check type is unsupported,
* but naturally the check will not be validated, and possible
* errors may go undetected.
*
* With decoder, it is possible to call lzma_get_check()
* immediatelly after lzma_code() has returned
* LZMA_UNSUPPORTED_CHECK. This way it is possible to find
* out what the unsupported Check ID was.
*/
LZMA_GET_CHECK = 4,
/**<
* \brief Integrity check type is now available
*
* This value can be returned only by the lzma_code() function
* and only if the decoder was initialized with the
* LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the
* application that it may now call lzma_get_check() to find
* out the Check ID. This can be used, for example, to
* implement a decoder that accepts only files that have
* strong enough integrity check.
*/
LZMA_MEM_ERROR = 5,
/**<
* \brief Cannot allocate memory
*
* Memory allocation failed, or the size of the allocation
* would be greater than SIZE_MAX.
*
* Due to internal implementation reasons, the coding cannot
* be continued even if more memory were made available after
* LZMA_MEM_ERROR.
*/
LZMA_MEMLIMIT_ERROR = 6,
/**
* \brief Memory usage limit was reached
*
* Decoder would need more memory than allowed by the
* specified memory usage limit. To continue decoding,
* the memory usage limit has to be increased with
* lzma_memlimit_set().
*/
LZMA_FORMAT_ERROR = 7,
/**<
* \brief File format not recognized
*
* The decoder did not recognize the input as supported file
* format. This error can occur, for example, when trying to
* decode .lzma format file with lzma_stream_decoder,
* because lzma_stream_decoder accepts only the .xz format.
*/
LZMA_OPTIONS_ERROR = 8,
/**<
* \brief Invalid or unsupported options
*
* Invalid or unsupported options, for example
* - unsupported filter(s) or filter options; or
* - reserved bits set in headers (decoder only).
*
* Rebuilding liblzma with more features enabled, or
* upgrading to a newer version of liblzma may help.
*/
LZMA_DATA_ERROR = 9,
/**<
* \brief Data is corrupt
*
* The usage of this return value is different in encoders
* and decoders. In both encoder and decoder, the coding
* cannot continue after this error.
*
* Encoders return this if size limits of the target file
* format would be exceeded. These limits are huge, thus
* getting this error from an encoder is mostly theoretical.
* For example, the maximum compressed and uncompressed
* size of a .xz Stream is roughly 8 EiB (2^63 bytes).
*
* Decoders return this error if the input data is corrupt.
* This can mean, for example, invalid CRC32 in headers
* or invalid check of uncompressed data.
*/
LZMA_BUF_ERROR = 10,
/**<
* \brief No progress is possible
*
* This error code is returned when the coder cannot consume
* any new input and produce any new output. The most common
* reason for this error is that the input stream being
* decoded is truncated or corrupt.
*
* This error is not fatal. Coding can be continued normally
* by providing more input and/or more output space, if
* possible.
*
* Typically the first call to lzma_code() that can do no
* progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only
* the second consecutive call doing no progress will return
* LZMA_BUF_ERROR. This is intentional.
*
* With zlib, Z_BUF_ERROR may be returned even if the
* application is doing nothing wrong, so apps will need
* to handle Z_BUF_ERROR specially. The above hack
* guarantees that liblzma never returns LZMA_BUF_ERROR
* to properly written applications unless the input file
* is truncated or corrupt. This should simplify the
* applications a little.
*/
LZMA_PROG_ERROR = 11,
/**<
* \brief Programming error
*
* This indicates that the arguments given to the function are
* invalid or the internal state of the decoder is corrupt.
* - Function arguments are invalid or the structures
* pointed by the argument pointers are invalid
* e.g. if strm->next_out has been set to NULL and
* strm->avail_out > 0 when calling lzma_code().
* - lzma_* functions have been called in wrong order
* e.g. lzma_code() was called right after lzma_end().
* - If errors occur randomly, the reason might be flaky
* hardware.
*
* If you think that your code is correct, this error code
* can be a sign of a bug in liblzma. See the documentation
* how to report bugs.
*/
} lzma_ret;
/**
* \brief The `action' argument for lzma_code()
*
* After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or LZMA_FINISH,
* the same `action' must is used until lzma_code() returns LZMA_STREAM_END.
* Also, the amount of input (that is, strm->avail_in) must not be modified
* by the application until lzma_code() returns LZMA_STREAM_END. Changing the
* `action' or modifying the amount of input will make lzma_code() return
* LZMA_PROG_ERROR.
*/
typedef enum {
LZMA_RUN = 0,
/**<
* \brief Continue coding
*
* Encoder: Encode as much input as possible. Some internal
* buffering will probably be done (depends on the filter
* chain in use), which causes latency: the input used won't
* usually be decodeable from the output of the same
* lzma_code() call.
*
* Decoder: Decode as much input as possible and produce as
* much output as possible.
*/
LZMA_SYNC_FLUSH = 1,
/**<
* \brief Make all the input available at output
*
* Normally the encoder introduces some latency.
* LZMA_SYNC_FLUSH forces all the buffered data to be
* available at output without resetting the internal
* state of the encoder. This way it is possible to use
* compressed stream for example for communication over
* network.
*
* Only some filters support LZMA_SYNC_FLUSH. Trying to use
* LZMA_SYNC_FLUSH with filters that don't support it will
* make lzma_code() return LZMA_OPTIONS_ERROR. For example,
* LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does.
*
* Using LZMA_SYNC_FLUSH very often can dramatically reduce
* the compression ratio. With some filters (for example,
* LZMA2), finetuning the compression options may help
* mitigate this problem significantly.
*
* Decoders don't support LZMA_SYNC_FLUSH.
*/
LZMA_FULL_FLUSH = 2,
/**<
* \brief Make all the input available at output
*
* Finish encoding of the current Block. All the input
* data going to the current Block must have been given
* to the encoder (the last bytes can still be pending in
* next_in). Call lzma_code() with LZMA_FULL_FLUSH until
* it returns LZMA_STREAM_END. Then continue normally with
* LZMA_RUN or finish the Stream with LZMA_FINISH.
*
* This action is currently supported only by Stream encoder
* and easy encoder (which uses Stream encoder). If there is
* no unfinished Block, no empty Block is created.
*/
LZMA_FINISH = 3
/**<
* \brief Finish the coding operation
*
* Finishes the coding operation. All the input data must
* have been given to the encoder (the last bytes can still
* be pending in next_in). Call lzma_code() with LZMA_FINISH
* until it returns LZMA_STREAM_END. Once LZMA_FINISH has
* been used, the amount of input must no longer be changed
* by the application.
*
* When decoding, using LZMA_FINISH is optional unless the
* LZMA_CONCATENATED flag was used when the decoder was
* initialized. When LZMA_CONCATENATED was not used, the only
* effect of LZMA_FINISH is that the amount of input must not
* be changed just like in the encoder.
*/
} lzma_action;
/**
* \brief Custom functions for memory handling
*
* A pointer to lzma_allocator may be passed via lzma_stream structure
* to liblzma, and some advanced functions take a pointer to lzma_allocator
* as a separate function argument. The library will use the functions
* specified in lzma_allocator for memory handling instead of the default
* malloc() and free(). C++ users should note that the custom memory
* handling functions must not throw exceptions.
*
* liblzma doesn't make an internal copy of lzma_allocator. Thus, it is
* OK to change these function pointers in the middle of the coding
* process, but obviously it must be done carefully to make sure that the
* replacement `free' can deallocate memory allocated by the earlier
* `alloc' function(s).
*/
typedef struct {
/**
* \brief Pointer to a custom memory allocation function
*
* If you don't want a custom allocator, but still want
* custom free(), set this to NULL and liblzma will use
* the standard malloc().
*
* \param opaque lzma_allocator.opaque (see below)
* \param nmemb Number of elements like in calloc(). liblzma
* will always set nmemb to 1, so it is safe to
* ignore nmemb in a custom allocator if you like.
* The nmemb argument exists only for
* compatibility with zlib and libbzip2.
* \param size Size of an element in bytes.
* liblzma never sets this to zero.
*
* \return Pointer to the beginning of a memory block of
* `size' bytes, or NULL if allocation fails
* for some reason. When allocation fails, functions
* of liblzma return LZMA_MEM_ERROR.
*
* The allocator should not waste time zeroing the allocated buffers.
* This is not only about speed, but also memory usage, since the
* operating system kernel doesn't necessarily allocate the requested
* memory in physical memory until it is actually used. With small
* input files, liblzma may actually need only a fraction of the
* memory that it requested for allocation.
*
* \note LZMA_MEM_ERROR is also used when the size of the
* allocation would be greater than SIZE_MAX. Thus,
* don't assume that the custom allocator must have
* returned NULL if some function from liblzma
* returns LZMA_MEM_ERROR.
*/
void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size);
/**
* \brief Pointer to a custom memory freeing function
*
* If you don't want a custom freeing function, but still
* want a custom allocator, set this to NULL and liblzma
* will use the standard free().
*
* \param opaque lzma_allocator.opaque (see below)
* \param ptr Pointer returned by lzma_allocator.alloc(),
* or when it is set to NULL, a pointer returned
* by the standard malloc().
*/
void (LZMA_API_CALL *free)(void *opaque, void *ptr);
/**
* \brief Pointer passed to .alloc() and .free()
*
* opaque is passed as the first argument to lzma_allocator.alloc()
* and lzma_allocator.free(). This intended to ease implementing
* custom memory allocation functions for use with liblzma.
*
* If you don't need this, you should set this to NULL.
*/
void *opaque;
} lzma_allocator;
/**
* \brief Internal data structure
*
* The contents of this structure is not visible outside the library.
*/
typedef struct lzma_internal_s lzma_internal;
/**
* \brief Passing data to and from liblzma
*
* The lzma_stream structure is used for
* - passing pointers to input and output buffers to liblzma;
* - defining custom memory hander functions; and
* - holding a pointer to coder-specific internal data structures.
*
* Typical usage:
*
* - After allocating lzma_stream (on stack or with malloc()), it must be
* initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details).
*
* - Initialize a coder to the lzma_stream, for example by using
* lzma_easy_encoder() or lzma_auto_decoder(). Some notes:
* - In contrast to zlib, strm->next_in and strm->next_out are
* ignored by all initialization functions, thus it is safe
* to not initialize them yet.
* - The initialization functions always set strm->total_in and
* strm->total_out to zero.
* - If the initialization function fails, no memory is left allocated
* that would require freeing with lzma_end() even if some memory was
* associated with the lzma_stream structure when the initialization
* function was called.
*
* - Use lzma_code() to do the actual work.
*
* - Once the coding has been finished, the existing lzma_stream can be
* reused. It is OK to reuse lzma_stream with different initialization
* function without calling lzma_end() first. Old allocations are
* automatically freed.
*
* - Finally, use lzma_end() to free the allocated memory. lzma_end() never
* frees the lzma_stream structure itself.
*
* Application may modify the values of total_in and total_out as it wants.
* They are updated by liblzma to match the amount of data read and
* written, but aren't used for anything else.
*/
typedef struct {
const uint8_t *next_in; /**< Pointer to the next input byte. */
size_t avail_in; /**< Number of available input bytes in next_in. */
uint64_t total_in; /**< Total number of bytes read by liblzma. */
uint8_t *next_out; /**< Pointer to the next output position. */
size_t avail_out; /**< Amount of free space in next_out. */
uint64_t total_out; /**< Total number of bytes written by liblzma. */
/**
* \brief Custom memory allocation functions
*
* In most cases this is NULL which makes liblzma use
* the standard malloc() and free().
*/
lzma_allocator *allocator;
/** Internal state is not visible to applications. */
lzma_internal *internal;
/*
* Reserved space to allow possible future extensions without
* breaking the ABI. Excluding the initialization of this structure,
* you should not touch these, because the names of these variables
* may change.
*/
void *reserved_ptr1;
void *reserved_ptr2;
uint64_t reserved_int1;
uint64_t reserved_int2;
lzma_reserved_enum reserved_enum1;
lzma_reserved_enum reserved_enum2;
} lzma_stream;
/**
* \brief Initialization for lzma_stream
*
* When you declare an instance of lzma_stream, you can immediatelly
* initialize it so that initialization functions know that no memory
* has been allocated yet:
*
* lzma_stream strm = LZMA_STREAM_INIT;
*
* If you need to initialize a dynamically allocated lzma_stream, you can use
* memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this
* violates the C standard since NULL may have different internal
* representation than zero, but it should be portable enough in practice.
* Anyway, for maximum portability, you can use something like this:
*
* lzma_stream tmp = LZMA_STREAM_INIT;
* *strm = tmp;
*/
#define LZMA_STREAM_INIT \
{ NULL, 0, 0, NULL, 0, 0, NULL, NULL, \
NULL, NULL, 0, 0, LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM }
/**
* \brief Encode or decode data
*
* Once the lzma_stream has been successfully initialized (e.g. with
* lzma_stream_encoder()), the actual encoding or decoding is done
* using this function. The application has to update strm->next_in,
* strm->avail_in, strm->next_out, and strm->avail_out to pass input
* to and get output from liblzma.
*
* See the description of the coder-specific initialization function to find
* out what `action' values are supported by the coder.
*/
extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Free memory allocated for the coder data structures
*
* \param strm Pointer to lzma_stream that is at least initialized
* with LZMA_STREAM_INIT.
*
* After lzma_end(strm), strm->internal is guaranteed to be NULL. No other
* members of the lzma_stream structure are touched.
*
* \note zlib indicates an error if application end()s unfinished
* stream structure. liblzma doesn't do this, and assumes that
* application knows what it is doing.
*/
extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow;
/**
* \brief Get the memory usage of decoder filter chain
*
* This function is currently supported only when *strm has been initialized
* with a function that takes a memlimit argument. With other functions, you
* should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage()
* to estimate the memory requirements.
*
* This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big
* the memory usage limit should have been to decode the input. Note that
* this may give misleading information if decoding .xz Streams that have
* multiple Blocks, because each Block can have different memory requirements.
*
* \return Rough estimate of how much memory is currently allocated
* for the filter decoders. If no filter chain is currently
* allocated, some non-zero value is still returned, which is
* less than or equal to what any filter chain would indicate
* as its memory requirement.
*
* If this function isn't supported by *strm or some other error
* occurs, zero is returned.
*/
extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm)
lzma_nothrow lzma_attr_pure;
/**
* \brief Get the current memory usage limit
*
* This function is supported only when *strm has been initialized with
* a function that takes a memlimit argument.
*
* \return On success, the current memory usage limit is returned
* (always non-zero). On error, zero is returned.
*/
extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm)
lzma_nothrow lzma_attr_pure;
/**
* \brief Set the memory usage limit
*
* This function is supported only when *strm has been initialized with
* a function that takes a memlimit argument.
*
* \return - LZMA_OK: New memory usage limit successfully set.
* - LZMA_MEMLIMIT_ERROR: The new limit is too small.
* The limit was not changed.
* - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't
* support memory usage limit or memlimit was zero.
*/
extern LZMA_API(lzma_ret) lzma_memlimit_set(
lzma_stream *strm, uint64_t memlimit) lzma_nothrow;
/**
* \file lzma/bcj.h
* \brief Branch/Call/Jump conversion filters
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/* Filter IDs for lzma_filter.id */
#define LZMA_FILTER_X86 LZMA_VLI_C(0x04)
/**<
* Filter for x86 binaries
*/
#define LZMA_FILTER_POWERPC LZMA_VLI_C(0x05)
/**<
* Filter for Big endian PowerPC binaries
*/
#define LZMA_FILTER_IA64 LZMA_VLI_C(0x06)
/**<
* Filter for IA64 (Itanium) binaries.
*/
#define LZMA_FILTER_ARM LZMA_VLI_C(0x07)
/**<
* Filter for ARM binaries.
*/
#define LZMA_FILTER_ARMTHUMB LZMA_VLI_C(0x08)
/**<
* Filter for ARMThumb binaries.
*/
#define LZMA_FILTER_SPARC LZMA_VLI_C(0x09)
/**<
* Filter for SPARC binaries.
*/
/**
* \brief Options for BCJ filters
*
* The BCJ filters never change the size of the data. Specifying options
* for them is optional: if pointer to options is NULL, default value is
* used. You probably never need to specify options to BCJ filters, so just
* set the options pointer to NULL and be happy.
*
* If options with non-default values have been specified when encoding,
* the same options must also be specified when decoding.
*
* \note At the moment, none of the BCJ filters support
* LZMA_SYNC_FLUSH. If LZMA_SYNC_FLUSH is specified,
* LZMA_OPTIONS_ERROR will be returned. If there is need,
* partial support for LZMA_SYNC_FLUSH can be added in future.
* Partial means that flushing would be possible only at
* offsets that are multiple of 2, 4, or 16 depending on
* the filter, except x86 which cannot be made to support
* LZMA_SYNC_FLUSH predictably.
*/
typedef struct {
/**
* \brief Start offset for conversions
*
* This setting is useful only when the same filter is used
* _separately_ for multiple sections of the same executable file,
* and the sections contain cross-section branch/call/jump
* instructions. In that case it is benefical to set the start
* offset of the non-first sections so that the relative addresses
* of the cross-section branch/call/jump instructions will use the
* same absolute addresses as in the first section.
*
* When the pointer to options is NULL, the default value (zero)
* is used.
*/
uint32_t start_offset;
} lzma_options_bcj;
/**
* \file lzma/block.h
* \brief .xz Block handling
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Options for the Block and Block Header encoders and decoders
*
* Different Block handling functions use different parts of this structure.
* Some read some members, other functions write, and some do both. Only the
* members listed for reading need to be initialized when the specified
* functions are called. The members marked for writing will be assigned
* new values at some point either by calling the given function or by
* later calls to lzma_code().
*/
typedef struct {
/**
* \brief Block format version
*
* To prevent API and ABI breakages if new features are needed in
* Block, a version number is used to indicate which fields in this
* structure are in use. For now, version must always be zero.
* With non-zero version, most Block related functions will return
* LZMA_OPTIONS_ERROR.
*
* The decoding functions will always set this to the lowest value
* that supports all the features indicated by the Block Header field.
* The application must check that the version number set by the
* decoding functions is supported by the application. Otherwise it
* is possible that the application will decode the Block incorrectly.
*
* Read by:
* - lzma_block_header_size()
* - lzma_block_header_encode()
* - lzma_block_compressed_size()
* - lzma_block_unpadded_size()
* - lzma_block_total_size()
* - lzma_block_encoder()
* - lzma_block_decoder()
* - lzma_block_buffer_encode()
* - lzma_block_buffer_decode()
*
* Written by:
* - lzma_block_header_decode()
*/
uint32_t version;
/**
* \brief Size of the Block Header field
*
* This is always a multiple of four.
*
* Read by:
* - lzma_block_header_encode()
* - lzma_block_header_decode()
* - lzma_block_compressed_size()
* - lzma_block_unpadded_size()
* - lzma_block_total_size()
* - lzma_block_decoder()
* - lzma_block_buffer_decode()
*
* Written by:
* - lzma_block_header_size()
* - lzma_block_buffer_encode()
*/
uint32_t header_size;
# define LZMA_BLOCK_HEADER_SIZE_MIN 8
# define LZMA_BLOCK_HEADER_SIZE_MAX 1024
/**
* \brief Type of integrity Check
*
* The Check ID is not stored into the Block Header, thus its value
* must be provided also when decoding.
*
* Read by:
* - lzma_block_header_encode()
* - lzma_block_header_decode()
* - lzma_block_compressed_size()
* - lzma_block_unpadded_size()
* - lzma_block_total_size()
* - lzma_block_encoder()
* - lzma_block_decoder()
* - lzma_block_buffer_encode()
* - lzma_block_buffer_decode()
*/
lzma_check check;
/**
* \brief Size of the Compressed Data in bytes
*
* Encoding: If this is not LZMA_VLI_UNKNOWN, Block Header encoder
* will store this value to the Block Header. Block encoder doesn't
* care about this value, but will set it once the encoding has been
* finished.
*
* Decoding: If this is not LZMA_VLI_UNKNOWN, Block decoder will
* verify that the size of the Compressed Data field matches
* compressed_size.
*
* Usually you don't know this value when encoding in streamed mode,
* and thus cannot write this field into the Block Header.
*
* In non-streamed mode you can reserve space for this field before
* encoding the actual Block. After encoding the data, finish the
* Block by encoding the Block Header. Steps in detail:
*
* - Set compressed_size to some big enough value. If you don't know
* better, use LZMA_VLI_MAX, but remember that bigger values take
* more space in Block Header.
*
* - Call lzma_block_header_size() to see how much space you need to
* reserve for the Block Header.
*
* - Encode the Block using lzma_block_encoder() and lzma_code().
* It sets compressed_size to the correct value.
*
* - Use lzma_block_header_encode() to encode the Block Header.
* Because space was reserved in the first step, you don't need
* to call lzma_block_header_size() anymore, because due to
* reserving, header_size has to be big enough. If it is "too big",
* lzma_block_header_encode() will add enough Header Padding to
* make Block Header to match the size specified by header_size.
*
* Read by:
* - lzma_block_header_size()
* - lzma_block_header_encode()
* - lzma_block_compressed_size()
* - lzma_block_unpadded_size()
* - lzma_block_total_size()
* - lzma_block_decoder()
* - lzma_block_buffer_decode()
*
* Written by:
* - lzma_block_header_decode()
* - lzma_block_compressed_size()
* - lzma_block_encoder()
* - lzma_block_decoder()
* - lzma_block_buffer_encode()
* - lzma_block_buffer_decode()
*/
lzma_vli compressed_size;
/**
* \brief Uncompressed Size in bytes
*
* This is handled very similarly to compressed_size above.
*
* uncompressed_size is needed by fewer functions than
* compressed_size. This is because uncompressed_size isn't
* needed to validate that Block stays within proper limits.
*
* Read by:
* - lzma_block_header_size()
* - lzma_block_header_encode()
* - lzma_block_decoder()
* - lzma_block_buffer_decode()
*
* Written by:
* - lzma_block_header_decode()
* - lzma_block_encoder()
* - lzma_block_decoder()
* - lzma_block_buffer_encode()
* - lzma_block_buffer_decode()
*/
lzma_vli uncompressed_size;
/**
* \brief Array of filters
*
* There can be 1-4 filters. The end of the array is marked with
* .id = LZMA_VLI_UNKNOWN.
*
* Read by:
* - lzma_block_header_size()
* - lzma_block_header_encode()
* - lzma_block_encoder()
* - lzma_block_decoder()
* - lzma_block_buffer_encode()
* - lzma_block_buffer_decode()
*
* Written by:
* - lzma_block_header_decode(): Note that this does NOT free()
* the old filter options structures. All unused filters[] will
* have .id == LZMA_VLI_UNKNOWN and .options == NULL. If
* decoding fails, all filters[] are guaranteed to be
* LZMA_VLI_UNKNOWN and NULL.
*
* \note Because of the array is terminated with
* .id = LZMA_VLI_UNKNOWN, the actual array must
* have LZMA_FILTERS_MAX + 1 members or the Block
* Header decoder will overflow the buffer.
*/
lzma_filter *filters;
/**
* \brief Raw value stored in the Check field
*
* After successful coding, the first lzma_check_size(check) bytes
* of this array contain the raw value stored in the Check field.
*
* Note that CRC32 and CRC64 are stored in little endian byte order.
* Take it into account if you display the Check values to the user.
*
* Written by:
* - lzma_block_encoder()
* - lzma_block_decoder()
* - lzma_block_buffer_encode()
* - lzma_block_buffer_decode()
*/
uint8_t raw_check[LZMA_CHECK_SIZE_MAX];
/*
* Reserved space to allow possible future extensions without
* breaking the ABI. You should not touch these, because the names
* of these variables may change. These are and will never be used
* with the currently supported options, so it is safe to leave these
* uninitialized.
*/
void *reserved_ptr1;
void *reserved_ptr2;
void *reserved_ptr3;
uint32_t reserved_int1;
uint32_t reserved_int2;
lzma_vli reserved_int3;
lzma_vli reserved_int4;
lzma_vli reserved_int5;
lzma_vli reserved_int6;
lzma_vli reserved_int7;
lzma_vli reserved_int8;
lzma_reserved_enum reserved_enum1;
lzma_reserved_enum reserved_enum2;
lzma_reserved_enum reserved_enum3;
lzma_reserved_enum reserved_enum4;
lzma_bool reserved_bool1;
lzma_bool reserved_bool2;
lzma_bool reserved_bool3;
lzma_bool reserved_bool4;
lzma_bool reserved_bool5;
lzma_bool reserved_bool6;
lzma_bool reserved_bool7;
lzma_bool reserved_bool8;
} lzma_block;
/**
* \brief Decode the Block Header Size field
*
* To decode Block Header using lzma_block_header_decode(), the size of the
* Block Header has to be known and stored into lzma_block.header_size.
* The size can be calculated from the first byte of a Block using this macro.
* Note that if the first byte is 0x00, it indicates beginning of Index; use
* this macro only when the byte is not 0x00.
*
* There is no encoding macro, because Block Header encoder is enough for that.
*/
#define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4)
/**
* \brief Calculate Block Header Size
*
* Calculate the minimum size needed for the Block Header field using the
* settings specified in the lzma_block structure. Note that it is OK to
* increase the calculated header_size value as long as it is a multiple of
* four and doesn't exceed LZMA_BLOCK_HEADER_SIZE_MAX. Increasing header_size
* just means that lzma_block_header_encode() will add Header Padding.
*
* \return - LZMA_OK: Size calculated successfully and stored to
* block->header_size.
* - LZMA_OPTIONS_ERROR: Unsupported version, filters or
* filter options.
* - LZMA_PROG_ERROR: Invalid values like compressed_size == 0.
*
* \note This doesn't check that all the options are valid i.e. this
* may return LZMA_OK even if lzma_block_header_encode() or
* lzma_block_encoder() would fail. If you want to validate the
* filter chain, consider using lzma_memlimit_encoder() which as
* a side-effect validates the filter chain.
*/
extern LZMA_API(lzma_ret) lzma_block_header_size(lzma_block *block)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Encode Block Header
*
* The caller must have calculated the size of the Block Header already with
* lzma_block_header_size(). If a value larger than the one calculated by
* lzma_block_header_size() is used, the Block Header will be padded to the
* specified size.
*
* \param out Beginning of the output buffer. This must be
* at least block->header_size bytes.
* \param block Block options to be encoded.
*
* \return - LZMA_OK: Encoding was successful. block->header_size
* bytes were written to output buffer.
* - LZMA_OPTIONS_ERROR: Invalid or unsupported options.
* - LZMA_PROG_ERROR: Invalid arguments, for example
* block->header_size is invalid or block->filters is NULL.
*/
extern LZMA_API(lzma_ret) lzma_block_header_encode(
const lzma_block *block, uint8_t *out)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Decode Block Header
*
* The size of the Block Header must have already been decoded with
* lzma_block_header_size_decode() macro and stored to block->header_size.
* block->filters must have been allocated, but not necessarily initialized.
* Possible existing filter options are _not_ freed.
*
* \param block Destination for block options with header_size
* properly initialized.
* \param allocator lzma_allocator for custom allocator functions.
* Set to NULL to use malloc() (and also free()
* if an error occurs).
* \param in Beginning of the input buffer. This must be
* at least block->header_size bytes.
*
* \return - LZMA_OK: Decoding was successful. block->header_size
* bytes were read from the input buffer.
* - LZMA_OPTIONS_ERROR: The Block Header specifies some
* unsupported options such as unsupported filters.
* - LZMA_DATA_ERROR: Block Header is corrupt, for example,
* the CRC32 doesn't match.
* - LZMA_PROG_ERROR: Invalid arguments, for example
* block->header_size is invalid or block->filters is NULL.
*/
extern LZMA_API(lzma_ret) lzma_block_header_decode(lzma_block *block,
lzma_allocator *allocator, const uint8_t *in)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Validate and set Compressed Size according to Unpadded Size
*
* Block Header stores Compressed Size, but Index has Unpadded Size. If the
* application has already parsed the Index and is now decoding Blocks,
* it can calculate Compressed Size from Unpadded Size. This function does
* exactly that with error checking:
*
* - Compressed Size calculated from Unpadded Size must be positive integer,
* that is, Unpadded Size must be big enough that after Block Header and
* Check fields there's still at least one byte for Compressed Size.
*
* - If Compressed Size was present in Block Header, the new value
* calculated from Unpadded Size is compared against the value
* from Block Header.
*
* \note This function must be called _after_ decoding the Block Header
* field so that it can properly validate Compressed Size if it
* was present in Block Header.
*
* \return - LZMA_OK: block->compressed_size was set successfully.
* - LZMA_DATA_ERROR: unpadded_size is too small compared to
* block->header_size and lzma_check_size(block->check).
* - LZMA_PROG_ERROR: Some values are invalid. For example,
* block->header_size must be a multiple of four and
* between 8 and 1024 inclusive.
*/
extern LZMA_API(lzma_ret) lzma_block_compressed_size(
lzma_block *block, lzma_vli unpadded_size)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Calculate Unpadded Size
*
* The Index field stores Unpadded Size and Uncompressed Size. The latter
* can be taken directly from the lzma_block structure after coding a Block,
* but Unpadded Size needs to be calculated from Block Header Size,
* Compressed Size, and size of the Check field. This is where this function
* is needed.
*
* \return Unpadded Size on success, or zero on error.
*/
extern LZMA_API(lzma_vli) lzma_block_unpadded_size(const lzma_block *block)
lzma_nothrow lzma_attr_pure;
/**
* \brief Calculate the total encoded size of a Block
*
* This is equivalent to lzma_block_unpadded_size() except that the returned
* value includes the size of the Block Padding field.
*
* \return On success, total encoded size of the Block. On error,
* zero is returned.
*/
extern LZMA_API(lzma_vli) lzma_block_total_size(const lzma_block *block)
lzma_nothrow lzma_attr_pure;
/**
* \brief Initialize .xz Block encoder
*
* Valid actions for lzma_code() are LZMA_RUN, LZMA_SYNC_FLUSH (only if the
* filter chain supports it), and LZMA_FINISH.
*
* \return - LZMA_OK: All good, continue with lzma_code().
* - LZMA_MEM_ERROR
* - LZMA_OPTIONS_ERROR
* - LZMA_UNSUPPORTED_CHECK: block->check specfies a Check ID
* that is not supported by this buid of liblzma. Initializing
* the encoder failed.
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_block_encoder(
lzma_stream *strm, lzma_block *block)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Initialize .xz Block decoder
*
* Valid actions for lzma_code() are LZMA_RUN and LZMA_FINISH. Using
* LZMA_FINISH is not required. It is supported only for convenience.
*
* \return - LZMA_OK: All good, continue with lzma_code().
* - LZMA_UNSUPPORTED_CHECK: Initialization was successful, but
* the given Check ID is not supported, thus Check will be
* ignored.
* - LZMA_PROG_ERROR
* - LZMA_MEM_ERROR
*/
extern LZMA_API(lzma_ret) lzma_block_decoder(
lzma_stream *strm, lzma_block *block)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Calculate maximum output size for single-call Block encoding
*
* This is equivalent to lzma_stream_buffer_bound() but for .xz Blocks.
* See the documentation of lzma_stream_buffer_bound().
*/
extern LZMA_API(size_t) lzma_block_buffer_bound(size_t uncompressed_size)
lzma_nothrow;
/**
* \brief Single-call .xz Block encoder
*
* In contrast to the multi-call encoder initialized with
* lzma_block_encoder(), this function encodes also the Block Header. This
* is required to make it possible to write appropriate Block Header also
* in case the data isn't compressible, and different filter chain has to be
* used to encode the data in uncompressed form using uncompressed chunks
* of the LZMA2 filter.
*
* When the data isn't compressible, header_size, compressed_size, and
* uncompressed_size are set just like when the data was compressible, but
* it is possible that header_size is too small to hold the filter chain
* specified in block->filters, because that isn't necessarily the filter
* chain that was actually used to encode the data. lzma_block_unpadded_size()
* still works normally, because it doesn't read the filters array.
*
* \param block Block options: block->version, block->check,
* and block->filters must have been initialized.
* \param allocator lzma_allocator for custom allocator functions.
* Set to NULL to use malloc() and free().
* \param in Beginning of the input buffer
* \param in_size Size of the input buffer
* \param out Beginning of the output buffer
* \param out_pos The next byte will be written to out[*out_pos].
* *out_pos is updated only if encoding succeeds.
* \param out_size Size of the out buffer; the first byte into
* which no data is written to is out[out_size].
*
* \return - LZMA_OK: Encoding was successful.
* - LZMA_BUF_ERROR: Not enough output buffer space.
* - LZMA_OPTIONS_ERROR
* - LZMA_MEM_ERROR
* - LZMA_DATA_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_block_buffer_encode(
lzma_block *block, lzma_allocator *allocator,
const uint8_t *in, size_t in_size,
uint8_t *out, size_t *out_pos, size_t out_size)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Single-call .xz Block decoder
*
* This is single-call equivalent of lzma_block_decoder(), and requires that
* the caller has already decoded Block Header and checked its memory usage.
*
* \param block Block options just like with lzma_block_decoder().
* \param allocator lzma_allocator for custom allocator functions.
* Set to NULL to use malloc() and free().
* \param in Beginning of the input buffer
* \param in_pos The next byte will be read from in[*in_pos].
* *in_pos is updated only if decoding succeeds.
* \param in_size Size of the input buffer; the first byte that
* won't be read is in[in_size].
* \param out Beginning of the output buffer
* \param out_pos The next byte will be written to out[*out_pos].
* *out_pos is updated only if encoding succeeds.
* \param out_size Size of the out buffer; the first byte into
* which no data is written to is out[out_size].
*
* \return - LZMA_OK: Decoding was successful.
* - LZMA_OPTIONS_ERROR
* - LZMA_DATA_ERROR
* - LZMA_MEM_ERROR
* - LZMA_BUF_ERROR: Output buffer was too small.
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_block_buffer_decode(
lzma_block *block, lzma_allocator *allocator,
const uint8_t *in, size_t *in_pos, size_t in_size,
uint8_t *out, size_t *out_pos, size_t out_size)
lzma_nothrow;
/**
* \file lzma/check.h
* \brief Integrity checks
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Type of the integrity check (Check ID)
*
* The .xz format supports multiple types of checks that are calculated
* from the uncompressed data. They vary in both speed and ability to
* detect errors.
*/
typedef enum {
LZMA_CHECK_NONE = 0,
/**<
* No Check is calculated.
*
* Size of the Check field: 0 bytes
*/
LZMA_CHECK_CRC32 = 1,
/**<
* CRC32 using the polynomial from the IEEE 802.3 standard
*
* Size of the Check field: 4 bytes
*/
LZMA_CHECK_CRC64 = 4,
/**<
* CRC64 using the polynomial from the ECMA-182 standard
*
* Size of the Check field: 8 bytes
*/
LZMA_CHECK_SHA256 = 10
/**<
* SHA-256
*
* Size of the Check field: 32 bytes
*/
} lzma_check;
/**
* \brief Maximum valid Check ID
*
* The .xz file format specification specifies 16 Check IDs (0-15). Some
* of them are only reserved, that is, no actual Check algorithm has been
* assigned. When decoding, liblzma still accepts unknown Check IDs for
* future compatibility. If a valid but unsupported Check ID is detected,
* liblzma can indicate a warning; see the flags LZMA_TELL_NO_CHECK,
* LZMA_TELL_UNSUPPORTED_CHECK, and LZMA_TELL_ANY_CHECK in container.h.
*/
#define LZMA_CHECK_ID_MAX 15
/**
* \brief Test if the given Check ID is supported
*
* Return true if the given Check ID is supported by this liblzma build.
* Otherwise false is returned. It is safe to call this with a value that
* is not in the range [0, 15]; in that case the return value is always false.
*
* You can assume that LZMA_CHECK_NONE and LZMA_CHECK_CRC32 are always
* supported (even if liblzma is built with limited features).
*/
extern LZMA_API(lzma_bool) lzma_check_is_supported(lzma_check check)
lzma_nothrow lzma_attr_const;
/**
* \brief Get the size of the Check field with the given Check ID
*
* Although not all Check IDs have a check algorithm associated, the size of
* every Check is already frozen. This function returns the size (in bytes) of
* the Check field with the specified Check ID. The values are:
* { 0, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64 }
*
* If the argument is not in the range [0, 15], UINT32_MAX is returned.
*/
extern LZMA_API(uint32_t) lzma_check_size(lzma_check check)
lzma_nothrow lzma_attr_const;
/**
* \brief Maximum size of a Check field
*/
#define LZMA_CHECK_SIZE_MAX 64
/**
* \brief Calculate CRC32
*
* Calculate CRC32 using the polynomial from the IEEE 802.3 standard.
*
* \param buf Pointer to the input buffer
* \param size Size of the input buffer
* \param crc Previously returned CRC value. This is used to
* calculate the CRC of a big buffer in smaller chunks.
* Set to zero when starting a new calculation.
*
* \return Updated CRC value, which can be passed to this function
* again to continue CRC calculation.
*/
extern LZMA_API(uint32_t) lzma_crc32(
const uint8_t *buf, size_t size, uint32_t crc)
lzma_nothrow lzma_attr_pure;
/**
* \brief Calculate CRC64
*
* Calculate CRC64 using the polynomial from the ECMA-182 standard.
*
* This function is used similarly to lzma_crc32(). See its documentation.
*/
extern LZMA_API(uint64_t) lzma_crc64(
const uint8_t *buf, size_t size, uint64_t crc)
lzma_nothrow lzma_attr_pure;
/*
* SHA-256 functions are currently not exported to public API.
* Contact Lasse Collin if you think it should be.
*/
/**
* \brief Get the type of the integrity check
*
* This function can be called only immediatelly after lzma_code() has
* returned LZMA_NO_CHECK, LZMA_UNSUPPORTED_CHECK, or LZMA_GET_CHECK.
* Calling this function in any other situation has undefined behavior.
*/
extern LZMA_API(lzma_check) lzma_get_check(const lzma_stream *strm)
lzma_nothrow;
/**
* \file lzma/container.h
* \brief File formats
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/************
* Encoding *
************/
/**
* \brief Default compression preset
*
* It's not straightforward to recommend a default preset, because in some
* cases keeping the resource usage relatively low is more important that
* getting the maximum compression ratio.
*/
#define LZMA_PRESET_DEFAULT UINT32_C(6)
/**
* \brief Mask for preset level
*
* This is useful only if you need to extract the level from the preset
* variable. That should be rare.
*/
#define LZMA_PRESET_LEVEL_MASK UINT32_C(0x1F)
/*
* Preset flags
*
* Currently only one flag is defined.
*/
/**
* \brief Extreme compression preset
*
* This flag modifies the preset to make the encoding significantly slower
* while improving the compression ratio only marginally. This is useful
* when you don't mind wasting time to get as small result as possible.
*
* This flag doesn't affect the memory usage requirements of the decoder (at
* least not significantly). The memory usage of the encoder may be increased
* a little but only at the lowest preset levels (0-2).
*/
#define LZMA_PRESET_EXTREME (UINT32_C(1) << 31)
/**
* \brief Calculate rough memory usage of easy encoder
*
* This function is a wrapper for lzma_raw_encoder_memusage().
*
* \param preset Compression preset (level and possible flags)
*/
extern LZMA_API(uint64_t) lzma_easy_encoder_memusage(uint32_t preset)
lzma_nothrow lzma_attr_pure;
/**
* \brief Calculate rough decoder memory usage of a preset
*
* This function is a wrapper for lzma_raw_decoder_memusage().
*
* \param preset Compression preset (level and possible flags)
*/
extern LZMA_API(uint64_t) lzma_easy_decoder_memusage(uint32_t preset)
lzma_nothrow lzma_attr_pure;
/**
* \brief Initialize .xz Stream encoder using a preset number
*
* This function is intended for those who just want to use the basic features
* if liblzma (that is, most developers out there).
*
* \param strm Pointer to lzma_stream that is at least initialized
* with LZMA_STREAM_INIT.
* \param preset Compression preset to use. A preset consist of level
* number and zero or more flags. Usually flags aren't
* used, so preset is simply a number [0, 9] which match
* the options -0 .. -9 of the xz command line tool.
* Additional flags can be be set using bitwise-or with
* the preset level number, e.g. 6 | LZMA_PRESET_EXTREME.
* \param check Integrity check type to use. See check.h for available
* checks. If you are unsure, use LZMA_CHECK_CRC32.
*
* \return - LZMA_OK: Initialization succeeded. Use lzma_code() to
* encode your data.
* - LZMA_MEM_ERROR: Memory allocation failed.
* - LZMA_OPTIONS_ERROR: The given compression level is not
* supported by this build of liblzma.
* - LZMA_UNSUPPORTED_CHECK: The given check type is not
* supported by this liblzma build.
* - LZMA_PROG_ERROR: One or more of the parameters have values
* that will never be valid. For example, strm == NULL.
*
* If initialization fails (return value is not LZMA_OK), all the memory
* allocated for *strm by liblzma is always freed. Thus, there is no need
* to call lzma_end() after failed initialization.
*
* If initialization succeeds, use lzma_code() to do the actual encoding.
* Valid values for `action' (the second argument of lzma_code()) are
* LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In future,
* there may be compression levels or flags that don't support LZMA_SYNC_FLUSH.
*/
extern LZMA_API(lzma_ret) lzma_easy_encoder(
lzma_stream *strm, uint32_t preset, lzma_check check)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Single-call .xz Stream encoding using a preset number
*
* The maximum required output buffer size can be calculated with
* lzma_stream_buffer_bound().
*
* \param preset Compression preset to use. See the description
* in lzma_easy_encoder().
* \param check Type of the integrity check to calculate from
* uncompressed data.
* \param allocator lzma_allocator for custom allocator functions.
* Set to NULL to use malloc() and free().
* \param in Beginning of the input buffer
* \param in_size Size of the input buffer
* \param out Beginning of the output buffer
* \param out_pos The next byte will be written to out[*out_pos].
* *out_pos is updated only if encoding succeeds.
* \param out_size Size of the out buffer; the first byte into
* which no data is written to is out[out_size].
*
* \return - LZMA_OK: Encoding was successful.
* - LZMA_BUF_ERROR: Not enough output buffer space.
* - LZMA_OPTIONS_ERROR
* - LZMA_MEM_ERROR
* - LZMA_DATA_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_easy_buffer_encode(
uint32_t preset, lzma_check check,
lzma_allocator *allocator, const uint8_t *in, size_t in_size,
uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow;
/**
* \brief Initialize .xz Stream encoder using a custom filter chain
*
* \param strm Pointer to properly prepared lzma_stream
* \param filters Array of filters. This must be terminated with
* filters[n].id = LZMA_VLI_UNKNOWN. See filter.h for
* more information.
* \param check Type of the integrity check to calculate from
* uncompressed data.
*
* \return - LZMA_OK: Initialization was successful.
* - LZMA_MEM_ERROR
* - LZMA_OPTIONS_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm,
const lzma_filter *filters, lzma_check check)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Initialize .lzma encoder (legacy file format)
*
* The .lzma format is sometimes called the LZMA_Alone format, which is the
* reason for the name of this function. The .lzma format supports only the
* LZMA1 filter. There is no support for integrity checks like CRC32.
*
* Use this function if and only if you need to create files readable by
* legacy LZMA tools such as LZMA Utils 4.32.x. Moving to the .xz format
* is strongly recommended.
*
* The valid action values for lzma_code() are LZMA_RUN and LZMA_FINISH.
* No kind of flushing is supported, because the file format doesn't make
* it possible.
*
* \return - LZMA_OK
* - LZMA_MEM_ERROR
* - LZMA_OPTIONS_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_alone_encoder(
lzma_stream *strm, const lzma_options_lzma *options)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Calculate output buffer size for single-call Stream encoder
*
* When trying to compress uncompressible data, the encoded size will be
* slightly bigger than the input data. This function calculates how much
* output buffer space is required to be sure that lzma_stream_buffer_encode()
* doesn't return LZMA_BUF_ERROR.
*
* The calculated value is not exact, but it is guaranteed to be big enough.
* The actual maximum output space required may be slightly smaller (up to
* about 100 bytes). This should not be a problem in practice.
*
* If the calculated maximum size doesn't fit into size_t or would make the
* Stream grow past LZMA_VLI_MAX (which should never happen in practice),
* zero is returned to indicate the error.
*
* \note The limit calculated by this function applies only to
* single-call encoding. Multi-call encoding may (and probably
* will) have larger maximum expansion when encoding
* uncompressible data. Currently there is no function to
* calculate the maximum expansion of multi-call encoding.
*/
extern LZMA_API(size_t) lzma_stream_buffer_bound(size_t uncompressed_size)
lzma_nothrow;
/**
* \brief Single-call .xz Stream encoder
*
* \param filters Array of filters. This must be terminated with
* filters[n].id = LZMA_VLI_UNKNOWN. See filter.h
* for more information.
* \param check Type of the integrity check to calculate from
* uncompressed data.
* \param allocator lzma_allocator for custom allocator functions.
* Set to NULL to use malloc() and free().
* \param in Beginning of the input buffer
* \param in_size Size of the input buffer
* \param out Beginning of the output buffer
* \param out_pos The next byte will be written to out[*out_pos].
* *out_pos is updated only if encoding succeeds.
* \param out_size Size of the out buffer; the first byte into
* which no data is written to is out[out_size].
*
* \return - LZMA_OK: Encoding was successful.
* - LZMA_BUF_ERROR: Not enough output buffer space.
* - LZMA_OPTIONS_ERROR
* - LZMA_MEM_ERROR
* - LZMA_DATA_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(
lzma_filter *filters, lzma_check check,
lzma_allocator *allocator, const uint8_t *in, size_t in_size,
uint8_t *out, size_t *out_pos, size_t out_size)
lzma_nothrow lzma_attr_warn_unused_result;
/************
* Decoding *
************/
/**
* This flag makes lzma_code() return LZMA_NO_CHECK if the input stream
* being decoded has no integrity check. Note that when used with
* lzma_auto_decoder(), all .lzma files will trigger LZMA_NO_CHECK
* if LZMA_TELL_NO_CHECK is used.
*/
#define LZMA_TELL_NO_CHECK UINT32_C(0x01)
/**
* This flag makes lzma_code() return LZMA_UNSUPPORTED_CHECK if the input
* stream has an integrity check, but the type of the integrity check is not
* supported by this liblzma version or build. Such files can still be
* decoded, but the integrity check cannot be verified.
*/
#define LZMA_TELL_UNSUPPORTED_CHECK UINT32_C(0x02)
/**
* This flag makes lzma_code() return LZMA_GET_CHECK as soon as the type
* of the integrity check is known. The type can then be got with
* lzma_get_check().
*/
#define LZMA_TELL_ANY_CHECK UINT32_C(0x04)
/**
* This flag enables decoding of concatenated files with file formats that
* allow concatenating compressed files as is. From the formats currently
* supported by liblzma, only the .xz format allows concatenated files.
* Concatenated files are not allowed with the legacy .lzma format.
*
* This flag also affects the usage of the `action' argument for lzma_code().
* When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END
* unless LZMA_FINISH is used as `action'. Thus, the application has to set
* LZMA_FINISH in the same way as it does when encoding.
*
* If LZMA_CONCATENATED is not used, the decoders still accept LZMA_FINISH
* as `action' for lzma_code(), but the usage of LZMA_FINISH isn't required.
*/
#define LZMA_CONCATENATED UINT32_C(0x08)
/**
* \brief Initialize .xz Stream decoder
*
* \param strm Pointer to properly prepared lzma_stream
* \param memlimit Rough memory usage limit as bytes
* \param flags Bitwise-or of zero or more of the decoder flags:
* LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
* LZMA_TELL_ANY_CHECK, LZMA_CONCATENATED
*
* \return - LZMA_OK: Initialization was successful.
* - LZMA_MEM_ERROR: Cannot allocate memory.
* - LZMA_OPTIONS_ERROR: Unsupported flags
*/
extern LZMA_API(lzma_ret) lzma_stream_decoder(
lzma_stream *strm, uint64_t memlimit, uint32_t flags)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Decode .xz Streams and .lzma files with autodetection
*
* This decoder autodetects between the .xz and .lzma file formats, and
* calls lzma_stream_decoder() or lzma_alone_decoder() once the type
* of the input file has been detected.
*
* \param strm Pointer to properly prepared lzma_stream
* \param memlimit Rough memory usage limit as bytes
* \param flags Bitwise-or of flags, or zero for no flags.
*
* \return - LZMA_OK: Initialization was successful.
* - LZMA_MEM_ERROR: Cannot allocate memory.
* - LZMA_OPTIONS_ERROR: Unsupported flags
*/
extern LZMA_API(lzma_ret) lzma_auto_decoder(
lzma_stream *strm, uint64_t memlimit, uint32_t flags)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Initialize .lzma decoder (legacy file format)
*
* Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH.
* There is no need to use LZMA_FINISH, but allowing it may simplify
* certain types of applications.
*
* \return - LZMA_OK
* - LZMA_MEM_ERROR
*/
extern LZMA_API(lzma_ret) lzma_alone_decoder(
lzma_stream *strm, uint64_t memlimit)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Single-call .xz Stream decoder
*
* \param memlimit Pointer to how much memory the decoder is allowed
* to allocate. The value pointed by this pointer is
* modified if and only if LZMA_MEMLIMIT_ERROR is
* returned.
* \param flags Bitwise-or of zero or more of the decoder flags:
* LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
* LZMA_CONCATENATED. Note that LZMA_TELL_ANY_CHECK
* is not allowed and will return LZMA_PROG_ERROR.
* \param allocator lzma_allocator for custom allocator functions.
* Set to NULL to use malloc() and free().
* \param in Beginning of the input buffer
* \param in_pos The next byte will be read from in[*in_pos].
* *in_pos is updated only if decoding succeeds.
* \param in_size Size of the input buffer; the first byte that
* won't be read is in[in_size].
* \param out Beginning of the output buffer
* \param out_pos The next byte will be written to out[*out_pos].
* *out_pos is updated only if encoding succeeds.
* \param out_size Size of the out buffer; the first byte into
* which no data is written to is out[out_size].
*
* \return - LZMA_OK: Decoding was successful.
* - LZMA_FORMAT_ERROR
* - LZMA_OPTIONS_ERROR
* - LZMA_DATA_ERROR
* - LZMA_NO_CHECK: This can be returned only if using
* the LZMA_TELL_NO_CHECK flag.
* - LZMA_UNSUPPORTED_CHECK: This can be returned only if using
* the LZMA_TELL_UNSUPPORTED_CHECK flag.
* - LZMA_MEM_ERROR
* - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached.
* The minimum required memlimit value was stored to *memlimit.
* - LZMA_BUF_ERROR: Output buffer was too small.
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_stream_buffer_decode(
uint64_t *memlimit, uint32_t flags, lzma_allocator *allocator,
const uint8_t *in, size_t *in_pos, size_t in_size,
uint8_t *out, size_t *out_pos, size_t out_size)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \file lzma/delta.h
* \brief Delta filter
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Filter ID
*
* Filter ID of the Delta filter. This is used as lzma_filter.id.
*/
#define LZMA_FILTER_DELTA LZMA_VLI_C(0x03)
/**
* \brief Type of the delta calculation
*
* Currently only byte-wise delta is supported. Other possible types could
* be, for example, delta of 16/32/64-bit little/big endian integers, but
* these are not currently planned since byte-wise delta is almost as good.
*/
typedef enum {
LZMA_DELTA_TYPE_BYTE
} lzma_delta_type;
/**
* \brief Options for the Delta filter
*
* These options are needed by both encoder and decoder.
*/
typedef struct {
/** For now, this must always be LZMA_DELTA_TYPE_BYTE. */
lzma_delta_type type;
/**
* \brief Delta distance
*
* With the only currently supported type, LZMA_DELTA_TYPE_BYTE,
* the distance is as bytes.
*
* Examples:
* - 16-bit stereo audio: distance = 4 bytes
* - 24-bit RGB image data: distance = 3 bytes
*/
uint32_t dist;
# define LZMA_DELTA_DIST_MIN 1
# define LZMA_DELTA_DIST_MAX 256
/*
* Reserved space to allow possible future extensions without
* breaking the ABI. You should not touch these, because the names
* of these variables may change. These are and will never be used
* when type is LZMA_DELTA_TYPE_BYTE, so it is safe to leave these
* uninitialized.
*/
uint32_t reserved_int1;
uint32_t reserved_int2;
uint32_t reserved_int3;
uint32_t reserved_int4;
void *reserved_ptr1;
void *reserved_ptr2;
} lzma_options_delta;
/**
* \file lzma/filter.h
* \brief Common filter related types
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Maximum number of filters in a chain
*
* A filter chain can have 1-4 filters, of which three are allowed to change
* the size of the data. Usually only one or two filters are needed.
*/
#define LZMA_FILTERS_MAX 4
/**
* \brief Filter options
*
* This structure is used to pass Filter ID and a pointer filter's
* options to liblzma. A few functions work with a single lzma_filter
* structure, while most functions expect a filter chain.
*
* A filter chain is indicated with an array of lzma_filter structures.
* The array is terminated with .id = LZMA_VLI_UNKNOWN. Thus, the filter
* array must have LZMA_FILTERS_MAX + 1 elements (that is, five) to
* be able to hold any arbitrary filter chain. This is important when
* using lzma_block_header_decode() from block.h, because too small
* array would make liblzma write past the end of the filters array.
*/
typedef struct {
/**
* \brief Filter ID
*
* Use constants whose name begin with `LZMA_FILTER_' to specify
* different filters. In an array of lzma_filter structures, use
* LZMA_VLI_UNKNOWN to indicate end of filters.
*
* \note This is not an enum, because on some systems enums
* cannot be 64-bit.
*/
lzma_vli id;
/**
* \brief Pointer to filter-specific options structure
*
* If the filter doesn't need options, set this to NULL. If id is
* set to LZMA_VLI_UNKNOWN, options is ignored, and thus
* doesn't need be initialized.
*
* Some filters support changing the options in the middle of
* the encoding process. These filters store the pointer of the
* options structure and communicate with the application via
* modifications of the options structure.
*/
void *options;
} lzma_filter;
/**
* \brief Test if the given Filter ID is supported for encoding
*
* Return true if the give Filter ID is supported for encoding by this
* liblzma build. Otherwise false is returned.
*
* There is no way to list which filters are available in this particular
* liblzma version and build. It would be useless, because the application
* couldn't know what kind of options the filter would need.
*/
extern LZMA_API(lzma_bool) lzma_filter_encoder_is_supported(lzma_vli id)
lzma_nothrow lzma_attr_const;
/**
* \brief Test if the given Filter ID is supported for decoding
*
* Return true if the give Filter ID is supported for decoding by this
* liblzma build. Otherwise false is returned.
*/
extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id)
lzma_nothrow lzma_attr_const;
/**
* \brief Calculate rough memory requirements for raw encoder
*
* Because the calculation is rough, this function can be used to calculate
* the memory requirements for Block and Stream encoders too.
*
* \param filters Array of filters terminated with
* .id == LZMA_VLI_UNKNOWN.
*
* \return Rough number of bytes of memory required for the given
* filter chain when encoding.
*/
extern LZMA_API(uint64_t) lzma_raw_encoder_memusage(const lzma_filter *filters)
lzma_nothrow lzma_attr_pure;
/**
* \brief Calculate rough memory requirements for raw decoder
*
* Because the calculation is rough, this function can be used to calculate
* the memory requirements for Block and Stream decoders too.
*
* \param filters Array of filters terminated with
* .id == LZMA_VLI_UNKNOWN.
*
* \return Rough number of bytes of memory required for the given
* filter chain when decoding.
*/
extern LZMA_API(uint64_t) lzma_raw_decoder_memusage(const lzma_filter *filters)
lzma_nothrow lzma_attr_pure;
/**
* \brief Initialize raw encoder
*
* This function may be useful when implementing custom file formats.
*
* \param strm Pointer to properly prepared lzma_stream
* \param filters Array of lzma_filter structures. The end of the
* array must be marked with .id = LZMA_VLI_UNKNOWN.
*
* The `action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the
* filter chain supports it), or LZMA_FINISH.
*
* \return - LZMA_OK
* - LZMA_MEM_ERROR
* - LZMA_OPTIONS_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_raw_encoder(
lzma_stream *strm, const lzma_filter *filters)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Initialize raw decoder
*
* The initialization of raw decoder goes similarly to raw encoder.
*
* The `action' with lzma_code() can be LZMA_RUN or LZMA_FINISH. Using
* LZMA_FINISH is not required, it is supported just for convenience.
*
* \return - LZMA_OK
* - LZMA_MEM_ERROR
* - LZMA_OPTIONS_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_raw_decoder(
lzma_stream *strm, const lzma_filter *filters)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Single-call raw encoder
*
* \param filters Array of lzma_filter structures. The end of the
* array must be marked with .id = LZMA_VLI_UNKNOWN.
* \param allocator lzma_allocator for custom allocator functions.
* Set to NULL to use malloc() and free().
* \param in Beginning of the input buffer
* \param in_size Size of the input buffer
* \param out Beginning of the output buffer
* \param out_pos The next byte will be written to out[*out_pos].
* *out_pos is updated only if encoding succeeds.
* \param out_size Size of the out buffer; the first byte into
* which no data is written to is out[out_size].
*
* \return - LZMA_OK: Encoding was successful.
* - LZMA_BUF_ERROR: Not enough output buffer space.
* - LZMA_OPTIONS_ERROR
* - LZMA_MEM_ERROR
* - LZMA_DATA_ERROR
* - LZMA_PROG_ERROR
*
* \note There is no function to calculate how big output buffer
* would surely be big enough. (lzma_stream_buffer_bound()
* works only for lzma_stream_buffer_encode().)
*/
extern LZMA_API(lzma_ret) lzma_raw_buffer_encode(
const lzma_filter *filters, lzma_allocator *allocator,
const uint8_t *in, size_t in_size, uint8_t *out,
size_t *out_pos, size_t out_size) lzma_nothrow;
/**
* \brief Single-call raw decoder
*
* \param filters Array of lzma_filter structures. The end of the
* array must be marked with .id = LZMA_VLI_UNKNOWN.
* \param allocator lzma_allocator for custom allocator functions.
* Set to NULL to use malloc() and free().
* \param in Beginning of the input buffer
* \param in_pos The next byte will be read from in[*in_pos].
* *in_pos is updated only if decoding succeeds.
* \param in_size Size of the input buffer; the first byte that
* won't be read is in[in_size].
* \param out Beginning of the output buffer
* \param out_pos The next byte will be written to out[*out_pos].
* *out_pos is updated only if encoding succeeds.
* \param out_size Size of the out buffer; the first byte into
* which no data is written to is out[out_size].
*/
extern LZMA_API(lzma_ret) lzma_raw_buffer_decode(
const lzma_filter *filters, lzma_allocator *allocator,
const uint8_t *in, size_t *in_pos, size_t in_size,
uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow;
/**
* \brief Get the size of the Filter Properties field
*
* This function may be useful when implementing custom file formats
* using the raw encoder and decoder.
*
* \param size Pointer to uint32_t to hold the size of the properties
* \param filter Filter ID and options (the size of the propeties may
* vary depending on the options)
*
* \return - LZMA_OK
* - LZMA_OPTIONS_ERROR
* - LZMA_PROG_ERROR
*
* \note This function validates the Filter ID, but does not
* necessarily validate the options. Thus, it is possible
* that this returns LZMA_OK while the following call to
* lzma_properties_encode() returns LZMA_OPTIONS_ERROR.
*/
extern LZMA_API(lzma_ret) lzma_properties_size(
uint32_t *size, const lzma_filter *filter) lzma_nothrow;
/**
* \brief Encode the Filter Properties field
*
* \param filter Filter ID and options
* \param props Buffer to hold the encoded options. The size of
* buffer must have been already determined with
* lzma_properties_size().
*
* \return - LZMA_OK
* - LZMA_OPTIONS_ERROR
* - LZMA_PROG_ERROR
*
* \note Even this function won't validate more options than actually
* necessary. Thus, it is possible that encoding the properties
* succeeds but using the same options to initialize the encoder
* will fail.
*
* \note It is OK to skip calling this function if
* lzma_properties_size() indicated that the size
* of the Filter Properties field is zero.
*/
extern LZMA_API(lzma_ret) lzma_properties_encode(
const lzma_filter *filter, uint8_t *props) lzma_nothrow;
/**
* \brief Decode the Filter Properties field
*
* \param filter filter->id must have been set to the correct
* Filter ID. filter->options doesn't need to be
* initialized (it's not freed by this function). The
* decoded options will be stored to filter->options.
* filter->options is set to NULL if there are no
* properties or if an error occurs.
* \param allocator Custom memory allocator used to allocate the
* options. Set to NULL to use the default malloc(),
* and in case of an error, also free().
* \param props Input buffer containing the properties.
* \param props_size Size of the properties. This must be the exact
* size; giving too much or too little input will
* return LZMA_OPTIONS_ERROR.
*
* \return - LZMA_OK
* - LZMA_OPTIONS_ERROR
* - LZMA_MEM_ERROR
*/
extern LZMA_API(lzma_ret) lzma_properties_decode(
lzma_filter *filter, lzma_allocator *allocator,
const uint8_t *props, size_t props_size) lzma_nothrow;
/**
* \brief Calculate encoded size of a Filter Flags field
*
* Knowing the size of Filter Flags is useful to know when allocating
* memory to hold the encoded Filter Flags.
*
* \param size Pointer to integer to hold the calculated size
* \param filters Filter ID and associated options whose encoded
* size is to be calculted
*
* \return - LZMA_OK: *size set successfully. Note that this doesn't
* guarantee that filters->options is valid, thus
* lzma_filter_flags_encode() may still fail.
* - LZMA_OPTIONS_ERROR: Unknown Filter ID or unsupported options.
* - LZMA_PROG_ERROR: Invalid options
*
* \note If you need to calculate size of List of Filter Flags,
* you need to loop over every lzma_filter entry.
*/
extern LZMA_API(lzma_ret) lzma_filter_flags_size(
uint32_t *size, const lzma_filter *filters)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Encode Filter Flags into given buffer
*
* In contrast to some functions, this doesn't allocate the needed buffer.
* This is due to how this function is used internally by liblzma.
*
* \param filters Filter ID and options to be encoded
* \param out Beginning of the output buffer
* \param out_pos out[*out_pos] is the next write position. This
* is updated by the encoder.
* \param out_size out[out_size] is the first byte to not write.
*
* \return - LZMA_OK: Encoding was successful.
* - LZMA_OPTIONS_ERROR: Invalid or unsupported options.
* - LZMA_PROG_ERROR: Invalid options or not enough output
* buffer space (you should have checked it with
* lzma_filter_flags_size()).
*/
extern LZMA_API(lzma_ret) lzma_filter_flags_encode(const lzma_filter *filters,
uint8_t *out, size_t *out_pos, size_t out_size)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Decode Filter Flags from given buffer
*
* The decoded result is stored into *filters. filters->options is
* initialized but the old value is NOT free()d.
*
* \return - LZMA_OK
* - LZMA_OPTIONS_ERROR
* - LZMA_MEM_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_filter_flags_decode(
lzma_filter *filters, lzma_allocator *allocator,
const uint8_t *in, size_t *in_pos, size_t in_size)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \file lzma/hardware.h
* \brief Hardware information
*
* Since liblzma can consume a lot of system resources, it also provides
* ways to limit the resource usage. Applications linking against liblzma
* need to do the actual decisions how much resources to let liblzma to use.
* To ease making these decisions, liblzma provides functions to find out
* the relevant capabilities of the underlaying hardware. Currently there
* is only a function to find out the amount of RAM, but in the future there
* will be also a function to detect how many concurrent threads the system
* can run.
*
* \note On some operating systems, these function may temporarily
* load a shared library or open file descriptor(s) to find out
* the requested hardware information. Unless the application
* assumes that specific file descriptors are not touched by
* other threads, this should have no effect on thread safety.
* Possible operations involving file descriptors will restart
* the syscalls if they return EINTR.
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Get the total amount of physical memory (RAM) in bytes
*
* This function may be useful when determining a reasonable memory
* usage limit for decompressing or how much memory it is OK to use
* for compressing.
*
* \return On success, the total amount of physical memory in bytes
* is returned. If the amount of RAM cannot be determined,
* zero is returned. This can happen if an error occurs
* or if there is no code in liblzma to detect the amount
* of RAM on the specific operating system.
*/
extern LZMA_API(uint64_t) lzma_physmem(void) lzma_nothrow;
/**
* \file lzma/index.h
* \brief Handling of .xz Index lists
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Opaque data type to hold the Index
*/
typedef struct lzma_index_s lzma_index;
/**
* \brief Index Record and its location
*/
typedef struct {
/**
* \brief Total encoded size of a Block including Block Padding
*
* This value is useful if you need to know the actual size of the
* Block that the Block decoder will read.
*/
lzma_vli total_size;
/**
* \brief Encoded size of a Block excluding Block Padding
*
* This value is stored in the Index. When doing random-access
* reading, you should give this value to the Block decoder along
* with uncompressed_size.
*/
lzma_vli unpadded_size;
/**
* \brief Uncompressed Size of a Block
*/
lzma_vli uncompressed_size;
/**
* \brief Compressed offset in the Stream(s)
*
* This is the offset of the first byte of the Block, that is,
* where you need to seek to decode the Block. The offset
* is relative to the beginning of the Stream, or if there are
* multiple Indexes combined, relative to the beginning of the
* first Stream.
*/
lzma_vli stream_offset;
/**
* \brief Uncompressed offset
*
* When doing random-access reading, it is possible that the target
* offset is not exactly at Block boundary. One will need to compare
* the target offset against uncompressed_offset, and possibly decode
* and throw away some amount of data before reaching the target
* offset.
*/
lzma_vli uncompressed_offset;
} lzma_index_record;
/**
* \brief Calculate memory usage for Index with given number of Records
*
* On disk, the size of the Index field depends on both the number of Records
* stored and how big values the Records store (due to variable-length integer
* encoding). When the Index is kept in lzma_index structure, the memory usage
* depends only on the number of Records stored in the Index. The size in RAM
* is almost always a lot bigger than in encoded form on disk.
*
* This function calculates an approximate amount of memory needed hold the
* given number of Records in lzma_index structure. This value may vary
* between liblzma versions if the internal implementation is modified.
*
* If you want to know how much memory an existing lzma_index structure is
* using, use lzma_index_memusage(lzma_index_count(i)).
*/
extern LZMA_API(uint64_t) lzma_index_memusage(lzma_vli record_count)
lzma_nothrow;
/**
* \brief Allocate and initialize a new lzma_index structure
*
* If i is NULL, a new lzma_index structure is allocated, initialized,
* and a pointer to it returned. If allocation fails, NULL is returned.
*
* If i is non-NULL, it is reinitialized and the same pointer returned.
* In this case, return value cannot be NULL or a different pointer than
* the i that was given as an argument.
*/
extern LZMA_API(lzma_index *) lzma_index_init(
lzma_index *i, lzma_allocator *allocator) lzma_nothrow;
/**
* \brief Deallocate the Index
*
* If i is NULL, this does nothing.
*/
extern LZMA_API(void) lzma_index_end(lzma_index *i, lzma_allocator *allocator)
lzma_nothrow;
/**
* \brief Add a new Record to an Index
*
* \param i Pointer to a lzma_index structure
* \param allocator Pointer to lzma_allocator, or NULL to
* use malloc()
* \param unpadded_size Unpadded Size of a Block. This can be
* calculated with lzma_block_unpadded_size()
* after encoding or decoding the Block.
* \param uncompressed_size Uncompressed Size of a Block. This can be
* taken directly from lzma_block structure
* after encoding or decoding the Block.
*
* Appending a new Record does not affect the read position.
*
* \return - LZMA_OK
* - LZMA_MEM_ERROR
* - LZMA_DATA_ERROR: Compressed or uncompressed size of the
* Stream or size of the Index field would grow too big.
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_index_append(
lzma_index *i, lzma_allocator *allocator,
lzma_vli unpadded_size, lzma_vli uncompressed_size)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Get the number of Records
*/
extern LZMA_API(lzma_vli) lzma_index_count(const lzma_index *i)
lzma_nothrow lzma_attr_pure;
/**
* \brief Get the size of the Index field as bytes
*
* This is needed to verify the Backward Size field in the Stream Footer.
*/
extern LZMA_API(lzma_vli) lzma_index_size(const lzma_index *i)
lzma_nothrow lzma_attr_pure;
/**
* \brief Get the total size of the Blocks
*
* This doesn't include the Stream Header, Stream Footer, Stream Padding,
* or Index fields.
*/
extern LZMA_API(lzma_vli) lzma_index_total_size(const lzma_index *i)
lzma_nothrow lzma_attr_pure;
/**
* \brief Get the total size of the Stream
*
* If multiple Indexes have been combined, this works as if the Blocks
* were in a single Stream.
*/
extern LZMA_API(lzma_vli) lzma_index_stream_size(const lzma_index *i)
lzma_nothrow lzma_attr_pure;
/**
* \brief Get the total size of the file
*
* When no Indexes have been combined with lzma_index_cat(), this function is
* identical to lzma_index_stream_size(). If multiple Indexes have been
* combined, this includes also the headers of each separate Stream and the
* possible Stream Padding fields.
*/
extern LZMA_API(lzma_vli) lzma_index_file_size(const lzma_index *i)
lzma_nothrow lzma_attr_pure;
/**
* \brief Get the uncompressed size of the Stream
*/
extern LZMA_API(lzma_vli) lzma_index_uncompressed_size(const lzma_index *i)
lzma_nothrow lzma_attr_pure;
/**
* \brief Get the next Record from the Index
*/
extern LZMA_API(lzma_bool) lzma_index_read(
lzma_index *i, lzma_index_record *record)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Rewind the Index
*
* Rewind the Index so that next call to lzma_index_read() will return the
* first Record.
*/
extern LZMA_API(void) lzma_index_rewind(lzma_index *i) lzma_nothrow;
/**
* \brief Locate a Record
*
* When the Index is available, it is possible to do random-access reading
* with granularity of Block size.
*
* \param i Pointer to lzma_index structure
* \param record Pointer to a structure to hold the search results
* \param target Uncompressed target offset which the caller would
* like to locate from the Stream
*
* If the target is smaller than the uncompressed size of the Stream (can be
* checked with lzma_index_uncompressed_size()):
* - Information about the Record containing the requested uncompressed
* offset is stored into *record.
* - Read offset will be adjusted so that calling lzma_index_read() can be
* used to read subsequent Records.
* - This function returns false.
*
* If target is greater than the uncompressed size of the Stream, *record
* and the read position are not modified, and this function returns true.
*/
extern LZMA_API(lzma_bool) lzma_index_locate(
lzma_index *i, lzma_index_record *record, lzma_vli target)
lzma_nothrow;
/**
* \brief Concatenate Indexes of two Streams
*
* Concatenating Indexes is useful when doing random-access reading in
* multi-Stream .xz file, or when combining multiple Streams into single
* Stream.
*
* \param dest Destination Index after which src is appended
* \param src Source Index. If this function succeeds, the
* memory allocated for src is freed or moved to
* be part of dest.
* \param allocator Custom memory allocator; can be NULL to use
* malloc() and free().
* \param padding Size of the Stream Padding field between Streams.
* This must be a multiple of four.
*
* \return - LZMA_OK: Indexes concatenated successfully. src is now
* a dangling pointer.
* - LZMA_DATA_ERROR: *dest would grow too big.
* - LZMA_MEM_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *lzma_restrict dest,
lzma_index *lzma_restrict src,
lzma_allocator *allocator, lzma_vli padding)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Duplicate an Index list
*
* Makes an identical copy of the Index. Also the read position is copied.
*
* \return A copy of the Index, or NULL if memory allocation failed.
*/
extern LZMA_API(lzma_index *) lzma_index_dup(
const lzma_index *i, lzma_allocator *allocator)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Compare if two Index lists are identical
*
* Read positions are not compared.
*
* \return True if *a and *b are equal, false otherwise.
*/
extern LZMA_API(lzma_bool) lzma_index_equal(
const lzma_index *a, const lzma_index *b)
lzma_nothrow lzma_attr_pure;
/**
* \brief Initialize .xz Index encoder
*
* \param strm Pointer to properly prepared lzma_stream
* \param i Pointer to lzma_index which should be encoded.
* The read position will be at the end of the Index
* after lzma_code() has returned LZMA_STREAM_END.
*
* The only valid action value for lzma_code() is LZMA_RUN.
*
* \return - LZMA_OK: Initialization succeeded, continue with lzma_code().
* - LZMA_MEM_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_index_encoder(lzma_stream *strm, lzma_index *i)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Initialize .xz Index decoder
*
* \param strm Pointer to properly prepared lzma_stream
* \param i Pointer to a pointer that will be made to point
* to the final decoded Index once lzma_code() has
* returned LZMA_STREAM_END. That is,
* lzma_index_decoder() always takes care of
* allocating a new lzma_index structure, and *i
* doesn't need to be initialized by the caller.
* \param memlimit How much memory the resulting Index is allowed
* to require.
*
* The only valid action value for lzma_code() is LZMA_RUN.
*
* \return - LZMA_OK: Initialization succeeded, continue with lzma_code().
* - LZMA_MEM_ERROR
* - LZMA_MEMLIMIT_ERROR
* - LZMA_PROG_ERROR
*
* \note The memory usage limit is checked early in the decoding
* (within the first dozen input bytes or so). The actual memory
* is allocated later in smaller pieces. If the memory usage
* limit is modified with lzma_memlimit_set() after a part
* of the Index has already been decoded, the new limit may
* get ignored.
*/
extern LZMA_API(lzma_ret) lzma_index_decoder(
lzma_stream *strm, lzma_index **i, uint64_t memlimit)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Single-call .xz Index encoder
*
* \param i Index to be encoded. The read position will be at
* the end of the Index if encoding succeeds, or at
* unspecified position in case an error occurs.
* \param out Beginning of the output buffer
* \param out_pos The next byte will be written to out[*out_pos].
* *out_pos is updated only if encoding succeeds.
* \param out_size Size of the out buffer; the first byte into
* which no data is written to is out[out_size].
*
* \return - LZMA_OK: Encoding was successful.
* - LZMA_BUF_ERROR: Output buffer is too small. Use
* lzma_index_size() to find out how much output
* space is needed.
* - LZMA_PROG_ERROR
*
* \note This function doesn't take allocator argument since all
* the internal data is allocated on stack.
*/
extern LZMA_API(lzma_ret) lzma_index_buffer_encode(lzma_index *i,
uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow;
/**
* \brief Single-call .xz Index decoder
*
* \param i Pointer to a pointer that will be made to point
* to the final decoded Index if decoding is
* successful. That is, lzma_index_buffer_decode()
* always takes care of allocating a new
* lzma_index structure, and *i doesn't need to be
* initialized by the caller.
* \param memlimit Pointer to how much memory the resulting Index
* is allowed to require. The value pointed by
* this pointer is modified if and only if
* LZMA_MEMLIMIT_ERROR is returned.
* \param allocator Pointer to lzma_allocator, or NULL to use malloc()
* \param in Beginning of the input buffer
* \param in_pos The next byte will be read from in[*in_pos].
* *in_pos is updated only if decoding succeeds.
* \param in_size Size of the input buffer; the first byte that
* won't be read is in[in_size].
*
* \return - LZMA_OK: Decoding was successful.
* - LZMA_MEM_ERROR
* - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached.
* The minimum required memlimit value was stored to *memlimit.
* - LZMA_DATA_ERROR
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i,
uint64_t *memlimit, lzma_allocator *allocator,
const uint8_t *in, size_t *in_pos, size_t in_size)
lzma_nothrow;
/**
* \file lzma/index_hash.h
* \brief Validates Index by using a hash function
*
* Hashing makes it possible to use constant amount of memory to validate
* Index of arbitrary size.
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Opaque data type to hold the Index hash
*/
typedef struct lzma_index_hash_s lzma_index_hash;
/**
* \brief Allocate and initialize a new lzma_index_hash structure
*
* If index_hash is NULL, a new lzma_index_hash structure is allocated,
* initialized, and a pointer to it returned. If allocation fails, NULL
* is returned.
*
* If index_hash is non-NULL, it is reinitialized and the same pointer
* returned. In this case, return value cannot be NULL or a different
* pointer than the index_hash that was given as an argument.
*/
extern LZMA_API(lzma_index_hash *) lzma_index_hash_init(
lzma_index_hash *index_hash, lzma_allocator *allocator)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Deallocate lzma_index_hash structure
*/
extern LZMA_API(void) lzma_index_hash_end(
lzma_index_hash *index_hash, lzma_allocator *allocator)
lzma_nothrow;
/**
* \brief Add a new Record to an Index hash
*
* \param index Pointer to a lzma_index_hash structure
* \param unpadded_size Unpadded Size of a Block
* \param uncompressed_size Uncompressed Size of a Block
*
* \return - LZMA_OK
* - LZMA_DATA_ERROR: Compressed or uncompressed size of the
* Stream or size of the Index field would grow too big.
* - LZMA_PROG_ERROR: Invalid arguments or this function is being
* used when lzma_index_hash_decode() has already been used.
*/
extern LZMA_API(lzma_ret) lzma_index_hash_append(lzma_index_hash *index_hash,
lzma_vli unpadded_size, lzma_vli uncompressed_size)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Decode and validate the Index field
*
* After telling the sizes of all Blocks with lzma_index_hash_append(),
* the actual Index field is decoded with this function. Specifically,
* once decoding of the Index field has been started, no more Records
* can be added using lzma_index_hash_append().
*
* This function doesn't use lzma_stream structure to pass the input data.
* Instead, the input buffer is specified using three arguments. This is
* because it matches better the internal APIs of liblzma.
*
* \param index_hash Pointer to a lzma_index_hash structure
* \param in Pointer to the beginning of the input buffer
* \param in_pos in[*in_pos] is the next byte to process
* \param in_size in[in_size] is the first byte not to process
*
* \return - LZMA_OK: So far good, but more input is needed.
* - LZMA_STREAM_END: Index decoded successfully and it matches
* the Records given with lzma_index_hash_append().
* - LZMA_DATA_ERROR: Index is corrupt or doesn't match the
* information given with lzma_index_hash_append().
* - LZMA_BUF_ERROR: Cannot progress because *in_pos >= in_size.
* - LZMA_PROG_ERROR
*/
extern LZMA_API(lzma_ret) lzma_index_hash_decode(lzma_index_hash *index_hash,
const uint8_t *in, size_t *in_pos, size_t in_size)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Get the size of the Index field as bytes
*
* This is needed to verify the Backward Size field in the Stream Footer.
*/
extern LZMA_API(lzma_vli) lzma_index_hash_size(
const lzma_index_hash *index_hash)
lzma_nothrow lzma_attr_pure;
/**
* \file lzma/lzma.h
* \brief LZMA1 and LZMA2 filters
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief LZMA1 Filter ID
*
* LZMA1 is the very same thing as what was called just LZMA in LZMA Utils,
* 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from
* accidentally using LZMA when they actually want LZMA2.
*
* LZMA1 shouldn't be used for new applications unless you _really_ know
* what you are doing. LZMA2 is almost always a better choice.
*/
#define LZMA_FILTER_LZMA1 LZMA_VLI_C(0x4000000000000001)
/**
* \brief LZMA2 Filter ID
*
* Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds
* support for LZMA_SYNC_FLUSH, uncompressed chunks (smaller expansion
* when trying to compress uncompressible data), possibility to change
* lc/lp/pb in the middle of encoding, and some other internal improvements.
*/
#define LZMA_FILTER_LZMA2 LZMA_VLI_C(0x21)
/**
* \brief Match finders
*
* Match finder has major effect on both speed and compression ratio.
* Usually hash chains are faster than binary trees.
*
* The memory usage formulas are only rough estimates, which are closest to
* reality when dict_size is a power of two. The formulas are more complex
* in reality, and can also change a little between liblzma versions. Use
* lzma_memusage_encoder() to get more accurate estimate of memory usage.
*/
typedef enum {
LZMA_MF_HC3 = 0x03,
/**<
* \brief Hash Chain with 2- and 3-byte hashing
*
* Minimum nice_len: 3
*
* Memory usage:
* - dict_size <= 16 MiB: dict_size * 7.5
* - dict_size > 16 MiB: dict_size * 5.5 + 64 MiB
*/
LZMA_MF_HC4 = 0x04,
/**<
* \brief Hash Chain with 2-, 3-, and 4-byte hashing
*
* Minimum nice_len: 4
*
* Memory usage: dict_size * 7.5
*/
LZMA_MF_BT2 = 0x12,
/**<
* \brief Binary Tree with 2-byte hashing
*
* Minimum nice_len: 2
*
* Memory usage: dict_size * 9.5
*/
LZMA_MF_BT3 = 0x13,
/**<
* \brief Binary Tree with 2- and 3-byte hashing
*
* Minimum nice_len: 3
*
* Memory usage:
* - dict_size <= 16 MiB: dict_size * 11.5
* - dict_size > 16 MiB: dict_size * 9.5 + 64 MiB
*/
LZMA_MF_BT4 = 0x14
/**<
* \brief Binary Tree with 2-, 3-, and 4-byte hashing
*
* Minimum nice_len: 4
*
* Memory usage: dict_size * 11.5
*/
} lzma_match_finder;
/**
* \brief Test if given match finder is supported
*
* Return true if the given match finder is supported by this liblzma build.
* Otherwise false is returned. It is safe to call this with a value that
* isn't listed in lzma_match_finder enumeration; the return value will be
* false.
*
* There is no way to list which match finders are available in this
* particular liblzma version and build. It would be useless, because
* a new match finder, which the application developer wasn't aware,
* could require giving additional options to the encoder that the older
* match finders don't need.
*/
extern LZMA_API(lzma_bool) lzma_mf_is_supported(lzma_match_finder match_finder)
lzma_nothrow lzma_attr_const;
/**
* \brief Compression modes
*
* This selects the function used to analyze the data produced by the match
* finder.
*/
typedef enum {
LZMA_MODE_FAST = 1,
/**<
* \brief Fast compression
*
* Fast mode is usually at its best when combined with
* a hash chain match finder.
*/
LZMA_MODE_NORMAL = 2
/**<
* \brief Normal compression
*
* This is usually notably slower than fast mode. Use this
* together with binary tree match finders to expose the
* full potential of the LZMA1 or LZMA2 encoder.
*/
} lzma_mode;
/**
* \brief Test if given compression mode is supported
*
* Return true if the given compression mode is supported by this liblzma
* build. Otherwise false is returned. It is safe to call this with a value
* that isn't listed in lzma_mode enumeration; the return value will be false.
*
* There is no way to list which modes are available in this particular
* liblzma version and build. It would be useless, because a new compression
* mode, which the application developer wasn't aware, could require giving
* additional options to the encoder that the older modes don't need.
*/
extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode)
lzma_nothrow lzma_attr_const;
/**
* \brief Options specific to the LZMA1 and LZMA2 filters
*
* Since LZMA1 and LZMA2 share most of the code, it's simplest to share
* the options structure too. For encoding, all but the reserved variables
* need to be initialized unless specifically mentioned otherwise.
*
* For raw decoding, both LZMA1 and LZMA2 need dict_size, preset_dict, and
* preset_dict_size (if preset_dict != NULL). LZMA1 needs also lc, lp, and pb.
*/
typedef struct {
/**
* \brief Dictionary size in bytes
*
* Dictionary size indicates how many bytes of the recently processed
* uncompressed data is kept in memory. One method to reduce size of
* the uncompressed data is to store distance-length pairs, which
* indicate what data to repeat from the dictionary buffer. Thus,
* the bigger the dictionary, the better the compression ratio
* usually is.
*
* Maximum size of the dictionary depends on multiple things:
* - Memory usage limit
* - Available address space (not a problem on 64-bit systems)
* - Selected match finder (encoder only)
*
* Currently the maximum dictionary size for encoding is 1.5 GiB
* (i.e. (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) even on 64-bit
* systems for certain match finder implementation reasons. In the
* future, there may be match finders that support bigger
* dictionaries.
*
* Decoder already supports dictionaries up to 4 GiB - 1 B (i.e.
* UINT32_MAX), so increasing the maximum dictionary size of the
* encoder won't cause problems for old decoders.
*
* Because extremely small dictionaries sizes would have unneeded
* overhead in the decoder, the minimum dictionary size is 4096 bytes.
*
* \note When decoding, too big dictionary does no other harm
* than wasting memory.
*/
uint32_t dict_size;
# define LZMA_DICT_SIZE_MIN UINT32_C(4096)
# define LZMA_DICT_SIZE_DEFAULT (UINT32_C(1) << 23)
/**
* \brief Pointer to an initial dictionary
*
* It is possible to initialize the LZ77 history window using
* a preset dictionary. It is useful when compressing many
* similar, relatively small chunks of data independently from
* each other. The preset dictionary should contain typical
* strings that occur in the files being compressed. The most
* probable strings should be near the end of the preset dictionary.
*
* This feature should be used only in special situations. For
* now, it works correctly only with raw encoding and decoding.
* Currently none of the container formats supported by
* liblzma allow preset dictionary when decoding, thus if
* you create a .xz or .lzma file with preset dictionary, it
* cannot be decoded with the regular decoder functions. In the
* future, the .xz format will likely get support for preset
* dictionary though.
*/
const uint8_t *preset_dict;
/**
* \brief Size of the preset dictionary
*
* Specifies the size of the preset dictionary. If the size is
* bigger than dict_size, only the last dict_size bytes are
* processed.
*
* This variable is read only when preset_dict is not NULL.
* If preset_dict is not NULL but preset_dict_size is zero,
* no preset dictionary is used (identical to only setting
* preset_dict to NULL).
*/
uint32_t preset_dict_size;
/**
* \brief Number of literal context bits
*
* How many of the highest bits of the previous uncompressed
* eight-bit byte (also known as `literal') are taken into
* account when predicting the bits of the next literal.
*
* \todo Example
*
* There is a limit that applies to literal context bits and literal
* position bits together: lc + lp <= 4. Without this limit the
* decoding could become very slow, which could have security related
* results in some cases like email servers doing virus scanning.
* This limit also simplifies the internal implementation in liblzma.
*
* There may be LZMA1 streams that have lc + lp > 4 (maximum possible
* lc would be 8). It is not possible to decode such streams with
* liblzma.
*/
uint32_t lc;
# define LZMA_LCLP_MIN 0
# define LZMA_LCLP_MAX 4
# define LZMA_LC_DEFAULT 3
/**
* \brief Number of literal position bits
*
* How many of the lowest bits of the current position (number
* of bytes from the beginning of the uncompressed data) in the
* uncompressed data is taken into account when predicting the
* bits of the next literal (a single eight-bit byte).
*
* \todo Example
*/
uint32_t lp;
# define LZMA_LP_DEFAULT 0
/**
* \brief Number of position bits
*
* How many of the lowest bits of the current position in the
* uncompressed data is taken into account when estimating
* probabilities of matches. A match is a sequence of bytes for
* which a matching sequence is found from the dictionary and
* thus can be stored as distance-length pair.
*
* Example: If most of the matches occur at byte positions of
* 8 * n + 3, that is, 3, 11, 19, ... set pb to 3, because 2**3 == 8.
*/
uint32_t pb;
# define LZMA_PB_MIN 0
# define LZMA_PB_MAX 4
# define LZMA_PB_DEFAULT 2
/**
* \brief Indicate if the options structure is persistent
*
* If this is true, the application must keep this options structure
* available after the LZMA2 encoder has been initialized. With
* persistent structure it is possible to change some encoder options
* in the middle of the encoding process without resetting the encoder.
*
* This option is used only by LZMA2. LZMA1 ignores this and it is
* safe to not initialize this when encoding with LZMA1.
*/
lzma_bool persistent;
/** Compression mode */
lzma_mode mode;
/**
* \brief Nice length of a match
*
* This determines how many bytes the encoder compares from the match
* candidates when looking for the best match. Once a match of at
* least nice_len bytes long is found, the encoder stops looking for
* better condidates and encodes the match. (Naturally, if the found
* match is actually longer than nice_len, the actual length is
* encoded; it's not truncated to nice_len.)
*
* Bigger values usually increase the compression ratio and
* compression time. For most files, 32 to 128 is a good value,
* which gives very good compression ratio at good speed.
*
* The exact minimum value depends on the match finder. The maximum
* is 273, which is the maximum length of a match that LZMA1 and
* LZMA2 can encode.
*/
uint32_t nice_len;
/** Match finder ID */
lzma_match_finder mf;
/**
* \brief Maximum search depth in the match finder
*
* For every input byte, match finder searches through the hash chain
* or binary tree in a loop, each iteration going one step deeper in
* the chain or tree. The searching stops if
* - a match of at least nice_len bytes long is found;
* - all match candidates from the hash chain or binary tree have
* been checked; or
* - maximum search depth is reached.
*
* Maximum search depth is needed to prevent the match finder from
* wasting too much time in case there are lots of short match
* candidates. On the other hand, stopping the search before all
* candidates have been checked can reduce compression ratio.
*
* Setting depth to zero tells liblzma to use an automatic default
* value, that depends on the selected match finder and nice_len.
* The default is in the range [10, 200] or so (it may vary between
* liblzma versions).
*
* Using a bigger depth value than the default can increase
* compression ratio in some cases. There is no strict maximum value,
* but high values (thousands or millions) should be used with care:
* the encoder could remain fast enough with typical input, but
* malicious input could cause the match finder to slow down
* dramatically, possibly creating a denial of service attack.
*/
uint32_t depth;
/*
* Reserved space to allow possible future extensions without
* breaking the ABI. You should not touch these, because the names
* of these variables may change. These are and will never be used
* with the currently supported options, so it is safe to leave these
* uninitialized.
*/
void *reserved_ptr1;
void *reserved_ptr2;
uint32_t reserved_int1;
uint32_t reserved_int2;
uint32_t reserved_int3;
uint32_t reserved_int4;
uint32_t reserved_int5;
uint32_t reserved_int6;
uint32_t reserved_int7;
uint32_t reserved_int8;
lzma_reserved_enum reserved_enum1;
lzma_reserved_enum reserved_enum2;
lzma_reserved_enum reserved_enum3;
lzma_reserved_enum reserved_enum4;
} lzma_options_lzma;
/**
* \brief Set a compression preset to lzma_options_lzma structure
*
* 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9
* of the xz command line tool. In addition, it is possible to bitwise-or
* flags to the preset. Currently only LZMA_PRESET_EXTREME is supported.
* The flags are defined in container.h, because the flags are used also
* with lzma_easy_encoder().
*
* The preset values are subject to changes between liblzma versions.
*
* This function is available only if LZMA1 or LZMA2 encoder has been enabled
* when building liblzma.
*/
extern LZMA_API(lzma_bool) lzma_lzma_preset(
lzma_options_lzma *options, uint32_t preset) lzma_nothrow;
/**
* \file lzma/stream_flags.h
* \brief .xz Stream Header and Stream Footer encoder and decoder
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Size of Stream Header and Stream Footer
*
* Stream Header and Stream Footer have the same size and they are not
* going to change even if a newer version of the .xz file format is
* developed in future.
*/
#define LZMA_STREAM_HEADER_SIZE 12
/**
* \brief Options for encoding/decoding Stream Header and Stream Footer
*/
typedef struct {
/**
* \brief Stream Flags format version
*
* To prevent API and ABI breakages if new features are needed in
* Stream Header or Stream Footer, a version number is used to
* indicate which fields in this structure are in use. For now,
* version must always be zero. With non-zero version, the
* lzma_stream_header_encode() and lzma_stream_footer_encode()
* will return LZMA_OPTIONS_ERROR.
*
* lzma_stream_header_decode() and lzma_stream_footer_decode()
* will always set this to the lowest value that supports all the
* features indicated by the Stream Flags field. The application
* must check that the version number set by the decoding functions
* is supported by the application. Otherwise it is possible that
* the application will decode the Stream incorrectly.
*/
uint32_t version;
/**
* \brief Backward Size
*
* Backward Size must be a multiple of four bytes. In this Stream
* format version, Backward Size is the size of the Index field.
*
* Backward Size isn't actually part of the Stream Flags field, but
* it is convenient to include in this structure anyway. Backward
* Size is present only in the Stream Footer. There is no need to
* initialize backward_size when encoding Stream Header.
*
* lzma_stream_header_decode() always sets backward_size to
* LZMA_VLI_UNKNOWN so that it is convenient to use
* lzma_stream_flags_compare() when both Stream Header and Stream
* Footer have been decoded.
*/
lzma_vli backward_size;
# define LZMA_BACKWARD_SIZE_MIN 4
# define LZMA_BACKWARD_SIZE_MAX (LZMA_VLI_C(1) << 34)
/**
* \brief Check ID
*
* This indicates the type of the integrity check calculated from
* uncompressed data.
*/
lzma_check check;
/*
* Reserved space to allow possible future extensions without
* breaking the ABI. You should not touch these, because the
* names of these variables may change.
*
* (We will never be able to use all of these since Stream Flags
* is just two bytes plus Backward Size of four bytes. But it's
* nice to have the proper types when they are needed.)
*/
lzma_reserved_enum reserved_enum1;
lzma_reserved_enum reserved_enum2;
lzma_reserved_enum reserved_enum3;
lzma_reserved_enum reserved_enum4;
lzma_reserved_enum reserved_enum5;
lzma_reserved_enum reserved_enum6;
lzma_bool reserved_bool1;
lzma_bool reserved_bool2;
lzma_bool reserved_bool3;
lzma_bool reserved_bool4;
lzma_bool reserved_bool5;
lzma_bool reserved_bool6;
lzma_bool reserved_bool7;
lzma_bool reserved_bool8;
uint32_t reserved_int1;
uint32_t reserved_int2;
uint32_t reserved_int3;
uint32_t reserved_int4;
} lzma_stream_flags;
/**
* \brief Encode Stream Header
*
* \param options Stream Header options to be encoded.
* options->backward_size is ignored and doesn't
* need to be initialized.
* \param out Beginning of the output buffer of
* LZMA_STREAM_HEADER_SIZE bytes.
*
* \return - LZMA_OK: Encoding was successful.
* - LZMA_OPTIONS_ERROR: options->version is not supported by
* this liblzma version.
* - LZMA_PROG_ERROR: Invalid options.
*/
extern LZMA_API(lzma_ret) lzma_stream_header_encode(
const lzma_stream_flags *options, uint8_t *out)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Encode Stream Footer
*
* \param options Stream Footer options to be encoded.
* \param out Beginning of the output buffer of
* LZMA_STREAM_HEADER_SIZE bytes.
*
* \return - LZMA_OK: Encoding was successful.
* - LZMA_OPTIONS_ERROR: options->version is not supported by
* this liblzma version.
* - LZMA_PROG_ERROR: Invalid options.
*/
extern LZMA_API(lzma_ret) lzma_stream_footer_encode(
const lzma_stream_flags *options, uint8_t *out)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Decode Stream Header
*
* \param options Stream Header options to be encoded.
* \param in Beginning of the input buffer of
* LZMA_STREAM_HEADER_SIZE bytes.
*
* options->backward_size is always set to LZMA_VLI_UNKNOWN. This is to
* help comparing Stream Flags from Stream Header and Stream Footer with
* lzma_stream_flags_compare().
*
* \return - LZMA_OK: Decoding was successful.
* - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given
* buffer cannot be Stream Header.
* - LZMA_DATA_ERROR: CRC32 doesn't match, thus the header
* is corrupt.
* - LZMA_OPTIONS_ERROR: Unsupported options are present
* in the header.
*
* \note When decoding .xz files that contain multiple Streams, it may
* make sense to print "file format not recognized" only if
* decoding of the Stream Header of the _first_ Stream gives
* LZMA_FORMAT_ERROR. If non-first Stream Header gives
* LZMA_FORMAT_ERROR, the message used for LZMA_DATA_ERROR is
* probably more appropriate.
*
* For example, Stream decoder in liblzma uses LZMA_DATA_ERROR if
* LZMA_FORMAT_ERROR is returned by lzma_stream_header_decode()
* when decoding non-first Stream.
*/
extern LZMA_API(lzma_ret) lzma_stream_header_decode(
lzma_stream_flags *options, const uint8_t *in)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Decode Stream Footer
*
* \param options Stream Header options to be encoded.
* \param in Beginning of the input buffer of
* LZMA_STREAM_HEADER_SIZE bytes.
*
* \return - LZMA_OK: Decoding was successful.
* - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given
* buffer cannot be Stream Footer.
* - LZMA_DATA_ERROR: CRC32 doesn't match, thus the Stream Footer
* is corrupt.
* - LZMA_OPTIONS_ERROR: Unsupported options are present
* in Stream Footer.
*
* \note If Stream Header was already decoded successfully, but
* decoding Stream Footer returns LZMA_FORMAT_ERROR, the
* application should probably report some other error message
* than "file format not recognized", since the file more likely
* is corrupt (possibly truncated). Stream decoder in liblzma
* uses LZMA_DATA_ERROR in this situation.
*/
extern LZMA_API(lzma_ret) lzma_stream_footer_decode(
lzma_stream_flags *options, const uint8_t *in)
lzma_nothrow lzma_attr_warn_unused_result;
/**
* \brief Compare two lzma_stream_flags structures
*
* backward_size values are compared only if both are not
* LZMA_VLI_UNKNOWN.
*
* \return - LZMA_OK: Both are equal. If either had backward_size set
* to LZMA_VLI_UNKNOWN, backward_size values were not
* compared or validated.
* - LZMA_DATA_ERROR: The structures differ.
* - LZMA_OPTIONS_ERROR: version in either structure is greater
* than the maximum supported version (currently zero).
* - LZMA_PROG_ERROR: Invalid value, e.g. invalid check or
* backward_size.
*/
extern LZMA_API(lzma_ret) lzma_stream_flags_compare(
const lzma_stream_flags *a, const lzma_stream_flags *b)
lzma_nothrow lzma_attr_pure;
/**
* \file lzma/subblock.h
* \brief Subblock filter
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Filter ID
*
* Filter ID of the Subblock filter. This is used as lzma_filter.id.
*/
#define LZMA_FILTER_SUBBLOCK LZMA_VLI_C(0x01)
/**
* \brief Subfilter mode
*
* See lzma_options_subblock.subfilter_mode for details.
*/
typedef enum {
LZMA_SUBFILTER_NONE,
/**<
* No Subfilter is in use.
*/
LZMA_SUBFILTER_SET,
/**<
* New Subfilter has been requested to be initialized.
*/
LZMA_SUBFILTER_RUN,
/**<
* Subfilter is active.
*/
LZMA_SUBFILTER_FINISH
/**<
* Subfilter has been requested to be finished.
*/
} lzma_subfilter_mode;
/**
* \brief Options for the Subblock filter
*
* Specifying options for the Subblock filter is optional: if the pointer
* options is NULL, no subfilters are allowed and the default value is used
* for subblock_data_size.
*/
typedef struct {
/* Options for encoder and decoder */
/**
* \brief Allowing subfilters
*
* If this true, subfilters are allowed.
*
* In the encoder, if this is set to false, subfilter_mode and
* subfilter_options are completely ignored.
*/
lzma_bool allow_subfilters;
/* Options for encoder only */
/**
* \brief Alignment
*
* The Subblock filter encapsulates the input data into Subblocks.
* Each Subblock has a header which takes a few bytes of space.
* When the output of the Subblock encoder is fed to another filter
* that takes advantage of the alignment of the input data (e.g. LZMA),
* the Subblock filter can add padding to keep the actual data parts
* in the Subblocks aligned correctly.
*
* The alignment should be a positive integer. Subblock filter will
* add enough padding between Subblocks so that this is true for
* every payload byte:
* input_offset % alignment == output_offset % alignment
*
* The Subblock filter assumes that the first output byte will be
* written to a position in the output stream that is properly
* aligned. This requirement is automatically met when the start
* offset of the Stream or Block is correctly told to Block or
* Stream encoder.
*/
uint32_t alignment;
# define LZMA_SUBBLOCK_ALIGNMENT_MIN 1
# define LZMA_SUBBLOCK_ALIGNMENT_MAX 32
# define LZMA_SUBBLOCK_ALIGNMENT_DEFAULT 4
/**
* \brief Size of the Subblock Data part of each Subblock
*
* This value is re-read every time a new Subblock is started.
*
* Bigger values
* - save a few bytes of space;
* - increase latency in the encoder (but no effect for decoding);
* - decrease memory locality (increased cache pollution) in the
* encoder (no effect in decoding).
*/
uint32_t subblock_data_size;
# define LZMA_SUBBLOCK_DATA_SIZE_MIN 1
# define LZMA_SUBBLOCK_DATA_SIZE_MAX (UINT32_C(1) << 28)
# define LZMA_SUBBLOCK_DATA_SIZE_DEFAULT 4096
/**
* \brief Run-length encoder remote control
*
* The Subblock filter has an internal run-length encoder (RLE). It
* can be useful when the data includes byte sequences that repeat
* very many times. The RLE can be used also when a Subfilter is
* in use; the RLE will be applied to the output of the Subfilter.
*
* Note that in contrast to traditional RLE, this RLE is intended to
* be used only when there's a lot of data to be repeated. If the
* input data has e.g. 500 bytes of NULs now and then, this RLE
* is probably useless, because plain LZMA should provide better
* results.
*
* Due to above reasons, it was decided to keep the implementation
* of the RLE very simple. When the rle variable is non-zero, it
* subblock_data_size must be a multiple of rle. Once the Subblock
* encoder has got subblock_data_size bytes of input, it will check
* if the whole buffer of the last subblock_data_size can be
* represented with repeats of chunks having size of rle bytes.
*
* If there are consecutive identical buffers of subblock_data_size
* bytes, they will be encoded using a single repeat entry if
* possible.
*
* If need arises, more advanced RLE can be implemented later
* without breaking API or ABI.
*/
uint32_t rle;
# define LZMA_SUBBLOCK_RLE_OFF 0
# define LZMA_SUBBLOCK_RLE_MIN 1
# define LZMA_SUBBLOCK_RLE_MAX 256
/**
* \brief Subfilter remote control
*
* When the Subblock filter is initialized, this variable must be
* LZMA_SUBFILTER_NONE or LZMA_SUBFILTER_SET.
*
* When subfilter_mode is LZMA_SUBFILTER_NONE, the application may
* put Subfilter options to subfilter_options structure, and then
* set subfilter_mode to LZMA_SUBFILTER_SET. No new input data will
* be read until the Subfilter has been enabled. Once the Subfilter
* has been enabled, liblzma will set subfilter_mode to
* LZMA_SUBFILTER_RUN.
*
* When subfilter_mode is LZMA_SUBFILTER_RUN, the application may
* set subfilter_mode to LZMA_SUBFILTER_FINISH. All the input
* currently available will be encoded before unsetting the
* Subfilter. Application must not change the amount of available
* input until the Subfilter has finished. Once the Subfilter has
* finished, liblzma will set subfilter_mode to LZMA_SUBFILTER_NONE.
*
* If the intent is to have Subfilter enabled to the very end of
* the data, it is not needed to separately disable Subfilter with
* LZMA_SUBFILTER_FINISH. Using LZMA_FINISH as the second argument
* of lzma_code() will make the Subblock encoder to disable the
* Subfilter once all the data has been ran through the Subfilter.
*
* After the first call with LZMA_SYNC_FLUSH or LZMA_FINISH, the
* application must not change subfilter_mode until LZMA_STREAM_END.
* Setting LZMA_SUBFILTER_SET/LZMA_SUBFILTER_FINISH and
* LZMA_SYNC_FLUSH/LZMA_FINISH _at the same time_ is fine.
*
* \note This variable is ignored if allow_subfilters is false.
*/
lzma_subfilter_mode subfilter_mode;
/**
* \brief Subfilter and its options
*
* When no Subfilter is used, the data is copied as is into Subblocks.
* Setting a Subfilter allows encoding some parts of the data with
* an additional filter. It is possible to many different Subfilters
* in the same Block, although only one can be used at once.
*
* \note This variable is ignored if allow_subfilters is false.
*/
lzma_filter subfilter_options;
} lzma_options_subblock;
/**
* \file lzma/version.h
* \brief Version number
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/*
* Version number splitted in components
*/
#define LZMA_VERSION_MAJOR 4
#define LZMA_VERSION_MINOR 999
#define LZMA_VERSION_PATCH 9
#define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_BETA
#ifndef LZMA_VERSION_COMMIT
# define LZMA_VERSION_COMMIT ""
#endif
/*
* Map symbolic stability levels to integers.
*/
#define LZMA_VERSION_STABILITY_ALPHA 0
#define LZMA_VERSION_STABILITY_BETA 1
#define LZMA_VERSION_STABILITY_STABLE 2
/**
* \brief Compile-time version number
*
* The version number is of format xyyyzzzs where
* - x = major
* - yyy = minor
* - zzz = revision
* - s indicates stability: 0 = alpha, 1 = beta, 2 = stable
*
* The same xyyyzzz triplet is never reused with different stability levels.
* For example, if 5.1.0alpha has been released, there will never be 5.1.0beta
* or 5.1.0 stable.
*
* \note The version number of liblzma has nothing to with
* the version number of Igor Pavlov's LZMA SDK.
*/
#define LZMA_VERSION (LZMA_VERSION_MAJOR * UINT32_C(10000000) \
+ LZMA_VERSION_MINOR * UINT32_C(10000) \
+ LZMA_VERSION_PATCH * UINT32_C(10) \
+ LZMA_VERSION_STABILITY)
/*
* Macros to construct the compile-time version string
*/
#if LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_ALPHA
# define LZMA_VERSION_STABILITY_STRING "alpha"
#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_BETA
# define LZMA_VERSION_STABILITY_STRING "beta"
#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_STABLE
# define LZMA_VERSION_STABILITY_STRING ""
#else
# error Incorrect LZMA_VERSION_STABILITY
#endif
#define LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) \
#major "." #minor "." #patch stability commit
#define LZMA_VERSION_STRING_C(major, minor, patch, stability, commit) \
LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit)
/**
* \brief Compile-time version as a string
*
* This can be for example "4.999.5alpha", "4.999.8beta", or "5.0.0" (stable
* versions don't have any "stable" suffix). In future, a snapshot built
* from source code repository may include an additional suffix, for example
* "4.999.8beta-21-g1d92". The commit ID won't be available in numeric form
* in LZMA_VERSION macro.
*/
#define LZMA_VERSION_STRING LZMA_VERSION_STRING_C( \
LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, \
LZMA_VERSION_PATCH, LZMA_VERSION_STABILITY_STRING, \
LZMA_VERSION_COMMIT)
/* #ifndef is needed for use with windres (MinGW or Cygwin). */
#ifndef LZMA_H_INTERNAL_RC
/**
* \brief Run-time version number as an integer
*
* Return the value of LZMA_VERSION macro at the compile time of liblzma.
* This allows the application to compare if it was built against the same,
* older, or newer version of liblzma that is currently running.
*/
extern LZMA_API(uint32_t) lzma_version_number(void)
lzma_nothrow lzma_attr_const;
/**
* \brief Run-time version as a string
*
* This function may be useful if you want to display which version of
* liblzma your application is currently using.
*/
extern LZMA_API(const char *) lzma_version_string(void)
lzma_nothrow lzma_attr_const;
#endif
/**
* \file lzma/vli.h
* \brief Variable-length integer handling
*
* In the .xz format, most integers are encoded in a variable-length
* representation, which is sometimes called little endian base-128 encoding.
* This saves space when smaller values are more likely than bigger values.
*
* The encoding scheme encodes seven bits to every byte, using minimum
* number of bytes required to represent the given value. Encodings that use
* non-minimum number of bytes are invalid, thus every integer has exactly
* one encoded representation. The maximum number of bits in a VLI is 63,
* thus the vli argument must be at maximum of UINT64_MAX / 2. You should
* use LZMA_VLI_MAX for clarity.
*/
/*
* Author: Lasse Collin
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*
* See ../lzma.h for information about liblzma as a whole.
*/
#ifndef LZMA_H_INTERNAL
# error Never include this file directly. Use <lzma.h> instead.
#endif
/**
* \brief Maximum supported value of variable-length integer
*/
#define LZMA_VLI_MAX (UINT64_MAX / 2)
/**
* \brief VLI value to denote that the value is unknown
*/
#define LZMA_VLI_UNKNOWN UINT64_MAX
/**
* \brief Maximum supported length of variable length integers
*/
#define LZMA_VLI_BYTES_MAX 9
/**
* \brief VLI constant suffix
*/
#define LZMA_VLI_C(n) UINT64_C(n)
/**
* \brief Variable-length integer type
*
* This will always be unsigned integer. Valid VLI values are in the range
* [0, LZMA_VLI_MAX]. Unknown value is indicated with LZMA_VLI_UNKNOWN,
* which is the maximum value of the underlaying integer type.
*
* In future, even if lzma_vli is typdefined to something else than uint64_t,
* it is guaranteed that 2 * LZMA_VLI_MAX will not overflow lzma_vli.
* This simplifies integer overflow detection.
*/
typedef uint64_t lzma_vli;
/**
* \brief Simple macro to validate variable-length integer
*
* This is useful to test that application has given acceptable values
* for example in the uncompressed_size and compressed_size variables.
*
* \return True if the integer is representable as VLI or if it
* indicates unknown value.
*/
#define lzma_vli_is_valid(vli) \
((vli) <= LZMA_VLI_MAX || (vli) == LZMA_VLI_UNKNOWN)
/**
* \brief Encode a variable-length integer
*
* This function has two modes: single-call and multi-call. Single-call mode
* encodes the whole integer at once; it is an error if the output buffer is
* too small. Multi-call mode saves the position in *vli_pos, and thus it is
* possible to continue encoding if the buffer becomes full before the whole
* integer has been encoded.
*
* \param vli Integer to be encoded
* \param vli_pos How many VLI-encoded bytes have already been written
* out. When starting to encode a new integer, *vli_pos
* must be set to zero. To use single-call encoding,
* set vli_pos to NULL.
* \param out Beginning of the output buffer
* \param out_pos The next byte will be written to out[*out_pos].
* \param out_size Size of the out buffer; the first byte into
* which no data is written to is out[out_size].
*
* \return Slightly different return values are used in multi-call and
* single-call modes.
*
* Single-call (vli_pos == NULL):
* - LZMA_OK: Integer successfully encoded.
* - LZMA_PROG_ERROR: Arguments are not sane. This can be due
* to too little output space; single-call mode doesn't use
* LZMA_BUF_ERROR, since the application should have checked
* the encoded size with lzma_vli_size().
*
* Multi-call (vli_pos != NULL):
* - LZMA_OK: So far all OK, but the integer is not
* completely written out yet.
* - LZMA_STREAM_END: Integer successfully encoded.
* - LZMA_BUF_ERROR: No output space was provided.
* - LZMA_PROG_ERROR: Arguments are not sane.
*/
extern LZMA_API(lzma_ret) lzma_vli_encode(lzma_vli vli,
size_t *lzma_restrict vli_pos, uint8_t *lzma_restrict out,
size_t *lzma_restrict out_pos, size_t out_size) lzma_nothrow;
/**
* \brief Decode a variable-length integer
*
* Like lzma_vli_encode(), this function has single-call and multi-call modes.
*
* \param vli Pointer to decoded integer. The decoder will
* initialize it to zero when *vli_pos == 0, so
* application isn't required to initialize *vli.
* \param vli_pos How many bytes have already been decoded. When
* starting to decode a new integer, *vli_pos must
* be initialized to zero. To use single-call decoding,
* set this to NULL.
* \param in Beginning of the input buffer
* \param in_pos The next byte will be read from in[*in_pos].
* \param in_size Size of the input buffer; the first byte that
* won't be read is in[in_size].
*
* \return Slightly different return values are used in multi-call and
* single-call modes.
*
* Single-call (vli_pos == NULL):
* - LZMA_OK: Integer successfully decoded.
* - LZMA_DATA_ERROR: Integer is corrupt. This includes hitting
* the end of the input buffer before the whole integer was
* decoded; providing no input at all will use LZMA_DATA_ERROR.
* - LZMA_PROG_ERROR: Arguments are not sane.
*
* Multi-call (vli_pos != NULL):
* - LZMA_OK: So far all OK, but the integer is not
* completely decoded yet.
* - LZMA_STREAM_END: Integer successfully decoded.
* - LZMA_DATA_ERROR: Integer is corrupt.
* - LZMA_BUF_ERROR: No input was provided.
* - LZMA_PROG_ERROR: Arguments are not sane.
*/
extern LZMA_API(lzma_ret) lzma_vli_decode(lzma_vli *lzma_restrict vli,
size_t *lzma_restrict vli_pos, const uint8_t *lzma_restrict in,
size_t *lzma_restrict in_pos, size_t in_size) lzma_nothrow;
/**
* \brief Get the number of bytes required to encode a VLI
*
* \return Number of bytes on success (1-9). If vli isn't valid,
* zero is returned.
*/
extern LZMA_API(uint32_t) lzma_vli_size(lzma_vli vli)
lzma_nothrow lzma_attr_pure;
......@@ -119,10 +119,10 @@ NEWBRT_O_FILES += brtloader.$(OEXT) quicklz.$(OEXT) compress.$(OEXT)
brtloader.$(OEXT): $(DEPEND_COMPILE)
$(NEWBRT_O_FILES): VISIBILITY=
$(NEWBRT_O_FILES): $(LZMA_H)
$(NEWBRT_SO): DISABLE_WARNING += 10237 # Do not complain about -lcilkrts being linked in dynamically, static library not available
$(NEWBRT_SO): $(NEWBRT_O_FILES)
echo $(patsubst %,newbrt/%,$(NEWBRT_O_FILES)) > ../lib/newbrt.olist
$(TOKULINKER) $(SHARED) $(SYMBOLS) $(GCOV_FLAGS) $(SKIP_WARNING) $(NEWBRT_O_FILES) -o$(NEWBRT_SO) $(LINUX_NOSTDLIB) $(LCILKRTS)
$(NEWBRT_SO): $(NEWBRT_O_FILES) $(LZMA_A)
$(TOKULINKER) $(SHARED) $(SYMBOLS) $(GCOV_FLAGS) $(SKIP_WARNING) $(NEWBRT_O_FILES) $(LZMA_A) -o$(NEWBRT_SO) $(LINUX_NOSTDLIB) $(LCILKRTS)
$(NEWBRT_A): $(NEWBRT_O_FILES)
log_code.$(OEXT): log_header.h wbuf.h log-internal.h rbuf.h
......@@ -145,7 +145,7 @@ logformat$(BINSUF): logformat.c $(LIBPORTABILITY_SO)
ifeq ($(PROF),1)
libs: $(NEWBRT_A)
else
libs: $(NEWBRT_SO)
libs: $(NEWBRT_SO) $(NEWBRT_A)
endif
bins: $(BINS)
......@@ -176,13 +176,14 @@ clean: clean-local clean-tests
clean-tests:
cd tests;$(MAKE) clean
clean-local:
$(RM) $(TOKUROOT)lib/libnewbrt.$(AEXT) $(TOKUROOT)lib/libnewbrt.$(SOEXT)
rm -rf $(NEWBRT)
rm -rf test_oexcl.c.tmp *.brt
rm -rf log_code.c log_header.h log_print.c logformat
brtdump_static$(BINSUF): DEPEND_LINK = -lpthread -ldl -lz
brtdump_static$(BINSUF): brtdump.$(OEXT) $(NEWBRT_A) $(LIBPORTABILITY_A) $(DEPEND_COMPILE) $(DEPEND_LINK)
$(CC) $< $(NEWBRT_A) $(LIBPORTABILITY_A) $(BIN_FROM_O_FLAGS_NOLIB) $(ALWAYS_LINK) $(LINK_MUST_BE_LAST)
brtdump_static$(BINSUF): brtdump.$(OEXT) $(NEWBRT_A) $(LZMA_A) $(LIBPORTABILITY_A) $(DEPEND_COMPILE) $(DEPEND_LINK)
$(CC) $< $(NEWBRT_A) $(LZMA_A) $(LIBPORTABILITY_A) $(BIN_FROM_O_FLAGS_NOLIB) $(ALWAYS_LINK) $(LINK_MUST_BE_LAST)
# After doing (cd ../src/tests;make test_log5.recover), run these. The files should have no differences.
testdump: brtdump$(BINSUF)
......
......@@ -27,6 +27,7 @@
#include "block_table.h"
#include "c_dialects.h"
#include "mempool.h"
#include "compress.h"
// Uncomment the following to use quicklz
......@@ -411,6 +412,7 @@ struct brt_header {
uint32_t count_of_optimize_in_progress; // the number of hot optimize operations currently in progress on this tree
uint32_t count_of_optimize_in_progress_read_from_disk; // the number of hot optimize operations in progress on this tree at the time of the last crash (this field is in-memory only)
MSN msn_at_start_of_last_completed_optimize; // all messages before this msn have been applied to leaf nodes
enum toku_compression_method compression_method;
};
struct brt {
......
......@@ -348,9 +348,9 @@ serialize_brtnode_partition(BRTNODE node, int i, struct sub_block *sb) {
// into a newly allocated buffer sb->compressed_ptr
//
static void
compress_brtnode_sub_block(struct sub_block *sb) {
compress_brtnode_sub_block(struct sub_block *sb, enum toku_compression_method method) {
assert(sb->compressed_ptr == NULL);
set_compressed_size_bound(sb);
set_compressed_size_bound(sb, method);
// add 8 extra bytes, 4 for compressed size, 4 for decompressed size
sb->compressed_ptr = toku_xmalloc(sb->compressed_size_bound + 8);
//
......@@ -371,7 +371,8 @@ compress_brtnode_sub_block(struct sub_block *sb) {
sb->compressed_size = compress_nocrc_sub_block(
sb,
(char *)sb->compressed_ptr + 8,
sb->compressed_size_bound
sb->compressed_size_bound,
method
);
u_int32_t* extra = (u_int32_t *)(sb->compressed_ptr);
......@@ -691,13 +692,13 @@ static void
serialize_and_compress_partition(BRTNODE node, int childnum, SUB_BLOCK sb)
{
serialize_brtnode_partition(node, childnum, sb);
compress_brtnode_sub_block(sb);
compress_brtnode_sub_block(sb, node->h->compression_method);
}
void
toku_create_compressed_partition_from_available(
BRTNODE node,
int childnum,
BRTNODE node,
int childnum,
SUB_BLOCK sb
)
{
......@@ -773,7 +774,7 @@ toku_serialize_brtnode_to_memory (BRTNODE node,
// This does NOT include the header
//
serialize_brtnode_info(node, &sb_node_info);
compress_brtnode_sub_block(&sb_node_info);
compress_brtnode_sub_block(&sb_node_info, node->h->compression_method);
// now we have compressed each of our pieces into individual sub_blocks,
// we can put the header and all the subblocks into a single buffer
......@@ -1838,6 +1839,8 @@ serialize_brt_header_min_size (u_int32_t version) {
switch(version) {
case BRT_LAYOUT_VERSION_19:
size += 1; // compression method
case BRT_LAYOUT_VERSION_18:
size += sizeof(uint64_t); // time_of_last_optimize_begin
size += sizeof(uint64_t); // time_of_last_optimize_end
......@@ -1923,6 +1926,7 @@ int toku_serialize_brt_header_to_wbuf (struct wbuf *wbuf, struct brt_header *h,
wbuf_ulonglong(wbuf, h->time_of_last_optimize_end);
wbuf_int(wbuf, h->count_of_optimize_in_progress);
wbuf_MSN(wbuf, h->msn_at_start_of_last_completed_optimize);
wbuf_char(wbuf, (unsigned char) h->compression_method);
u_int32_t checksum = x1764_finish(&wbuf->checksum);
wbuf_int(wbuf, checksum);
lazy_assert(wbuf->ndone == wbuf->size);
......@@ -2204,6 +2208,17 @@ deserialize_brtheader (int fd, struct rbuf *rb, struct brt_header **brth) {
h->count_of_optimize_in_progress_read_from_disk = h->count_of_optimize_in_progress;
h->msn_at_start_of_last_completed_optimize = rbuf_msn(&rc);
}
if (h->layout_version >= BRT_LAYOUT_VERSION_19) {
unsigned char method = rbuf_char(&rc);
h->compression_method = (enum toku_compression_method) method;
} else {
// we hard coded zlib until 5.2, then quicklz in 5.2
if (h->layout_version < BRT_LAYOUT_VERSION_18) {
h->compression_method = TOKU_ZLIB_METHOD;
} else {
h->compression_method = TOKU_QUICKLZ_METHOD;
}
}
(void)rbuf_int(&rc); //Read in checksum and ignore (already verified).
if (rc.ndone!=rc.size) {ret = EINVAL; goto died1;}
......@@ -2257,6 +2272,7 @@ deserialize_brtheader_versioned (int fd, struct rbuf *rb, struct brt_header **br
case BRT_LAYOUT_VERSION_14:
h->basementnodesize = 128*1024; // basement nodes added in v15
//fall through on purpose
case BRT_LAYOUT_VERSION_19:
case BRT_LAYOUT_VERSION_18:
case BRT_LAYOUT_VERSION_17: // version 17 never released to customers
case BRT_LAYOUT_VERSION_16: // version 16 never released to customers
......@@ -2527,10 +2543,11 @@ static int
serialize_uncompressed_block_to_memory(char * uncompressed_buf,
int n_sub_blocks,
struct sub_block sub_block[/*n_sub_blocks*/],
enum toku_compression_method method,
/*out*/ size_t *n_bytes_to_write,
/*out*/ char **bytes_to_write) {
// allocate space for the compressed uncompressed_buf
size_t compressed_len = get_sum_compressed_size_bound(n_sub_blocks, sub_block);
size_t compressed_len = get_sum_compressed_size_bound(n_sub_blocks, sub_block, method);
size_t sub_block_header_len = sub_block_header_size(n_sub_blocks);
size_t header_len = node_header_overhead + sub_block_header_len + sizeof (uint32_t); // node + sub_block + checksum
char *XMALLOC_N(header_len + compressed_len, compressed_buf);
......@@ -2546,7 +2563,7 @@ serialize_uncompressed_block_to_memory(char * uncompressed_buf,
// compress all of the sub blocks
char *uncompressed_ptr = uncompressed_buf + node_header_overhead;
char *compressed_ptr = compressed_buf + header_len;
compressed_len = compress_all_sub_blocks(n_sub_blocks, sub_block, uncompressed_ptr, compressed_ptr, num_cores, brt_pool);
compressed_len = compress_all_sub_blocks(n_sub_blocks, sub_block, uncompressed_ptr, compressed_ptr, num_cores, brt_pool, method);
//if (0) printf("Block %" PRId64 " Size before compressing %u, after compression %"PRIu64"\n", blocknum.b, calculated_size-node_header_overhead, (uint64_t) compressed_len);
......@@ -2576,6 +2593,7 @@ serialize_uncompressed_block_to_memory(char * uncompressed_buf,
static int
toku_serialize_rollback_log_to_memory (ROLLBACK_LOG_NODE log,
int UU(n_workitems), int UU(n_threads),
enum toku_compression_method method,
/*out*/ size_t *n_bytes_to_write,
/*out*/ char **bytes_to_write) {
// get the size of the serialized node
......@@ -2600,7 +2618,7 @@ toku_serialize_rollback_log_to_memory (ROLLBACK_LOG_NODE log,
serialize_rollback_log_node_to_buf(log, buf, calculated_size, n_sub_blocks, sub_block);
//Compress and malloc buffer to write
int result = serialize_uncompressed_block_to_memory(buf, n_sub_blocks, sub_block,
int result = serialize_uncompressed_block_to_memory(buf, n_sub_blocks, sub_block, method,
n_bytes_to_write, bytes_to_write);
toku_free(buf);
return result;
......@@ -2613,7 +2631,7 @@ toku_serialize_rollback_log_to (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE log
size_t n_to_write;
char *compressed_buf;
{
int r = toku_serialize_rollback_log_to_memory(log, n_workitems, n_threads, &n_to_write, &compressed_buf);
int r = toku_serialize_rollback_log_to_memory(log, n_workitems, n_threads, h->compression_method, &n_to_write, &compressed_buf);
if (r!=0) return r;
}
......
......@@ -3349,6 +3349,7 @@ brt_init_header (BRT t, TOKUTXN txn) {
//Assign blocknum for root block, also dirty the header
toku_allocate_blocknum(t->h->blocktable, &root, t->h);
t->h->root_blocknum = root;
t->h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
toku_list_init(&t->h->live_brts);
toku_list_init(&t->h->zombie_brts);
......@@ -3391,6 +3392,20 @@ brt_alloc_init_header(BRT t, TOKUTXN txn) {
return r;
}
int
toku_brt_set_compression_method(BRT t, enum toku_compression_method method)
{
t->h->compression_method = method;
return 0;
}
int
toku_brt_get_compression_method(BRT t, enum toku_compression_method *methodp)
{
*methodp = t->h->compression_method;
return 0;
}
int toku_read_brt_header_and_store_in_cachefile (BRT brt, CACHEFILE cf, LSN max_acceptable_lsn, struct brt_header **header, BOOL* was_open)
// If the cachefile already has the header, then just get it.
// If the cachefile has not been initialized, then don't modify anything.
......@@ -6857,6 +6872,7 @@ toku_brt_header_init(struct brt_header *h,
h->root_blocknum = root_blocknum_on_disk;
h->flags = 0;
h->root_xid_that_created = root_xid_that_created;
h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
}
#include <valgrind/helgrind.h>
......
......@@ -14,6 +14,7 @@
#include "log.h"
#include "brt-search.h"
#include "c_dialects.h"
#include "compress.h"
C_BEGIN
......@@ -49,6 +50,8 @@ int toku_brt_get_nodesize(BRT, unsigned int *nodesize) __attribute__ ((warn_unus
void toku_brt_get_maximum_advised_key_value_lengths(unsigned int *klimit, unsigned int *vlimit);
int toku_brt_set_basementnodesize(BRT, unsigned int basementnodesize) __attribute__ ((warn_unused_result));
int toku_brt_get_basementnodesize(BRT, unsigned int *basementnodesize) __attribute__ ((warn_unused_result));
int toku_brt_set_compression_method(BRT, enum toku_compression_method) __attribute__ ((warn_unused_result));
int toku_brt_get_compression_method(BRT, enum toku_compression_method *) __attribute__((warn_unused_result));
int toku_brt_set_bt_compare(BRT, brt_compare_func) __attribute__ ((warn_unused_result));
brt_compare_func toku_brt_get_bt_compare (BRT brt);
......
......@@ -23,6 +23,7 @@ enum brt_layout_version_e {
// ALERT ALERT ALERT: version 16 never released to customers, internal and beta use only
BRT_LAYOUT_VERSION_17 = 17, // Dr. No: Add STAT64INFO_S to brt_header
BRT_LAYOUT_VERSION_18 = 18, // Dr. No: Add HOT info to brt_header
BRT_LAYOUT_VERSION_19 = 19, // Doofenshmirtz: Add compression method, msn_for_upgrade, TODO
BRT_NEXT_VERSION, // the version after the current version
BRT_LAYOUT_VERSION = BRT_NEXT_VERSION-1, // A hack so I don't have to change this line.
BRT_LAYOUT_MIN_SUPPORTED_VERSION = BRT_LAYOUT_VERSION_13, // Minimum version supported
......
......@@ -1848,6 +1848,7 @@ static int cachetable_put_internal(
);
assert(p);
nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex);
//note_hash_count(count);
return 0;
}
......@@ -2750,6 +2751,7 @@ int toku_cachetable_get_and_pin_nonblocking (
if (!nb_mutex_writers(&p->value_nb_mutex) &&
(!may_modify_value || resolve_checkpointing_fast(p)))
{
//cachetable_hit++;
nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex);
if (may_modify_value && p->checkpoint_pending) {
write_locked_pair_for_checkpoint(ct, p);
......
......@@ -3,22 +3,42 @@
#ident "$Id$"
#include <zlib.h>
#include <lzma.h>
#include "compress.h"
#include "memory.h"
#include "quicklz.h"
#include "toku_assert.h"
static inline enum toku_compression_method
normalize_compression_method(enum toku_compression_method method)
// Effect: resolve "friendly" names like "fast" and "small" into their real values.
{
switch (method) {
case TOKU_FAST_COMPRESSION_METHOD:
return TOKU_QUICKLZ_METHOD;
case TOKU_SMALL_COMPRESSION_METHOD:
return TOKU_LZMA_METHOD;
default:
return method; // everything else is fine
}
}
size_t toku_compress_bound (enum toku_compression_method a, size_t size)
// See compress.h for the specification of this function.
{
a = normalize_compression_method(a);
switch (a) {
case TOKU_NO_COMPRESSION:
return size + 1;
case TOKU_LZMA_METHOD:
return 1+lzma_stream_buffer_bound(size); // We need one extra for the rfc1950-style header byte (bits -03 are TOKU_LZMA_METHOD (1), bits 4-7 are the compression level)
case TOKU_QUICKLZ_METHOD:
return size+400 + 1; // quicklz manual says 400 bytes is enough. We need one more byte for the rfc1950-style header byte. bits 0-3 are 9, bits 4-7 are the QLZ_COMPRESSION_LEVEL.
case TOKU_ZLIB_METHOD:
return compressBound (size);
default:
break;
}
// fall through for bad enum (thus compiler can warn us if we didn't use all the enums
assert(0); return 0;
......@@ -32,6 +52,7 @@ void toku_compress (enum toku_compression_method a,
const Bytef *source, uLong sourceLen)
// See compress.h for the specification of this function.
{
a = normalize_compression_method(a);
assert(sourceLen < (1LL << 32));
switch (a) {
case TOKU_NO_COMPRESSION:
......@@ -60,7 +81,29 @@ void toku_compress (enum toku_compression_method a,
// Fill in that first byte
dest[0] = TOKU_QUICKLZ_METHOD + (QLZ_COMPRESSION_LEVEL << 4);
return;
}}
}
case TOKU_LZMA_METHOD: {
const int lzma_compression_level = 2;
if (sourceLen==0) {
// lzma version 4.999 requires at least one byte, so we'll do it ourselves.
assert(1<=*destLen);
*destLen = 1;
} else {
size_t out_pos = 1;
lzma_ret r = lzma_easy_buffer_encode(lzma_compression_level, LZMA_CHECK_CRC32, NULL,
source, sourceLen,
dest, &out_pos, *destLen);
assert(out_pos < *destLen);
assert(r==LZMA_OK);
*destLen = out_pos;
}
dest[0] = TOKU_LZMA_METHOD + (lzma_compression_level << 4);
return;
}
default:
break;
}
// default fall through to error.
assert(0);
}
......@@ -92,6 +135,24 @@ void toku_decompress (Bytef *dest, uLongf destLen,
assert(destLen==0);
}
return;
case TOKU_LZMA_METHOD: {
if (sourceLen>1) {
uint64_t memlimit = UINT64_MAX;
size_t out_pos = 0;
size_t in_pos = 1;
lzma_ret r = lzma_stream_buffer_decode(&memlimit, // memlimit, use UINT64_MAX to disable this check
0, // flags
NULL, // allocator
source, &in_pos, sourceLen,
dest, &out_pos, destLen);
assert(r==LZMA_OK);
assert(out_pos == destLen);
} else {
// length 1 means there is no data, so do nothing.
assert(destLen==0);
}
return;
}
}
// default fall through to error.
assert(0);
......
......@@ -5,16 +5,11 @@
#ident "$Id$"
#include <zlib.h>
#include <db.h>
// The following provides an abstraction of quicklz and zlib.
// We offer two compression methods: ZLIB and QUICKLZ.
// The resulting byte string includes enough information for us to decompress it. That is, we can tell whether it's z-compressed or qz-compressed.
enum toku_compression_method {
TOKU_NO_COMPRESSION = 0, // "identity" compression
TOKU_ZLIB_METHOD = 8, // RFC 1950 says use 8 for zlib. It reserves 15 to allow more bytes.
TOKU_QUICKLZ_METHOD = 9 // We use 9 for QUICKLZ with compression level = 3. I couldn't find any standard for any other numbers, so I just use 9. -Bradley
};
// The following provides an abstraction of quicklz and zlib.
// We offer three compression methods: ZLIB, QUICKLZ, and LZMA, as well as a "no compression" option. These options are declared in make_tdb.c.
// The resulting byte string includes enough information for us to decompress it. That is, we can tell whether it's z-compressed or qz-compressed or xz-compressed.
size_t toku_compress_bound (enum toku_compression_method a, size_t size);
// Effect: Return the number of bytes needed to compress a buffer of size SIZE using compression method A.
......
......@@ -37,24 +37,17 @@ sub_block_header_size(int n_sub_blocks) {
return sizeof (u_int32_t) + n_sub_blocks * sizeof (struct stored_sub_block);
}
// Allow the makefile to optionally configure for no compression
#ifdef TOKU_CONFIG_NO_COMPRESSION
static enum toku_compression_method toku_compress_method = TOKU_NO_COMPRESSION;
#else
static enum toku_compression_method toku_compress_method = TOKU_QUICKLZ_METHOD;
#endif
void
set_compressed_size_bound(struct sub_block *se) {
se->compressed_size_bound = toku_compress_bound(toku_compress_method, se->uncompressed_size);
set_compressed_size_bound(struct sub_block *se, enum toku_compression_method method) {
se->compressed_size_bound = toku_compress_bound(method, se->uncompressed_size);
}
// get the sum of the sub block compressed sizes
size_t
get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[]) {
get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[], enum toku_compression_method method) {
size_t compressed_size_bound = 0;
for (int i = 0; i < n_sub_blocks; i++) {
sub_block[i].compressed_size_bound = toku_compress_bound(toku_compress_method, sub_block[i].uncompressed_size);
sub_block[i].compressed_size_bound = toku_compress_bound(method, sub_block[i].uncompressed_size);
compressed_size_bound += sub_block[i].compressed_size_bound;
}
return compressed_size_bound;
......@@ -147,23 +140,11 @@ get_sub_block_index(int n_sub_blocks, struct sub_block sub_block[], size_t offse
#include "workset.h"
void
compress_work_init(struct compress_work *w, struct sub_block *sub_block) {
compress_work_init(struct compress_work *w, enum toku_compression_method method, struct sub_block *sub_block) {
w->method = method;
w->sub_block = sub_block;
}
void toku_set_default_compression_method (enum toku_compression_method a) {
switch (a) {
case TOKU_NO_COMPRESSION:
case TOKU_ZLIB_METHOD:
case TOKU_QUICKLZ_METHOD:
toku_compress_method = a;
return;
}
// fall through to error
assert(0);
}
//
// takes the uncompressed contents of sub_block
// and compresses them into sb_compressed_ptr
......@@ -172,28 +153,30 @@ void toku_set_default_compression_method (enum toku_compression_method a) {
//
u_int32_t
compress_nocrc_sub_block(
struct sub_block *sub_block,
void* sb_compressed_ptr,
u_int32_t cs_bound
)
struct sub_block *sub_block,
void* sb_compressed_ptr,
u_int32_t cs_bound,
enum toku_compression_method method
)
{
// compress it
Bytef *uncompressed_ptr = (Bytef *) sub_block->uncompressed_ptr;
Bytef *compressed_ptr = (Bytef *) sb_compressed_ptr;
uLongf uncompressed_len = sub_block->uncompressed_size;
uLongf real_compressed_len = cs_bound;
toku_compress(toku_compress_method,
toku_compress(method,
compressed_ptr, &real_compressed_len,
uncompressed_ptr, uncompressed_len);
return real_compressed_len;
return real_compressed_len;
}
void
compress_sub_block(struct sub_block *sub_block) {
compress_sub_block(struct sub_block *sub_block, enum toku_compression_method method) {
sub_block->compressed_size = compress_nocrc_sub_block(
sub_block,
sub_block->compressed_ptr,
sub_block->compressed_size_bound
sub_block,
sub_block->compressed_ptr,
sub_block->compressed_size_bound,
method
);
// checksum it
sub_block->xsum = x1764_memory(sub_block->compressed_ptr, sub_block->compressed_size);
......@@ -206,14 +189,14 @@ compress_worker(void *arg) {
struct compress_work *w = (struct compress_work *) workset_get(ws);
if (w == NULL)
break;
compress_sub_block(w->sub_block);
compress_sub_block(w->sub_block, w->method);
}
workset_release_ref(ws);
return arg;
}
size_t
compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool) {
compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool, enum toku_compression_method method) {
char *compressed_base_ptr = compressed_ptr;
size_t compressed_len;
......@@ -223,7 +206,7 @@ compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *un
// single sub-block
sub_block[0].uncompressed_ptr = uncompressed_ptr;
sub_block[0].compressed_ptr = compressed_ptr;
compress_sub_block(&sub_block[0]);
compress_sub_block(&sub_block[0], method);
compressed_len = sub_block[0].compressed_size;
} else {
// multiple sub-blocks
......@@ -241,7 +224,7 @@ compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *un
for (int i = 0; i < n_sub_blocks; i++) {
sub_block[i].uncompressed_ptr = uncompressed_ptr;
sub_block[i].compressed_ptr = compressed_ptr;
compress_work_init(&work[i], &sub_block[i]);
compress_work_init(&work[i], method, &sub_block[i]);
workset_put_locked(&ws, &work[i].base);
uncompressed_ptr += sub_block[i].uncompressed_size;
compressed_ptr += sub_block[i].compressed_size_bound;
......
......@@ -12,9 +12,6 @@
extern "C" {
#endif
void toku_set_default_compression_method (enum toku_compression_method a);
// Effect: for the following functions, set the default compression method.
static const int max_sub_blocks = 8;
static const int target_sub_block_size = 512*1024;
static const int max_basement_nodes = 32;
......@@ -46,11 +43,11 @@ size_t
sub_block_header_size(int n_sub_blocks);
void
set_compressed_size_bound(struct sub_block *se);
set_compressed_size_bound(struct sub_block *se, enum toku_compression_method method);
// get the sum of the sub block compressed sizes
size_t
get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[]);
get_sum_compressed_size_bound(int n_sub_blocks, struct sub_block sub_block[], enum toku_compression_method method);
// get the sum of the sub block uncompressed sizes
size_t
......@@ -76,27 +73,29 @@ get_sub_block_index(int n_sub_blocks, struct sub_block sub_block[], size_t offse
struct compress_work {
struct work base;
enum toku_compression_method method;
struct sub_block *sub_block;
};
void
compress_work_init(struct compress_work *w, struct sub_block *sub_block);
compress_work_init(struct compress_work *w, enum toku_compression_method method, struct sub_block *sub_block);
u_int32_t
compress_nocrc_sub_block(
struct sub_block *sub_block,
void* sb_compressed_ptr,
u_int32_t cs_bound
struct sub_block *sub_block,
void* sb_compressed_ptr,
u_int32_t cs_bound,
enum toku_compression_method method
);
void
compress_sub_block(struct sub_block *sub_block);
compress_sub_block(struct sub_block *sub_block, enum toku_compression_method method);
void *
compress_worker(void *arg);
size_t
compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool);
compress_all_sub_blocks(int n_sub_blocks, struct sub_block sub_block[], char *uncompressed_ptr, char *compressed_ptr, int num_cores, struct toku_thread_pool *pool, enum toku_compression_method method);
struct decompress_work {
struct work base;
......
......@@ -22,6 +22,7 @@ static void test_compress_buf_method (unsigned char *buf, int i, enum toku_compr
static void test_compress_buf (unsigned char *buf, int i) {
test_compress_buf_method(buf, i, TOKU_ZLIB_METHOD);
test_compress_buf_method(buf, i, TOKU_QUICKLZ_METHOD);
test_compress_buf_method(buf, i, TOKU_LZMA_METHOD);
}
static void test_compress_i (int i) {
......
......@@ -8,6 +8,7 @@
#include <errno.h>
#include <string.h>
#include "compress.h"
#include "sub_block.h"
int verbose;
......@@ -25,7 +26,7 @@ set_uint8_at_offset(void *vp, size_t offset, uint8_t newv) {
}
static void
test_sub_block_checksum(void *buf, int total_size, int my_max_sub_blocks, int n_cores, struct toku_thread_pool *pool) {
test_sub_block_checksum(void *buf, int total_size, int my_max_sub_blocks, int n_cores, struct toku_thread_pool *pool, enum toku_compression_method method) {
if (verbose)
printf("%s:%d %d %d\n", __FUNCTION__, __LINE__, total_size, my_max_sub_blocks);
......@@ -40,11 +41,11 @@ test_sub_block_checksum(void *buf, int total_size, int my_max_sub_blocks, int n_
struct sub_block sub_blocks[n_sub_blocks];
set_all_sub_block_sizes(total_size, sub_block_size, n_sub_blocks, sub_blocks);
size_t cbuf_size_bound = get_sum_compressed_size_bound(n_sub_blocks, sub_blocks);
size_t cbuf_size_bound = get_sum_compressed_size_bound(n_sub_blocks, sub_blocks, method);
void *cbuf = toku_malloc(cbuf_size_bound);
assert(cbuf);
size_t cbuf_size = compress_all_sub_blocks(n_sub_blocks, sub_blocks, buf, cbuf, n_cores, pool);
size_t cbuf_size = compress_all_sub_blocks(n_sub_blocks, sub_blocks, buf, cbuf, n_cores, pool, method);
assert(cbuf_size <= cbuf_size_bound);
void *ubuf = toku_malloc(total_size);
......@@ -92,16 +93,16 @@ set_random(void *buf, int total_size) {
}
static void
run_test(int total_size, int n_cores, struct toku_thread_pool *pool) {
run_test(int total_size, int n_cores, struct toku_thread_pool *pool, enum toku_compression_method method) {
void *buf = toku_malloc(total_size);
assert(buf);
for (int my_max_sub_blocks = 1; my_max_sub_blocks <= max_sub_blocks; my_max_sub_blocks++) {
memset(buf, 0, total_size);
test_sub_block_checksum(buf, total_size, my_max_sub_blocks, n_cores, pool);
test_sub_block_checksum(buf, total_size, my_max_sub_blocks, n_cores, pool, method);
set_random(buf, total_size);
test_sub_block_checksum(buf, total_size, my_max_sub_blocks, n_cores, pool);
test_sub_block_checksum(buf, total_size, my_max_sub_blocks, n_cores, pool, method);
}
toku_free(buf);
......@@ -141,7 +142,10 @@ test_main (int argc, const char *argv[]) {
for (int total_size = 256*1024; total_size <= 4*1024*1024; total_size *= 2) {
for (int size = total_size - e; size <= total_size + e; size++) {
run_test(size, n_cores, pool);
run_test(size, n_cores, pool, TOKU_NO_COMPRESSION);
run_test(size, n_cores, pool, TOKU_ZLIB_METHOD);
run_test(size, n_cores, pool, TOKU_QUICKLZ_METHOD);
run_test(size, n_cores, pool, TOKU_LZMA_METHOD);
}
}
......
......@@ -13,7 +13,7 @@
int verbose;
static void
test_sub_block_compression(void *buf, int total_size, int my_max_sub_blocks, int n_cores) {
test_sub_block_compression(void *buf, int total_size, int my_max_sub_blocks, int n_cores, enum toku_compression_method method) {
if (verbose)
printf("%s:%d %d %d\n", __FUNCTION__, __LINE__, total_size, my_max_sub_blocks);
......@@ -28,11 +28,11 @@ test_sub_block_compression(void *buf, int total_size, int my_max_sub_blocks, int
struct sub_block sub_blocks[n_sub_blocks];
set_all_sub_block_sizes(total_size, sub_block_size, n_sub_blocks, sub_blocks);
size_t cbuf_size_bound = get_sum_compressed_size_bound(n_sub_blocks, sub_blocks);
size_t cbuf_size_bound = get_sum_compressed_size_bound(n_sub_blocks, sub_blocks, method);
void *cbuf = toku_malloc(cbuf_size_bound);
assert(cbuf);
size_t cbuf_size = compress_all_sub_blocks(n_sub_blocks, sub_blocks, buf, cbuf, n_cores, NULL);
size_t cbuf_size = compress_all_sub_blocks(n_sub_blocks, sub_blocks, buf, cbuf, n_cores, NULL, method);
assert(cbuf_size <= cbuf_size_bound);
void *ubuf = toku_malloc(total_size);
......@@ -55,16 +55,16 @@ set_random(void *buf, int total_size) {
}
static void
run_test(int total_size, int n_cores) {
run_test(int total_size, int n_cores, enum toku_compression_method method) {
void *buf = toku_malloc(total_size);
assert(buf);
for (int my_max_sub_blocks = 1; my_max_sub_blocks <= max_sub_blocks; my_max_sub_blocks++) {
memset(buf, 0, total_size);
test_sub_block_compression(buf, total_size, my_max_sub_blocks, n_cores);
test_sub_block_compression(buf, total_size, my_max_sub_blocks, n_cores, method);
set_random(buf, total_size);
test_sub_block_compression(buf, total_size, my_max_sub_blocks, n_cores);
test_sub_block_compression(buf, total_size, my_max_sub_blocks, n_cores, method);
}
toku_free(buf);
......@@ -96,7 +96,10 @@ test_main (int argc, const char *argv[]) {
for (int total_size = 256*1024; total_size <= 4*1024*1024; total_size *= 2) {
for (int size = total_size - e; size <= total_size + e; size++) {
run_test(size, n_cores);
run_test(size, n_cores, TOKU_NO_COMPRESSION);
run_test(size, n_cores, TOKU_ZLIB_METHOD);
run_test(size, n_cores, TOKU_QUICKLZ_METHOD);
run_test(size, n_cores, TOKU_LZMA_METHOD);
}
}
......
......@@ -80,8 +80,7 @@ export.def: export.map Makefile
buildlocktrees: $(LOCKTREE) $(RANGETREE) ;
$(LIBRARY): $(OBJS) $(LOCKTREE) $(RANGETREE) $(NEWBRT) $(DEPEND_COMPILE) $(DEPEND_LINK)
NEWBRT_OFILES = $(patsubst %,../%,$(shell cat ../lib/newbrt.olist))
$(LIBRARY): LINK_FILES=$(OBJS) $(LOCKTREE) $(RANGETREE) $(NEWBRT_OFILES)
$(LIBRARY): LINK_FILES=$(OBJS) $(LOCKTREE) $(RANGETREE) ../lib/libnewbrt.a $(LZMA_A)
ifeq ($(CC),icc)
ifeq ($(HAVE_CILK),1)
ifeq (0,0)
......
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
/*
* Test that different compression methods can be used.
*/
#include <db.h>
#include "test.h"
static const int VAL_SIZE = 248;
static const int NUM_ROWS = 1 << 12;
static int
insert(DB_ENV *env, DB *db, void *UU(extra))
{
char val[VAL_SIZE];
memset(val, 0, sizeof val);
DB_TXN *txn;
int r = env->txn_begin(env, 0, &txn, 0);
CKERR(r);
for (int i = 0; i < NUM_ROWS; ++i) {
DBT k, v;
*((int *) val) = i;
r = db->put(db, txn, dbt_init(&k, &i, sizeof i), dbt_init(&v, val, sizeof val), 0);
CKERR(r);
}
r = txn->commit(txn, 0);
CKERR(r);
return 0;
}
static int
lookup(DB_ENV *env, DB *db, void *UU(extra))
{
DB_TXN *txn;
int r = env->txn_begin(env, 0, &txn, 0);
CKERR(r);
for (int i = 0; i < NUM_ROWS; ++i) {
DBT k, v;
r = db->get(db, txn, dbt_init(&k, &i, sizeof i), dbt_init(&v, NULL, 0), 0);
CKERR(r);
assert(v.size == (size_t) VAL_SIZE);
assert(*(int *) v.data == i);
}
r = txn->commit(txn, 0);
CKERR(r);
return 0;
}
typedef int (*db_callback)(DB_ENV *env, DB *db, void *extra);
static int
with_open_db(db_callback cb, void *cb_extra, bool set_method, enum toku_compression_method method)
{
DB_ENV *env;
DB *db;
int r;
r = db_env_create(&env, 0);
CKERR(r);
r = env->open(env, ENVDIR, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO);
CKERR(r);
r = db_create(&db, env, 0);
CKERR(r);
{
DB_TXN *txn;
r = env->txn_begin(env, 0, &txn, 0);
CKERR(r);
r = db->open(db, txn, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO);
CKERR(r);
if (set_method) {
r = db->set_compression_method(db, method);
CKERR(r);
}
r = txn->commit(txn, 0);
CKERR(r);
}
{
enum toku_compression_method saved_method;
r = db->get_compression_method(db, &saved_method);
CKERR(r);
assert(saved_method == method);
}
int cr = cb(env, db, cb_extra);
r = db->close(db, 0);
CKERR(r);
r = env->close(env, 0);
CKERR(r);
return cr;
}
static void
run_test(enum toku_compression_method method)
{
int r;
r = system("rm -rf " ENVDIR);
CKERR(r);
r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO);
CKERR(r);
r = with_open_db(insert, NULL, true, method);
CKERR(r);
r = with_open_db(lookup, NULL, false, method);
CKERR(r);
}
int
test_main(int argc, char *const argv[])
{
parse_args(argc, argv);
run_test(TOKU_NO_COMPRESSION);
run_test(TOKU_ZLIB_METHOD);
run_test(TOKU_QUICKLZ_METHOD);
run_test(TOKU_LZMA_METHOD);
return 0;
}
......@@ -579,6 +579,20 @@ toku_db_get_readpagesize(DB *db, u_int32_t *readpagesize_ptr) {
return r;
}
static int
toku_db_set_compression_method(DB *db, enum toku_compression_method compression_method) {
HANDLE_PANICKED_DB(db);
int r = toku_brt_set_compression_method(db->i->brt, compression_method);
return r;
}
static int
toku_db_get_compression_method(DB *db, enum toku_compression_method *compression_method_ptr) {
HANDLE_PANICKED_DB(db);
int r = toku_brt_get_compression_method(db->i->brt, compression_method_ptr);
return r;
}
static int
toku_db_stat64(DB * db, DB_TXN *txn, DB_BTREE_STAT64 *s) {
HANDLE_PANICKED_DB(db);
......@@ -834,6 +848,16 @@ locked_db_get_readpagesize(DB *db, u_int32_t *readpagesize_ptr) {
toku_ydb_lock(); int r = toku_db_get_readpagesize(db, readpagesize_ptr); toku_ydb_unlock(); return r;
}
static int
locked_db_set_compression_method(DB *db, enum toku_compression_method compression_method) {
toku_ydb_lock(); int r = toku_db_set_compression_method(db, compression_method); toku_ydb_unlock(); return r;
}
static int
locked_db_get_compression_method(DB *db, enum toku_compression_method *compression_method_ptr) {
toku_ydb_lock(); int r = toku_db_get_compression_method(db, compression_method_ptr); toku_ydb_unlock(); return r;
}
// TODO 2216 delete this
static int
locked_db_fd(DB * UU(db), int * UU(fdp)) {
......@@ -1033,6 +1057,8 @@ toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags) {
SDB(get_pagesize);
SDB(set_readpagesize);
SDB(get_readpagesize);
SDB(set_compression_method);
SDB(get_compression_method);
SDB(set_flags);
SDB(get_flags);
SDB(fd);
......
......@@ -396,6 +396,17 @@ else
ARFLAGS = $(DBG_ARFLAGS)
endif
LZMA_H = $(TOKUROOT)include/lzma.h
ifeq ($(CC),icc)
LZMA_A = $(TOKUROOT)lib/liblzma_icc_opt.$(AEXT)
else
ifeq ($(DEBUG),1)
LZMA_A = $(TOKUROOT)lib/liblzma_gcc_dbg.$(AEXT)
else
LZMA_A = $(TOKUROOT)lib/liblzma_gcc_opt.$(AEXT)
endif
endif
CFLAGS += $(WALL) $(W64) $(WERROR) $(FORMAT) $(VISIBILITY) $(FPICFLAGS) $(SHADOW) $(ARCHFLAGS)
CFLAGS += $(OPTFLAGS) $(GCOV_FLAGS) $(PROF_FLAGS)
CFLAGS += $(SYMBOLS) $(SKIP_WARNING) $(C99) $(CCQUIET)
......
......@@ -47,9 +47,7 @@ YDB_OBJS=$(wildcard ../src/*.$(OEXT))
$(STATIC_UTILS) $(UTILS): LINK_FILES=$(YDB_OBJS) $(TOKUROOT)lib/libtokudb.$(AEXT) $(LOCKTREE) $(RANGETREE) $(TOKUROOT)lib/libnewbrt.$(AEXT) $(LIBPORTABILITY_A)
else
$(UTILS): DLINK_FILES=$(TOKUROOT)lib/libtokudb.$(SOEXT) $(TOKUROOT)lib/libtokuportability.$(SOEXT)
NEWBRT_OBJS=$(patsubst %,../%,$(shell cat $(TOKUROOT)lib/newbrt.olist))
TOKUPORTABILITY_OBJS=$(patsubst %,../%,$(shell cat $(TOKUROOT)lib/tokuportability.olist))
$(STATIC_UTILS): LINK_FILES+= $(NEWBRT_OBJS) $(TOKUPORTABILITY_OBJS) $(YDB_OBJS) ../src/lock_tree/locktree.a ../src/range_tree/rangetree.a
$(STATIC_UTILS): LINK_FILES+= $(YDB_OBJS) ../src/lock_tree/locktree.a ../src/range_tree/rangetree.a ../lib/libnewbrt.a ../lib/libtokuportability.a $(LZMA_A)
endif
ifeq ($(BRTLOADER),cilk)
......
CFLAGS=-W -Wall -Werror -ggdb3 -O3 -std=c99
#CPPFLAGS=-I../xz-5.0.3/src/liblzma/api
LDLIBS=-llzma
#LDFLAGS=-L../xz-5.0.3/src/liblzma/.libs -Wl,-rpath,../xz-5.0.3/src/liblzma/.libs
default: lzma-compress lzma-compress-mt
lzma-compress-mt: CFLAGS+=-pthread
clean:
rm -f lzma-compress lzma-compress-mt
/* An example of using lzma. Written by Bradley Tue Mar 6 2012 */
/* Read stdin in blocks of size up to 1MB, compress it and write it to stdout.
* Each output block is preceeded by two 4-byte numbers that says how big the compressed block is and how big the uncompressed block is.
*/
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <sys/time.h>
#include <lzma.h>
float tdiff (struct timeval *start, struct timeval *end) {
return (end->tv_sec-start->tv_sec) +1e-6*(end->tv_usec - start->tv_usec);
}
int main (int argc, char *argv[] __attribute__((__unused__)))
{
assert(argc==1);
const size_t in_max = 1024*1024;
uint8_t *in_data = malloc(in_max);
uint8_t *reconstituted = malloc(in_max);
assert(in_data);
size_t out_current = 0;
uint8_t *out_data = NULL;
double compress_time = 0, decompress_time = 0;
while (1) {
size_t in_size = fread(in_data, 1, in_max, stdin);
if (in_size==0) {
assert(feof(stdin));
break;
}
printf("Got %ld bytes\n", in_size);
size_t out_bound = lzma_stream_buffer_bound(in_size);
printf("out bound = %ld\n", out_bound);
if (out_current < out_bound) {
out_data = realloc(out_data, out_bound);
out_current = out_bound;
}
size_t compressed_size;
{
struct timeval tstart,tend;
gettimeofday(&tstart, NULL);
size_t out_pos = 0;
lzma_ret r = lzma_easy_buffer_encode(6, LZMA_CHECK_CRC32, NULL,
in_data, in_size,
out_data, &out_pos, out_current);
gettimeofday(&tend, NULL);
compress_time += tdiff(&tstart, &tend);
printf("r=%d\n", r);
assert(r==LZMA_OK);
printf("out size = %ld\n", out_pos);
compressed_size = out_pos;
}
{
struct timeval tstart,tend;
gettimeofday(&tstart, NULL);
uint64_t memlimit = UINT64_MAX;
size_t compressed_pos = 0;
size_t reconstituted_pos = 0;
lzma_ret r = lzma_stream_buffer_decode(&memlimit,
0,
NULL,
out_data,
&compressed_pos, compressed_size,
reconstituted, &reconstituted_pos, in_max);
gettimeofday(&tend, NULL);
decompress_time += tdiff(&tstart, &tend);
printf("r=%d\n", r);
assert(r==LZMA_OK);
}
}
printf("Compression time = %9.6fs\n", compress_time);
printf("Decmpression time = %9.6fs\n", decompress_time);
free(in_data);
free(out_data);
free(reconstituted);
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment