Lib: add Dependencies.md and update some libraries

This commit is contained in:
yuanyuanxiang
2025-07-27 17:32:28 +08:00
parent c4a613a2e1
commit c9904b2822
20 changed files with 871 additions and 790 deletions

481
compress/zstd/zdict.h Normal file
View File

@@ -0,0 +1,481 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_ZDICT_H
#define ZSTD_ZDICT_H
/*====== Dependencies ======*/
#include <stddef.h> /* size_t */
#if defined (__cplusplus)
extern "C" {
#endif
/* ===== ZDICTLIB_API : control library symbols visibility ===== */
#ifndef ZDICTLIB_VISIBLE
/* Backwards compatibility with old macro name */
# ifdef ZDICTLIB_VISIBILITY
# define ZDICTLIB_VISIBLE ZDICTLIB_VISIBILITY
# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
# define ZDICTLIB_VISIBLE __attribute__ ((visibility ("default")))
# else
# define ZDICTLIB_VISIBLE
# endif
#endif
#ifndef ZDICTLIB_HIDDEN
# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
# define ZDICTLIB_HIDDEN __attribute__ ((visibility ("hidden")))
# else
# define ZDICTLIB_HIDDEN
# endif
#endif
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBLE
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
#else
# define ZDICTLIB_API ZDICTLIB_VISIBLE
#endif
/*******************************************************************************
* Zstd dictionary builder
*
* FAQ
* ===
* Why should I use a dictionary?
* ------------------------------
*
* Zstd can use dictionaries to improve compression ratio of small data.
* Traditionally small files don't compress well because there is very little
* repetition in a single sample, since it is small. But, if you are compressing
* many similar files, like a bunch of JSON records that share the same
* structure, you can train a dictionary on ahead of time on some samples of
* these files. Then, zstd can use the dictionary to find repetitions that are
* present across samples. This can vastly improve compression ratio.
*
* When is a dictionary useful?
* ----------------------------
*
* Dictionaries are useful when compressing many small files that are similar.
* The larger a file is, the less benefit a dictionary will have. Generally,
* we don't expect dictionary compression to be effective past 100KB. And the
* smaller a file is, the more we would expect the dictionary to help.
*
* How do I use a dictionary?
* --------------------------
*
* Simply pass the dictionary to the zstd compressor with
* `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
* the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
* more advanced functions that allow selecting some options, see zstd.h for
* complete documentation.
*
* What is a zstd dictionary?
* --------------------------
*
* A zstd dictionary has two pieces: Its header, and its content. The header
* contains a magic number, the dictionary ID, and entropy tables. These
* entropy tables allow zstd to save on header costs in the compressed file,
* which really matters for small data. The content is just bytes, which are
* repeated content that is common across many samples.
*
* What is a raw content dictionary?
* ---------------------------------
*
* A raw content dictionary is just bytes. It doesn't have a zstd dictionary
* header, a dictionary ID, or entropy tables. Any buffer is a valid raw
* content dictionary.
*
* How do I train a dictionary?
* ----------------------------
*
* Gather samples from your use case. These samples should be similar to each
* other. If you have several use cases, you could try to train one dictionary
* per use case.
*
* Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
* dictionary. There are a few advanced versions of this function, but this
* is a great starting point. If you want to further tune your dictionary
* you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
* you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
*
* If the dictionary training function fails, that is likely because you
* either passed too few samples, or a dictionary would not be effective
* for your data. Look at the messages that the dictionary trainer printed,
* if it doesn't say too few samples, then a dictionary would not be effective.
*
* How large should my dictionary be?
* ----------------------------------
*
* A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a
* dictionary larger than that. But, most use cases can get away with a
* smaller dictionary. The advanced dictionary builders can automatically
* shrink the dictionary for you, and select the smallest size that doesn't
* hurt compression ratio too much. See the `shrinkDict` parameter.
* A smaller dictionary can save memory, and potentially speed up
* compression.
*
* How many samples should I provide to the dictionary builder?
* ------------------------------------------------------------
*
* We generally recommend passing ~100x the size of the dictionary
* in samples. A few thousand should suffice. Having too few samples
* can hurt the dictionaries effectiveness. Having more samples will
* only improve the dictionaries effectiveness. But having too many
* samples can slow down the dictionary builder.
*
* How do I determine if a dictionary will be effective?
* -----------------------------------------------------
*
* Simply train a dictionary and try it out. You can use zstd's built in
* benchmarking tool to test the dictionary effectiveness.
*
* # Benchmark levels 1-3 without a dictionary
* zstd -b1e3 -r /path/to/my/files
* # Benchmark levels 1-3 with a dictionary
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
*
* When should I retrain a dictionary?
* -----------------------------------
*
* You should retrain a dictionary when its effectiveness drops. Dictionary
* effectiveness drops as the data you are compressing changes. Generally, we do
* expect dictionaries to "decay" over time, as your data changes, but the rate
* at which they decay depends on your use case. Internally, we regularly
* retrain dictionaries, and if the new dictionary performs significantly
* better than the old dictionary, we will ship the new dictionary.
*
* I have a raw content dictionary, how do I turn it into a zstd dictionary?
* -------------------------------------------------------------------------
*
* If you have a raw content dictionary, e.g. by manually constructing it, or
* using a third-party dictionary builder, you can turn it into a zstd
* dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
* provide some samples of the data. It will add the zstd header to the
* raw content, which contains a dictionary ID and entropy tables, which
* will improve compression ratio, and allow zstd to write the dictionary ID
* into the frame, if you so choose.
*
* Do I have to use zstd's dictionary builder?
* -------------------------------------------
*
* No! You can construct dictionary content however you please, it is just
* bytes. It will always be valid as a raw content dictionary. If you want
* a zstd dictionary, which can improve compression ratio, use
* `ZDICT_finalizeDictionary()`.
*
* What is the attack surface of a zstd dictionary?
* ------------------------------------------------
*
* Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
* zstd should never crash, or access out-of-bounds memory no matter what
* the dictionary is. However, if an attacker can control the dictionary
* during decompression, they can cause zstd to generate arbitrary bytes,
* just like if they controlled the compressed data.
*
******************************************************************************/
/*! ZDICT_trainFromBuffer():
* Train a dictionary from an array of samples.
* Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
* f=20, and accel=1.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* The resulting dictionary will be saved into `dictBuffer`.
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* Note: Dictionary training will fail if there are not enough samples to construct a
* dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
* If dictionary training fails, you should use zstd without a dictionary, as the dictionary
* would've been ineffective anyways. If you believe your samples would benefit from a dictionary
* please open an issue with details, and we can look into it.
* Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
*/
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples);
typedef struct {
int compressionLevel; /**< optimize for a specific zstd compression level; 0 means default */
unsigned notificationLevel; /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
unsigned dictID; /**< force dictID value; 0 means auto mode (32-bits random value)
* NOTE: The zstd format reserves some dictionary IDs for future use.
* You may use them in private settings, but be warned that they
* may be used by zstd in a public dictionary registry in the future.
* These dictionary IDs are:
* - low range : <= 32767
* - high range : >= (2^31)
*/
} ZDICT_params_t;
/*! ZDICT_finalizeDictionary():
* Given a custom content as a basis for dictionary, and a set of samples,
* finalize dictionary by adding headers and statistics according to the zstd
* dictionary format.
*
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each
* sample in order. The samples are used to construct the statistics, so they
* should be representative of what you will compress with this dictionary.
*
* The compression level can be set in `parameters`. You should pass the
* compression level you expect to use in production. The statistics for each
* compression level differ, so tuning the dictionary for the compression level
* can help quite a bit.
*
* You can set an explicit dictionary ID in `parameters`, or allow us to pick
* a random dictionary ID for you, but we can't guarantee no collisions.
*
* The dstDictBuffer and the dictContent may overlap, and the content will be
* appended to the end of the header. If the header + the content doesn't fit in
* maxDictSize the beginning of the content is truncated to make room, since it
* is presumed that the most profitable content is at the end of the dictionary,
* since that is the cheapest to reference.
*
* `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).
*
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
* or an error code, which can be tested by ZDICT_isError().
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if
* instructed to, using notificationLevel>0.
* NOTE: This function currently may fail in several edge cases including:
* * Not enough samples
* * Samples are uncompressible
* * Samples are all exactly the same
*/
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
const void* dictContent, size_t dictContentSize,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);
/*====== Helper functions ======*/
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_ZDICT_H */
#if defined(ZDICT_STATIC_LINKING_ONLY) && !defined(ZSTD_ZDICT_H_STATIC)
#define ZSTD_ZDICT_H_STATIC
#if defined (__cplusplus)
extern "C" {
#endif
/* This can be overridden externally to hide static symbols. */
#ifndef ZDICTLIB_STATIC_API
# if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
# define ZDICTLIB_STATIC_API __declspec(dllexport) ZDICTLIB_VISIBLE
# elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
# define ZDICTLIB_STATIC_API __declspec(dllimport) ZDICTLIB_VISIBLE
# else
# define ZDICTLIB_STATIC_API ZDICTLIB_VISIBLE
# endif
#endif
/* ====================================================================================
* The definitions in this section are considered experimental.
* They should never be used with a dynamic library, as they may change in the future.
* They are provided for advanced usages.
* Use them only in association with static linking.
* ==================================================================================== */
#define ZDICT_DICTSIZE_MIN 256
/* Deprecated: Remove in v1.6.0 */
#define ZDICT_CONTENTSIZE_MIN 128
/*! ZDICT_cover_params_t:
* k and d are the only required parameters.
* For others, value 0 means default.
*/
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
ZDICT_params_t zParams;
} ZDICT_cover_params_t;
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 1 means default(20)*/
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
ZDICT_params_t zParams;
} ZDICT_fastCover_params_t;
/*! ZDICT_trainFromBuffer_cover():
* Train a dictionary from an array of samples using the COVER algorithm.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* The resulting dictionary will be saved into `dictBuffer`.
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
*/
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover(
void *dictBuffer, size_t dictBufferCapacity,
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
ZDICT_cover_params_t parameters);
/*! ZDICT_optimizeTrainFromBuffer_cover():
* The same requirements as above hold for all the parameters except `parameters`.
* This function tries many parameter combinations and picks the best parameters.
* `*parameters` is filled with the best parameters found,
* dictionary constructed with those parameters is stored in `dictBuffer`.
*
* All of the parameters d, k, steps are optional.
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
* if steps is zero it defaults to its default value.
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
*
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* On success `*parameters` contains the parameters selected.
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
*/
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover(
void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_cover_params_t* parameters);
/*! ZDICT_trainFromBuffer_fastCover():
* Train a dictionary from an array of samples using a modified version of COVER algorithm.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* d and k are required.
* All other parameters are optional, will use default values if not provided
* The resulting dictionary will be saved into `dictBuffer`.
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
*/
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
size_t dictBufferCapacity, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t parameters);
/*! ZDICT_optimizeTrainFromBuffer_fastCover():
* The same requirements as above hold for all the parameters except `parameters`.
* This function tries many parameter combinations (specifically, k and d combinations)
* and picks the best parameters. `*parameters` is filled with the best parameters found,
* dictionary constructed with those parameters is stored in `dictBuffer`.
* All of the parameters d, k, steps, f, and accel are optional.
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
* if steps is zero it defaults to its default value.
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
* If f is zero, default value of 20 is used.
* If accel is zero, default value of 1 is used.
*
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* On success `*parameters` contains the parameters selected.
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
*/
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
size_t dictBufferCapacity, const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t* parameters);
typedef struct {
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
ZDICT_params_t zParams;
} ZDICT_legacy_params_t;
/*! ZDICT_trainFromBuffer_legacy():
* Train a dictionary from an array of samples.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* The resulting dictionary will be saved into `dictBuffer`.
* `parameters` is optional and can be provided with values set to 0 to mean "default".
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* See ZDICT_trainFromBuffer() for details on failure modes.
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
*/
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_legacy(
void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_legacy_params_t parameters);
/* Deprecation warnings */
/* It is generally possible to disable deprecation warnings from compiler,
for example with -Wno-deprecated-declarations for gcc
or _CRT_SECURE_NO_WARNINGS in Visual.
Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
# define ZDICT_DEPRECATED(message) /* disable deprecation warnings */
#else
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define ZDICT_DEPRECATED(message) [[deprecated(message)]]
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
# define ZDICT_DEPRECATED(message) __attribute__((deprecated(message)))
# elif (ZDICT_GCC_VERSION >= 301)
# define ZDICT_DEPRECATED(message) __attribute__((deprecated))
# elif defined(_MSC_VER)
# define ZDICT_DEPRECATED(message) __declspec(deprecated(message))
# else
# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
# define ZDICT_DEPRECATED(message)
# endif
#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
ZDICTLIB_STATIC_API
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_ZDICT_H_STATIC */

View File

@@ -7,17 +7,22 @@
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#if defined (__cplusplus)
extern "C" {
#endif
#ifndef ZSTD_H_235446
#define ZSTD_H_235446
/* ====== Dependencies ======*/
#include <limits.h> /* INT_MAX */
#include <stddef.h> /* size_t */
#include "zstd_errors.h" /* list of errors */
#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
#include <limits.h> /* INT_MAX */
#endif /* ZSTD_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
extern "C" {
#endif
/* ===== ZSTDLIB_API : control library symbols visibility ===== */
#ifndef ZSTDLIB_VISIBLE
@@ -57,7 +62,7 @@ extern "C" {
#else
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define ZSTD_DEPRECATED(message) [[deprecated(message)]]
# elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
# elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) || defined(__IAR_SYSTEMS_ICC__)
# define ZSTD_DEPRECATED(message) __attribute__((deprecated(message)))
# elif defined(__GNUC__) && (__GNUC__ >= 3)
# define ZSTD_DEPRECATED(message) __attribute__((deprecated))
@@ -106,9 +111,13 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 5
#define ZSTD_VERSION_RELEASE 6
#define ZSTD_VERSION_RELEASE 7
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
#if defined(WINDOWS)
# define ZSTD_compress ZSTD_compress_static
#endif
/*! ZSTD_versionNumber() :
* Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */
ZSTDLIB_API unsigned ZSTD_versionNumber(void);
@@ -144,7 +153,7 @@ ZSTDLIB_API const char* ZSTD_versionString(void);
/***************************************
* Simple API
* Simple Core API
***************************************/
/*! ZSTD_compress() :
* Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
@@ -157,68 +166,80 @@ ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
int compressionLevel);
/*! ZSTD_decompress() :
* `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
* `dstCapacity` is an upper bound of originalSize to regenerate.
* If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
* @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
* or an errorCode if it fails (which can be tested using ZSTD_isError()). */
* `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
* Multiple compressed frames can be decompressed at once with this method.
* The result will be the concatenation of all decompressed frames, back to back.
* `dstCapacity` is an upper bound of originalSize to regenerate.
* First frame's decompressed size can be extracted using ZSTD_getFrameContentSize().
* If maximum upper bound isn't known, prefer using streaming mode to decompress data.
* @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
* or an errorCode if it fails (which can be tested using ZSTD_isError()). */
ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
/*====== Decompression helper functions ======*/
/*! ZSTD_getFrameContentSize() : requires v1.3.0+
* `src` should point to the start of a ZSTD encoded frame.
* `srcSize` must be at least as large as the frame header.
* hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
* @return : - decompressed size of `src` frame content, if known
* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
* note 1 : a 0 return value means the frame is valid but "empty".
* note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
* When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
* In which case, it's necessary to use streaming mode to decompress data.
* Optionally, application can rely on some implicit limit,
* as ZSTD_decompress() only needs an upper bound of decompressed size.
* (For example, data could be necessarily cut into blocks <= 16 KB).
* note 3 : decompressed size is always present when compression is completed using single-pass functions,
* such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
* note 4 : decompressed size can be very large (64-bits value),
* potentially larger than what local system can handle as a single memory segment.
* In which case, it's necessary to use streaming mode to decompress data.
* note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
* Always ensure return value fits within application's authorized limits.
* Each application can set its own limits.
* note 6 : This function replaces ZSTD_getDecompressedSize() */
* `src` should point to the start of a ZSTD encoded frame.
* `srcSize` must be at least as large as the frame header.
* hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
* @return : - decompressed size of `src` frame content, if known
* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
* note 1 : a 0 return value means the frame is valid but "empty".
* When invoking this method on a skippable frame, it will return 0.
* note 2 : decompressed size is an optional field, it may not be present (typically in streaming mode).
* When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
* In which case, it's necessary to use streaming mode to decompress data.
* Optionally, application can rely on some implicit limit,
* as ZSTD_decompress() only needs an upper bound of decompressed size.
* (For example, data could be necessarily cut into blocks <= 16 KB).
* note 3 : decompressed size is always present when compression is completed using single-pass functions,
* such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
* note 4 : decompressed size can be very large (64-bits value),
* potentially larger than what local system can handle as a single memory segment.
* In which case, it's necessary to use streaming mode to decompress data.
* note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
* Always ensure return value fits within application's authorized limits.
* Each application can set its own limits.
* note 6 : This function replaces ZSTD_getDecompressedSize() */
#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
/*! ZSTD_getDecompressedSize() :
* NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
/*! ZSTD_getDecompressedSize() (obsolete):
* This function is now obsolete, in favor of ZSTD_getFrameContentSize().
* Both functions work the same way, but ZSTD_getDecompressedSize() blends
* "empty", "unknown" and "error" results to the same return value (0),
* while ZSTD_getFrameContentSize() gives them separate return values.
* @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
ZSTD_DEPRECATED("Replaced by ZSTD_getFrameContentSize")
ZSTDLIB_API
unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+
* `src` should point to the start of a ZSTD frame or skippable frame.
* `srcSize` must be >= first frame size
* @return : the compressed size of the first frame starting at `src`,
* suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
* or an error code if input is invalid */
* or an error code if input is invalid
* Note 1: this method is called _find*() because it's not enough to read the header,
* it may have to scan through the frame's content, to reach its end.
* Note 2: this method also works with Skippable Frames. In which case,
* it returns the size of the complete skippable frame,
* which is always equal to its content size + 8 bytes for headers. */
ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
/*====== Helper functions ======*/
/* ZSTD_compressBound() :
/*====== Compression helper functions ======*/
/*! ZSTD_compressBound() :
* maximum compressed size in worst case single-pass scenario.
* When invoking `ZSTD_compress()` or any other one-pass compression function,
* When invoking `ZSTD_compress()`, or any other one-pass compression function,
* it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize)
* as it eliminates one potential failure scenario,
* aka not enough room in dst buffer to write the compressed frame.
* Note : ZSTD_compressBound() itself can fail, if @srcSize > ZSTD_MAX_INPUT_SIZE .
* Note : ZSTD_compressBound() itself can fail, if @srcSize >= ZSTD_MAX_INPUT_SIZE .
* In which case, ZSTD_compressBound() will return an error code
* which can be tested using ZSTD_isError().
*
@@ -226,21 +247,25 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
* same as ZSTD_compressBound(), but as a macro.
* It can be used to produce constants, which can be useful for static allocation,
* for example to size a static array on stack.
* Will produce constant value 0 if srcSize too large.
* Will produce constant value 0 if srcSize is too large.
*/
#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U)
#define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
/*====== Error helper functions ======*/
/* ZSTD_isError() :
* Most ZSTD_* functions returning a size_t value can be tested for error,
* using ZSTD_isError().
* @return 1 if error, 0 otherwise
*/
ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */
ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */
ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */
ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */
ZSTDLIB_API unsigned ZSTD_isError(size_t result); /*!< tells if a `size_t` function result is an error code */
ZSTDLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); /* convert a result into an error code, which can be compared to error enum list */
ZSTDLIB_API const char* ZSTD_getErrorName(size_t result); /*!< provides readable string from a function result */
ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */
ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */
ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */
/***************************************
@@ -248,17 +273,17 @@ ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compres
***************************************/
/*= Compression context
* When compressing many times,
* it is recommended to allocate a context just once,
* it is recommended to allocate a compression context just once,
* and reuse it for each successive compression operation.
* This will make workload friendlier for system's memory.
* This will make the workload easier for system's memory.
* Note : re-using context is just a speed / resource optimization.
* It doesn't change the compression ratio, which remains identical.
* Note 2 : In multi-threaded environments,
* use one different context per thread for parallel execution.
* Note 2: For parallel execution in multi-threaded environments,
* use one different context per thread .
*/
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* compatible with NULL pointer */
/*! ZSTD_compressCCtx() :
* Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
@@ -266,7 +291,7 @@ ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer *
* this function compresses at the requested compression level,
* __ignoring any other advanced parameter__ .
* If any advanced parameter was set using the advanced API,
* they will all be reset. Only `compressionLevel` remains.
* they will all be reset. Only @compressionLevel remains.
*/
ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
@@ -392,7 +417,7 @@ typedef enum {
* Special: value 0 means "use default strategy". */
ZSTD_c_targetCBlockSize=130, /* v1.5.6+
* Attempts to fit compressed block size into approximatively targetCBlockSize.
* Attempts to fit compressed block size into approximately targetCBlockSize.
* Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
* Note that it's not a guarantee, just a convergence target (default:0).
* No target when targetCBlockSize == 0.
@@ -488,7 +513,8 @@ typedef enum {
* ZSTD_c_stableOutBuffer
* ZSTD_c_blockDelimiters
* ZSTD_c_validateSequences
* ZSTD_c_useBlockSplitter
* ZSTD_c_blockSplitterLevel
* ZSTD_c_splitAfterSequences
* ZSTD_c_useRowMatchFinder
* ZSTD_c_prefetchCDictTables
* ZSTD_c_enableSeqProducerFallback
@@ -515,7 +541,8 @@ typedef enum {
ZSTD_c_experimentalParam16=1013,
ZSTD_c_experimentalParam17=1014,
ZSTD_c_experimentalParam18=1015,
ZSTD_c_experimentalParam19=1016
ZSTD_c_experimentalParam19=1016,
ZSTD_c_experimentalParam20=1017
} ZSTD_cParameter;
typedef struct {
@@ -855,7 +882,7 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
*
* A ZSTD_DStream object is required to track streaming operations.
* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
* ZSTD_DStream objects can be reused multiple times.
* ZSTD_DStream objects can be re-employed multiple times.
*
* Use ZSTD_initDStream() to start a new decompression operation.
* @return : recommended first input size
@@ -865,16 +892,21 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
* The function will update both `pos` fields.
* If `input.pos < input.size`, some input has not been consumed.
* It's up to the caller to present again remaining data.
*
* The function tries to flush all data decoded immediately, respecting output buffer size.
* If `output.pos < output.size`, decoder has flushed everything it could.
* But if `output.pos == output.size`, there might be some data left within internal buffers.,
*
* However, when `output.pos == output.size`, it's more difficult to know.
* If @return > 0, the frame is not complete, meaning
* either there is still some data left to flush within internal buffers,
* or there is more input to read to complete the frame (or both).
* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
* @return : 0 when a frame is completely decoded and fully flushed,
* or an error code, which can be tested using ZSTD_isError(),
* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
* the return value is a suggested next input size (just a hint for better latency)
* that will never request more than the remaining frame size.
* that will never request more than the remaining content of the compressed frame.
* *******************************************************************************/
typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
@@ -901,9 +933,10 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
* Function will update both input and output `pos` fields exposing current state via these fields:
* - `input.pos < input.size`, some input remaining and caller should provide remaining input
* on the next call.
* - `output.pos < output.size`, decoder finished and flushed all remaining buffers.
* - `output.pos == output.size`, potentially uncflushed data present in the internal buffers,
* call ZSTD_decompressStream() again to flush remaining data to output.
* - `output.pos < output.size`, decoder flushed internal output buffer.
* - `output.pos == output.size`, unflushed data potentially present in the internal buffers,
* check ZSTD_decompressStream() @return value,
* if > 0, invoke it again to flush remaining data to output.
* Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX.
*
* @return : 0 when a frame is completely decoded and fully flushed,
@@ -1181,6 +1214,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_H_235446 */
@@ -1196,6 +1233,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
#if defined (__cplusplus)
extern "C" {
#endif
/* This can be overridden externally to hide static symbols. */
#ifndef ZSTDLIB_STATIC_API
# if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
@@ -1307,7 +1348,7 @@ typedef struct {
*
* Note: This field is optional. ZSTD_generateSequences() will calculate the value of
* 'rep', but repeat offsets do not necessarily need to be calculated from an external
* sequence provider's perspective. For example, ZSTD_compressSequences() does not
* sequence provider perspective. For example, ZSTD_compressSequences() does not
* use this 'rep' field at all (as of now).
*/
} ZSTD_Sequence;
@@ -1412,14 +1453,15 @@ typedef enum {
} ZSTD_literalCompressionMode_e;
typedef enum {
/* Note: This enum controls features which are conditionally beneficial. Zstd typically will make a final
* decision on whether or not to enable the feature (ZSTD_ps_auto), but setting the switch to ZSTD_ps_enable
* or ZSTD_ps_disable allow for a force enable/disable the feature.
/* Note: This enum controls features which are conditionally beneficial.
* Zstd can take a decision on whether or not to enable the feature (ZSTD_ps_auto),
* but setting the switch to ZSTD_ps_enable or ZSTD_ps_disable force enable/disable the feature.
*/
ZSTD_ps_auto = 0, /* Let the library automatically determine whether the feature shall be enabled */
ZSTD_ps_enable = 1, /* Force-enable the feature */
ZSTD_ps_disable = 2 /* Do not use the feature */
} ZSTD_paramSwitch_e;
} ZSTD_ParamSwitch_e;
#define ZSTD_paramSwitch_e ZSTD_ParamSwitch_e /* old name */
/***************************************
* Frame header and size functions
@@ -1464,34 +1506,36 @@ ZSTDLIB_STATIC_API unsigned long long ZSTD_findDecompressedSize(const void* src,
ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
/*! ZSTD_frameHeaderSize() :
* srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
* srcSize must be large enough, aka >= ZSTD_FRAMEHEADERSIZE_PREFIX.
* @return : size of the Frame Header,
* or an error code (if srcSize is too small) */
ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_FrameType_e;
#define ZSTD_frameType_e ZSTD_FrameType_e /* old name */
typedef struct {
unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
unsigned long long windowSize; /* can be very large, up to <= frameContentSize */
unsigned blockSizeMax;
ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
ZSTD_FrameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
unsigned headerSize;
unsigned dictID;
unsigned dictID; /* for ZSTD_skippableFrame, contains the skippable magic variant [0-15] */
unsigned checksumFlag;
unsigned _reserved1;
unsigned _reserved2;
} ZSTD_frameHeader;
} ZSTD_FrameHeader;
#define ZSTD_frameHeader ZSTD_FrameHeader /* old name */
/*! ZSTD_getFrameHeader() :
* decode Frame Header, or requires larger `srcSize`.
* @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
* decode Frame Header into `zfhPtr`, or requires larger `srcSize`.
* @return : 0 => header is complete, `zfhPtr` is correctly filled,
* >0 => `srcSize` is too small, @return value is the wanted `srcSize` amount, `zfhPtr` is not filled,
* or an error code, which can be tested using ZSTD_isError() */
ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize);
/*! ZSTD_getFrameHeader_advanced() :
* same as ZSTD_getFrameHeader(),
* with added capability to select a format (like ZSTD_f_zstd1_magicless) */
ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
/*! ZSTD_decompressionMargin() :
* Zstd supports in-place decompression, where the input and output buffers overlap.
@@ -1539,9 +1583,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSi
))
typedef enum {
ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */
} ZSTD_sequenceFormat_e;
ZSTD_sf_noBlockDelimiters = 0, /* ZSTD_Sequence[] has no block delimiters, just sequences */
ZSTD_sf_explicitBlockDelimiters = 1 /* ZSTD_Sequence[] contains explicit block delimiters */
} ZSTD_SequenceFormat_e;
#define ZSTD_sequenceFormat_e ZSTD_SequenceFormat_e /* old name */
/*! ZSTD_sequenceBound() :
* `srcSize` : size of the input buffer
@@ -1565,7 +1610,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
* @param zc The compression context to be used for ZSTD_compress2(). Set any
* compression parameters you need on this context.
* @param outSeqs The output sequences buffer of size @p outSeqsSize
* @param outSeqsSize The size of the output sequences buffer.
* @param outSeqsCapacity The size of the output sequences buffer.
* ZSTD_sequenceBound(srcSize) is an upper bound on the number
* of sequences that can be generated.
* @param src The source buffer to generate sequences from of size @p srcSize.
@@ -1583,7 +1628,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()")
ZSTDLIB_STATIC_API size_t
ZSTD_generateSequences(ZSTD_CCtx* zc,
ZSTD_Sequence* outSeqs, size_t outSeqsSize,
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
const void* src, size_t srcSize);
/*! ZSTD_mergeBlockDelimiters() :
@@ -1603,7 +1648,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si
* Compress an array of ZSTD_Sequence, associated with @src buffer, into dst.
* @src contains the entire input (not just the literals).
* If @srcSize > sum(sequence.length), the remaining bytes are considered all literals
* If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
* If a dictionary is included, then the cctx should reference the dict (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.).
* The entire source is compressed into a single frame.
*
* The compression behavior changes based on cctx params. In particular:
@@ -1612,11 +1657,17 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si
* the block size derived from the cctx, and sequences may be split. This is the default setting.
*
* If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
* block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
* valid block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
*
* If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
* behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
* When ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, it's possible to decide generating repcodes
* using the advanced parameter ZSTD_c_repcodeResolution. Repcodes will improve compression ratio, though the benefit
* can vary greatly depending on Sequences. On the other hand, repcode resolution is an expensive operation.
* By default, it's disabled at low (<10) compression levels, and enabled above the threshold (>=10).
* ZSTD_c_repcodeResolution makes it possible to directly manage this processing in either direction.
*
* If ZSTD_c_validateSequences == 0, this function blindly accepts the Sequences provided. Invalid Sequences cause undefined
* behavior. If ZSTD_c_validateSequences == 1, then the function will detect invalid Sequences (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements) and then bail out and return an error.
*
* In addition to the two adjustable experimental params, there are other important cctx params.
* - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
@@ -1624,15 +1675,42 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si
* - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
* is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
*
* Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
* Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
* and cannot emit an RLE block that disagrees with the repcode history
* Note: Repcodes are, as of now, always re-calculated within this function, ZSTD_Sequence.rep is effectively unused.
* Dev Note: Once ability to ingest repcodes become available, the explicit block delims mode must respect those repcodes exactly,
* and cannot emit an RLE block that disagrees with the repcode history.
* @return : final compressed size, or a ZSTD error code.
*/
ZSTDLIB_STATIC_API size_t
ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize);
ZSTD_compressSequences(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize);
/*! ZSTD_compressSequencesAndLiterals() :
* This is a variant of ZSTD_compressSequences() which,
* instead of receiving (src,srcSize) as input parameter, receives (literals,litSize),
* aka all the literals, already extracted and laid out into a single continuous buffer.
* This can be useful if the process generating the sequences also happens to generate the buffer of literals,
* thus skipping an extraction + caching stage.
* It's a speed optimization, useful when the right conditions are met,
* but it also features the following limitations:
* - Only supports explicit delimiter mode
* - Currently does not support Sequences validation (so input Sequences are trusted)
* - Not compatible with frame checksum, which must be disabled
* - If any block is incompressible, will fail and return an error
* - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error.
* - @litBufCapacity is the size of the underlying buffer into which literals are written, starting at address @literals.
* @litBufCapacity must be at least 8 bytes larger than @litSize.
* - @decompressedSize must be correct, and correspond to the sum of all Sequences. Any discrepancy will generate an error.
* @return : final compressed size, or a ZSTD error code.
*/
ZSTDLIB_STATIC_API size_t
ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t nbSequences,
const void* literals, size_t litSize, size_t litBufCapacity,
size_t decompressedSize);
/*! ZSTD_writeSkippableFrame() :
@@ -1640,8 +1718,8 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize,
*
* Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number,
* ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
* As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so
* the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
* As such, the parameter magicVariant controls the exact skippable frame magic number variant used,
* so the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
*
* Returns an error if destination buffer is not large enough, if the source size is not representable
* with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid).
@@ -1649,26 +1727,28 @@ ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize,
* @return : number of bytes written or a ZSTD error.
*/
ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
const void* src, size_t srcSize, unsigned magicVariant);
const void* src, size_t srcSize,
unsigned magicVariant);
/*! ZSTD_readSkippableFrame() :
* Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
* Retrieves the content of a zstd skippable frame starting at @src, and writes it to @dst buffer.
*
* The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
* i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
* in the magicVariant.
* The parameter @magicVariant will receive the magicVariant that was supplied when the frame was written,
* i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START.
* This can be NULL if the caller is not interested in the magicVariant.
*
* Returns an error if destination buffer is not large enough, or if the frame is not skippable.
*
* @return : number of bytes written or a ZSTD error.
*/
ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
const void* src, size_t srcSize);
ZSTDLIB_STATIC_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
unsigned* magicVariant,
const void* src, size_t srcSize);
/*! ZSTD_isSkippableFrame() :
* Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
*/
ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
ZSTDLIB_STATIC_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
@@ -1796,7 +1876,15 @@ static
#ifdef __GNUC__
__attribute__((__unused__))
#endif
#if defined(__clang__) && __clang_major__ >= 5
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#endif
ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */
#if defined(__clang__) && __clang_major__ >= 5
#pragma clang diagnostic pop
#endif
ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
@@ -1976,7 +2064,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
* See the comments on that enum for an explanation of the feature. */
#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
/* Controlled with ZSTD_paramSwitch_e enum.
/* Controlled with ZSTD_ParamSwitch_e enum.
* Default is ZSTD_ps_auto.
* Set to ZSTD_ps_disable to never compress literals.
* Set to ZSTD_ps_enable to always compress literals. (Note: uncompressed literals
@@ -2117,22 +2205,46 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
/* ZSTD_c_validateSequences
* Default is 0 == disabled. Set to 1 to enable sequence validation.
*
* For use with sequence compression API: ZSTD_compressSequences().
* Designates whether or not we validate sequences provided to ZSTD_compressSequences()
* For use with sequence compression API: ZSTD_compressSequences*().
* Designates whether or not provided sequences are validated within ZSTD_compressSequences*()
* during function execution.
*
* Without validation, providing a sequence that does not conform to the zstd spec will cause
* undefined behavior, and may produce a corrupted block.
* When Sequence validation is disabled (default), Sequences are compressed as-is,
* so they must correct, otherwise it would result in a corruption error.
*
* With validation enabled, if sequence is invalid (see doc/zstd_compression_format.md for
* Sequence validation adds some protection, by ensuring that all values respect boundary conditions.
* If a Sequence is detected invalid (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements) then the function will bail out and
* return an error.
*
*/
#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
/* ZSTD_c_useBlockSplitter
* Controlled with ZSTD_paramSwitch_e enum.
/* ZSTD_c_blockSplitterLevel
* note: this parameter only influences the first splitter stage,
* which is active before producing the sequences.
* ZSTD_c_splitAfterSequences controls the next splitter stage,
* which is active after sequence production.
* Note that both can be combined.
* Allowed values are between 0 and ZSTD_BLOCKSPLITTER_LEVEL_MAX included.
* 0 means "auto", which will select a value depending on current ZSTD_c_strategy.
* 1 means no splitting.
* Then, values from 2 to 6 are sorted in increasing cpu load order.
*
* Note that currently the first block is never split,
* to ensure expansion guarantees in presence of incompressible data.
*/
#define ZSTD_BLOCKSPLITTER_LEVEL_MAX 6
#define ZSTD_c_blockSplitterLevel ZSTD_c_experimentalParam20
/* ZSTD_c_splitAfterSequences
* This is a stronger splitter algorithm,
* based on actual sequences previously produced by the selected parser.
* It's also slower, and as a consequence, mostly used for high compression levels.
* While the post-splitter does overlap with the pre-splitter,
* both can nonetheless be combined,
* notably with ZSTD_c_blockSplitterLevel at ZSTD_BLOCKSPLITTER_LEVEL_MAX,
* resulting in higher compression ratio than just one of them.
*
* Default is ZSTD_ps_auto.
* Set to ZSTD_ps_disable to never use block splitter.
* Set to ZSTD_ps_enable to always use block splitter.
@@ -2140,10 +2252,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
* By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
* block splitting based on the compression parameters.
*/
#define ZSTD_c_useBlockSplitter ZSTD_c_experimentalParam13
#define ZSTD_c_splitAfterSequences ZSTD_c_experimentalParam13
/* ZSTD_c_useRowMatchFinder
* Controlled with ZSTD_paramSwitch_e enum.
* Controlled with ZSTD_ParamSwitch_e enum.
* Default is ZSTD_ps_auto.
* Set to ZSTD_ps_disable to never use row-based matchfinder.
* Set to ZSTD_ps_enable to force usage of row-based matchfinder.
@@ -2175,7 +2287,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
/* ZSTD_c_prefetchCDictTables
* Controlled with ZSTD_paramSwitch_e enum. Default is ZSTD_ps_auto.
* Controlled with ZSTD_ParamSwitch_e enum. Default is ZSTD_ps_auto.
*
* In some situations, zstd uses CDict tables in-place rather than copying them
* into the working context. (See docs on ZSTD_dictAttachPref_e above for details).
@@ -2219,19 +2331,21 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
* that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper
* bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make
* compressBound() inaccurate). Only currently meant to be used for testing.
*
*/
#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18
/* ZSTD_c_searchForExternalRepcodes
* This parameter affects how zstd parses external sequences, such as sequences
* provided through the compressSequences() API or from an external block-level
* sequence producer.
/* ZSTD_c_repcodeResolution
* This parameter only has an effect if ZSTD_c_blockDelimiters is
* set to ZSTD_sf_explicitBlockDelimiters (may change in the future).
*
* If set to ZSTD_ps_enable, the library will check for repeated offsets in
* This parameter affects how zstd parses external sequences,
* provided via the ZSTD_compressSequences*() API
* or from an external block-level sequence producer.
*
* If set to ZSTD_ps_enable, the library will check for repeated offsets within
* external sequences, even if those repcodes are not explicitly indicated in
* the "rep" field. Note that this is the only way to exploit repcode matches
* while using compressSequences() or an external sequence producer, since zstd
* while using compressSequences*() or an external sequence producer, since zstd
* currently ignores the "rep" field of external sequences.
*
* If set to ZSTD_ps_disable, the library will not exploit repeated offsets in
@@ -2240,12 +2354,11 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
* compression ratio.
*
* The default value is ZSTD_ps_auto, for which the library will enable/disable
* based on compression level.
*
* Note: for now, this param only has an effect if ZSTD_c_blockDelimiters is
* set to ZSTD_sf_explicitBlockDelimiters. That may change in the future.
* based on compression level (currently: level<10 disables, level>=10 enables).
*/
#define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19
#define ZSTD_c_repcodeResolution ZSTD_c_experimentalParam19
#define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19 /* older name */
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
@@ -2952,7 +3065,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
>0 : `srcSize` is too small, please provide at least result bytes on next attempt.
errorCode, which can be tested using ZSTD_isError().
It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
It fills a ZSTD_FrameHeader structure with important information to correctly decode the frame,
such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`).
Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information.
As a consequence, check that values remain within valid application range.
@@ -3082,8 +3195,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_
ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */

Binary file not shown.

107
compress/zstd/zstd_errors.h Normal file
View File

@@ -0,0 +1,107 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_ERRORS_H_398273423
#define ZSTD_ERRORS_H_398273423
#if defined (__cplusplus)
extern "C" {
#endif
/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
#ifndef ZSTDERRORLIB_VISIBLE
/* Backwards compatibility with old macro name */
# ifdef ZSTDERRORLIB_VISIBILITY
# define ZSTDERRORLIB_VISIBLE ZSTDERRORLIB_VISIBILITY
# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
# define ZSTDERRORLIB_VISIBLE __attribute__ ((visibility ("default")))
# else
# define ZSTDERRORLIB_VISIBLE
# endif
#endif
#ifndef ZSTDERRORLIB_HIDDEN
# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden")))
# else
# define ZSTDERRORLIB_HIDDEN
# endif
#endif
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBLE
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
#else
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE
#endif
/*-*********************************************
* Error codes list
*-*********************************************
* Error codes _values_ are pinned down since v1.3.1 only.
* Therefore, don't rely on values if you may link to any version < v1.3.1.
*
* Only values < 100 are considered stable.
*
* note 1 : this API shall be used with static linking only.
* dynamic linking is not yet officially supported.
* note 2 : Prefer relying on the enum than on its value whenever possible
* This is the only supported way to use the error list < v1.3.1
* note 3 : ZSTD_isError() is always correct, whatever the library version.
**********************************************/
typedef enum {
ZSTD_error_no_error = 0,
ZSTD_error_GENERIC = 1,
ZSTD_error_prefix_unknown = 10,
ZSTD_error_version_unsupported = 12,
ZSTD_error_frameParameter_unsupported = 14,
ZSTD_error_frameParameter_windowTooLarge = 16,
ZSTD_error_corruption_detected = 20,
ZSTD_error_checksum_wrong = 22,
ZSTD_error_literals_headerWrong = 24,
ZSTD_error_dictionary_corrupted = 30,
ZSTD_error_dictionary_wrong = 32,
ZSTD_error_dictionaryCreation_failed = 34,
ZSTD_error_parameter_unsupported = 40,
ZSTD_error_parameter_combination_unsupported = 41,
ZSTD_error_parameter_outOfBound = 42,
ZSTD_error_tableLog_tooLarge = 44,
ZSTD_error_maxSymbolValue_tooLarge = 46,
ZSTD_error_maxSymbolValue_tooSmall = 48,
ZSTD_error_cannotProduce_uncompressedBlock = 49,
ZSTD_error_stabilityCondition_notRespected = 50,
ZSTD_error_stage_wrong = 60,
ZSTD_error_init_missing = 62,
ZSTD_error_memory_allocation = 64,
ZSTD_error_workSpace_tooSmall= 66,
ZSTD_error_dstSize_tooSmall = 70,
ZSTD_error_srcSize_wrong = 72,
ZSTD_error_dstBuffer_null = 74,
ZSTD_error_noForwardProgress_destFull = 80,
ZSTD_error_noForwardProgress_inputEmpty = 82,
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
ZSTD_error_frameIndex_tooLarge = 100,
ZSTD_error_seekableIO = 102,
ZSTD_error_dstBuffer_wrong = 104,
ZSTD_error_srcBuffer_wrong = 105,
ZSTD_error_sequenceProducer_failed = 106,
ZSTD_error_externalSequences_invalid = 107,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode;
ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_ERRORS_H_398273423 */

Binary file not shown.