mirror of
https://bitbucket.org/smil3y/katie.git
synced 2025-02-24 10:52:56 +00:00
update bundled libdeflate to v1.11
Signed-off-by: Ivailo Monev <xakepa10@gmail.com>
This commit is contained in:
parent
319e717ac0
commit
7d932a6541
13 changed files with 818 additions and 650 deletions
2
src/3rdparty/libdeflate/NOTE
vendored
2
src/3rdparty/libdeflate/NOTE
vendored
|
@ -1,2 +1,2 @@
|
|||
This is Git checkout 3cc3608e9c340e4996dff3d0633acf2ec537e12a
|
||||
This is Git checkout b01537448e8eaf0803e38bdba5acef1d1c8effba
|
||||
from https://github.com/ebiggers/libdeflate that has not been modified.
|
||||
|
|
48
src/3rdparty/libdeflate/common/compiler_gcc.h
vendored
48
src/3rdparty/libdeflate/common/compiler_gcc.h
vendored
|
@ -122,15 +122,30 @@
|
|||
# endif
|
||||
|
||||
/*
|
||||
* Determine whether CRC32 intrinsics are supported.
|
||||
* Determine whether ARM CRC32 intrinsics are supported.
|
||||
*
|
||||
* With gcc r274827 or later (gcc 10.1+, 9.3+, or 8.4+), or with clang,
|
||||
* they work as expected. (Well, not quite. There's still a bug, but we
|
||||
* have to work around it later when including arm_acle.h.)
|
||||
* This support has been affected by several gcc bugs, which we must avoid
|
||||
* by only allowing gcc versions that have the corresponding fixes. First,
|
||||
* gcc commit 943766d37ae4 ("[arm] Fix use of CRC32 intrinsics with Armv8-a
|
||||
* and hard-float"), i.e. gcc 8.4+, 9.3+, 10.1+, or 11+, is needed.
|
||||
* Second, gcc commit c1cdabe3aab8 ("arm: reorder assembler architecture
|
||||
* directives [PR101723]"), i.e. gcc 9.5+, 10.4+, 11.3+, or 12+, is needed
|
||||
* when binutils is 2.34 or later, due to
|
||||
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104439. We use the second
|
||||
* set of prerequisites, as they are stricter and we have no way to detect
|
||||
* the binutils version in C source without requiring a configure script.
|
||||
*
|
||||
* Yet another gcc bug makes arm_acle.h sometimes not define the crc
|
||||
* functions even when the corresponding builtins are available. However,
|
||||
* we work around this later when including arm_acle.h.
|
||||
*
|
||||
* Things are a bit easier with clang -- we can just check whether the
|
||||
* crc builtins are available. However, clang's arm_acle.h is broken in
|
||||
* the same way as gcc's, which we work around later in the same way.
|
||||
*/
|
||||
# if GCC_PREREQ(10, 1) || \
|
||||
(GCC_PREREQ(9, 3) && !GCC_PREREQ(10, 0)) || \
|
||||
(GCC_PREREQ(8, 4) && !GCC_PREREQ(9, 0)) || \
|
||||
# if GCC_PREREQ(11, 3) || \
|
||||
(GCC_PREREQ(10, 4) && !GCC_PREREQ(11, 0)) || \
|
||||
(GCC_PREREQ(9, 5) && !GCC_PREREQ(10, 0)) || \
|
||||
(defined(__clang__) && __has_builtin(__builtin_arm_crc32b))
|
||||
# define COMPILER_SUPPORTS_CRC32_TARGET_INTRINSICS 1
|
||||
# endif
|
||||
|
@ -199,3 +214,22 @@ typedef char __v64qi __attribute__((__vector_size__(64)));
|
|||
#define bsr64(n) (63 - __builtin_clzll(n))
|
||||
#define bsf32(n) __builtin_ctz(n)
|
||||
#define bsf64(n) __builtin_ctzll(n)
|
||||
|
||||
#if defined(__arm__) && \
|
||||
(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
|
||||
/*
 * Reverse the bit order of the 32-bit value 'v' using the ARM 'rbit'
 * instruction, and return the result.  This variant is only compiled for
 * 32-bit ARM targets that satisfy the enclosing #if (__ARM_ARCH >= 7, or
 * ARMv6T2).
 */
static forceinline unsigned int
|
||||
rbit32(unsigned int v)
|
||||
{
|
||||
/* 'v' is both the input operand (%1) and the output operand (%0). */
__asm__("rbit %0, %1\n" : "=r" (v) : "r" (v));
|
||||
return v;
|
||||
}
|
||||
#define rbit32 rbit32
|
||||
#elif defined(__aarch64__)
|
||||
/*
 * Reverse the bit order of the 32-bit value 'v' using the AArch64 'rbit'
 * instruction, and return the result.
 */
static forceinline unsigned int
|
||||
rbit32(unsigned int v)
|
||||
{
|
||||
/*
 * The 'w' operand modifier requests the 32-bit (Wn) register names, so only
 * the low 32 bits are reversed.  'v' is both input (%w1) and output (%w0).
 */
__asm__("rbit %w0, %w1\n" : "=r" (v) : "r" (v));
|
||||
return v;
|
||||
}
|
||||
#define rbit32 rbit32
|
||||
#endif /* __aarch64__ */
|
||||
|
|
79
src/3rdparty/libdeflate/lib/arm/cpu_features.c
vendored
79
src/3rdparty/libdeflate/lib/arm/cpu_features.c
vendored
|
@ -26,13 +26,8 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* ARM processors don't have a standard way for unprivileged programs to detect
|
||||
* processor features. But, on Linux we can read the AT_HWCAP and AT_HWCAP2
|
||||
* values from /proc/self/auxv.
|
||||
*
|
||||
* Ideally we'd use the C library function getauxval(), but it's not guaranteed
|
||||
* to be available: it was only added to glibc in 2.16, and in Android it was
|
||||
* added to API level 18 for ARM and level 21 for AArch64.
|
||||
* ARM CPUs don't have a standard way for unprivileged programs to detect CPU
|
||||
* features. But an OS-specific way can be used when available.
|
||||
*/
|
||||
|
||||
#include "../cpu_features_common.h" /* must be included first */
|
||||
|
@ -40,6 +35,16 @@
|
|||
|
||||
#if ARM_CPU_FEATURES_ENABLED
|
||||
|
||||
#ifdef __linux__
|
||||
/*
|
||||
* On Linux, arm32 and arm64 CPU features can be detected by reading the
|
||||
* AT_HWCAP and AT_HWCAP2 values from /proc/self/auxv.
|
||||
*
|
||||
* Ideally we'd use the C library function getauxval(), but it's not guaranteed
|
||||
* to be available: it was only added to glibc in 2.16, and in Android it was
|
||||
* added to API level 18 for arm32 and level 21 for arm64.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <string.h>
|
||||
|
@ -48,8 +53,6 @@
|
|||
#define AT_HWCAP 16
|
||||
#define AT_HWCAP2 26
|
||||
|
||||
volatile u32 _cpu_features = 0;
|
||||
|
||||
static void scan_auxv(unsigned long *hwcap, unsigned long *hwcap2)
|
||||
{
|
||||
int fd;
|
||||
|
@ -92,13 +95,7 @@ out:
|
|||
close(fd);
|
||||
}
|
||||
|
||||
static const struct cpu_feature arm_cpu_feature_table[] = {
|
||||
{ARM_CPU_FEATURE_NEON, "neon"},
|
||||
{ARM_CPU_FEATURE_PMULL, "pmull"},
|
||||
{ARM_CPU_FEATURE_CRC32, "crc32"},
|
||||
};
|
||||
|
||||
void setup_cpu_features(void)
|
||||
static u32 get_arm_cpu_features(void)
|
||||
{
|
||||
u32 features = 0;
|
||||
unsigned long hwcap = 0;
|
||||
|
@ -123,6 +120,56 @@ void setup_cpu_features(void)
|
|||
if (hwcap & (1 << 7)) /* HWCAP_CRC32 */
|
||||
features |= ARM_CPU_FEATURE_CRC32;
|
||||
#endif
|
||||
return features;
|
||||
}
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
/* On Apple platforms, arm64 CPU features can be detected via sysctlbyname(). */
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
/*
 * Table mapping sysctl names to the ARM_CPU_FEATURE_* bit that each one
 * indicates.  get_arm_cpu_features() queries every entry in turn.  Two
 * different names map to ARM_CPU_FEATURE_NEON; either one reporting the
 * feature is enough to set the bit.
 */
static const struct {
|
||||
const char *name; /* name passed to sysctlbyname() */
|
||||
u32 feature; /* feature bit to set when the sysctl reports 1 */
|
||||
} feature_sysctls[] = {
|
||||
{ "hw.optional.neon", ARM_CPU_FEATURE_NEON },
|
||||
{ "hw.optional.AdvSIMD", ARM_CPU_FEATURE_NEON },
|
||||
{ "hw.optional.arm.FEAT_PMULL", ARM_CPU_FEATURE_PMULL },
|
||||
{ "hw.optional.armv8_crc32", ARM_CPU_FEATURE_CRC32 },
|
||||
};
|
||||
|
||||
/*
 * Apple variant: build the ARM CPU feature bitmask by querying each sysctl
 * listed in feature_sysctls[].
 *
 * Returns the bitwise OR of the 'feature' values of all entries whose sysctl
 * reported the feature as present; 0 if none did.
 */
static u32 get_arm_cpu_features(void)
|
||||
{
|
||||
u32 features = 0;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < ARRAY_LEN(feature_sysctls); i++) {
|
||||
const char *name = feature_sysctls[i].name;
|
||||
u32 val = 0;
|
||||
size_t valsize = sizeof(val);
|
||||
|
||||
/*
 * Count the feature only if the query succeeded, the returned value has
 * exactly the expected size, and the value is 1.  A failed query (e.g. a
 * name the running OS does not know) simply leaves the bit clear.
 */
if (sysctlbyname(name, &val, &valsize, NULL, 0) == 0 &&
|
||||
valsize == sizeof(val) && val == 1)
|
||||
features |= feature_sysctls[i].feature;
|
||||
}
|
||||
return features;
|
||||
}
|
||||
#else
|
||||
#error "unhandled case"
|
||||
#endif
|
||||
|
||||
/*
 * Associates each ARM_CPU_FEATURE_* bit with a short textual name.
 * setup_cpu_features() passes this table to
 * disable_cpu_features_for_testing().
 */
static const struct cpu_feature arm_cpu_feature_table[] = {
|
||||
{ARM_CPU_FEATURE_NEON, "neon"},
|
||||
{ARM_CPU_FEATURE_PMULL, "pmull"},
|
||||
{ARM_CPU_FEATURE_CRC32, "crc32"},
|
||||
};
|
||||
|
||||
volatile u32 _cpu_features = 0;
|
||||
|
||||
void setup_cpu_features(void)
|
||||
{
|
||||
u32 features = get_arm_cpu_features();
|
||||
|
||||
disable_cpu_features_for_testing(&features, arm_cpu_feature_table,
|
||||
ARRAY_LEN(arm_cpu_feature_table));
|
||||
|
|
|
@ -8,7 +8,8 @@
|
|||
#include "../lib_common.h"
|
||||
|
||||
#if (defined(__arm__) || defined(__aarch64__)) && \
|
||||
defined(__linux__) && \
|
||||
(defined(__linux__) || \
|
||||
(defined(__aarch64__) && defined(__APPLE__))) && \
|
||||
COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE && \
|
||||
!defined(FREESTANDING)
|
||||
# define ARM_CPU_FEATURES_ENABLED 1
|
||||
|
|
10
src/3rdparty/libdeflate/lib/arm/crc32_impl.h
vendored
10
src/3rdparty/libdeflate/lib/arm/crc32_impl.h
vendored
|
@ -80,13 +80,17 @@ crc32_arm(u32 remainder, const u8 *p, size_t size)
|
|||
size--;
|
||||
}
|
||||
|
||||
while (size >= 32) {
|
||||
while (size >= 64) {
|
||||
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 0)));
|
||||
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 1)));
|
||||
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 2)));
|
||||
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 3)));
|
||||
p += 32;
|
||||
size -= 32;
|
||||
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 4)));
|
||||
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 5)));
|
||||
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 6)));
|
||||
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 7)));
|
||||
p += 64;
|
||||
size -= 64;
|
||||
}
|
||||
|
||||
while (size >= 8) {
|
||||
|
|
8
src/3rdparty/libdeflate/lib/bt_matchfinder.h
vendored
8
src/3rdparty/libdeflate/lib/bt_matchfinder.h
vendored
|
@ -139,14 +139,14 @@ bt_right_child(struct bt_matchfinder *mf, s32 node)
|
|||
/* Advance the binary tree matchfinder by one byte, optionally recording
|
||||
* matches. @record_matches should be a compile-time constant. */
|
||||
static forceinline struct lz_match *
|
||||
bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
|
||||
const u8 * const restrict in_base,
|
||||
bt_matchfinder_advance_one_byte(struct bt_matchfinder * const mf,
|
||||
const u8 * const in_base,
|
||||
const ptrdiff_t cur_pos,
|
||||
const u32 max_len,
|
||||
const u32 nice_len,
|
||||
const u32 max_search_depth,
|
||||
u32 * const restrict next_hashes,
|
||||
struct lz_match * restrict lz_matchptr,
|
||||
u32 * const next_hashes,
|
||||
struct lz_match *lz_matchptr,
|
||||
const bool record_matches)
|
||||
{
|
||||
const u8 *in_next = in_base + cur_pos;
|
||||
|
|
1226
src/3rdparty/libdeflate/lib/deflate_compress.c
vendored
1226
src/3rdparty/libdeflate/lib/deflate_compress.c
vendored
File diff suppressed because it is too large
Load diff
29
src/3rdparty/libdeflate/lib/deflate_decompress.c
vendored
29
src/3rdparty/libdeflate/lib/deflate_decompress.c
vendored
|
@ -313,7 +313,7 @@ do { \
|
|||
|
||||
/*
|
||||
* Read a 16-bit value from the input. This must have been preceded by a call
|
||||
* to ALIGN_INPUT(), and the caller must have already checked for overrun.
|
||||
* to ALIGN_INPUT(), and the caller must have already checked for overread.
|
||||
*/
|
||||
#define READ_U16() (tmp16 = get_unaligned_le16(in_next), in_next += 2, tmp16)
|
||||
|
||||
|
@ -886,9 +886,8 @@ copy_word_unaligned(const void *src, void *dst)
|
|||
*****************************************************************************/
|
||||
|
||||
typedef enum libdeflate_result (*decompress_func_t)
|
||||
(struct libdeflate_decompressor * restrict d,
|
||||
const void * restrict in, size_t in_nbytes,
|
||||
void * restrict out, size_t out_nbytes_avail,
|
||||
(struct libdeflate_decompressor *d,
|
||||
const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail,
|
||||
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);
|
||||
|
||||
#undef DEFAULT_IMPL
|
||||
|
@ -906,18 +905,16 @@ typedef enum libdeflate_result (*decompress_func_t)
|
|||
|
||||
#ifdef DISPATCH
|
||||
static enum libdeflate_result
|
||||
dispatch(struct libdeflate_decompressor * restrict d,
|
||||
const void * restrict in, size_t in_nbytes,
|
||||
void * restrict out, size_t out_nbytes_avail,
|
||||
dispatch(struct libdeflate_decompressor *d,
|
||||
const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail,
|
||||
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);
|
||||
|
||||
static volatile decompress_func_t decompress_impl = dispatch;
|
||||
|
||||
/* Choose the fastest implementation at runtime */
|
||||
static enum libdeflate_result
|
||||
dispatch(struct libdeflate_decompressor * restrict d,
|
||||
const void * restrict in, size_t in_nbytes,
|
||||
void * restrict out, size_t out_nbytes_avail,
|
||||
dispatch(struct libdeflate_decompressor *d,
|
||||
const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail,
|
||||
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
|
||||
{
|
||||
decompress_func_t f = arch_select_decompress_func();
|
||||
|
@ -943,9 +940,9 @@ dispatch(struct libdeflate_decompressor * restrict d,
|
|||
* at runtime.
|
||||
*/
|
||||
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor * restrict d,
|
||||
const void * restrict in, size_t in_nbytes,
|
||||
void * restrict out, size_t out_nbytes_avail,
|
||||
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *d,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes_avail,
|
||||
size_t *actual_in_nbytes_ret,
|
||||
size_t *actual_out_nbytes_ret)
|
||||
{
|
||||
|
@ -954,9 +951,9 @@ libdeflate_deflate_decompress_ex(struct libdeflate_decompressor * restrict d,
|
|||
}
|
||||
|
||||
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
|
||||
libdeflate_deflate_decompress(struct libdeflate_decompressor * restrict d,
|
||||
const void * restrict in, size_t in_nbytes,
|
||||
void * restrict out, size_t out_nbytes_avail,
|
||||
libdeflate_deflate_decompress(struct libdeflate_decompressor *d,
|
||||
const void *in, size_t in_nbytes,
|
||||
void *out, size_t out_nbytes_avail,
|
||||
size_t *actual_out_nbytes_ret)
|
||||
{
|
||||
return libdeflate_deflate_decompress_ex(d, in, in_nbytes,
|
||||
|
|
16
src/3rdparty/libdeflate/lib/hc_matchfinder.h
vendored
16
src/3rdparty/libdeflate/lib/hc_matchfinder.h
vendored
|
@ -181,15 +181,15 @@ hc_matchfinder_slide_window(struct hc_matchfinder *mf)
|
|||
* 'best_len' was found.
|
||||
*/
|
||||
static forceinline u32
|
||||
hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
|
||||
const u8 ** const restrict in_base_p,
|
||||
const u8 * const restrict in_next,
|
||||
hc_matchfinder_longest_match(struct hc_matchfinder * const mf,
|
||||
const u8 ** const in_base_p,
|
||||
const u8 * const in_next,
|
||||
u32 best_len,
|
||||
const u32 max_len,
|
||||
const u32 nice_len,
|
||||
const u32 max_search_depth,
|
||||
u32 * const restrict next_hashes,
|
||||
u32 * const restrict offset_ret)
|
||||
u32 * const next_hashes,
|
||||
u32 * const offset_ret)
|
||||
{
|
||||
u32 depth_remaining = max_search_depth;
|
||||
const u8 *best_matchptr = in_next;
|
||||
|
@ -359,12 +359,12 @@ out:
|
|||
* the sequence beginning at @in_next + @count.
|
||||
*/
|
||||
static forceinline void
|
||||
hc_matchfinder_skip_bytes(struct hc_matchfinder * const restrict mf,
|
||||
const u8 ** const restrict in_base_p,
|
||||
hc_matchfinder_skip_bytes(struct hc_matchfinder * const mf,
|
||||
const u8 ** const in_base_p,
|
||||
const u8 *in_next,
|
||||
const u8 * const in_end,
|
||||
const u32 count,
|
||||
u32 * const restrict next_hashes)
|
||||
u32 * const next_hashes)
|
||||
{
|
||||
u32 cur_pos;
|
||||
u32 hash3, hash4;
|
||||
|
|
16
src/3rdparty/libdeflate/lib/ht_matchfinder.h
vendored
16
src/3rdparty/libdeflate/lib/ht_matchfinder.h
vendored
|
@ -75,13 +75,13 @@ ht_matchfinder_slide_window(struct ht_matchfinder *mf)
|
|||
|
||||
/* Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES */
|
||||
static forceinline u32
|
||||
ht_matchfinder_longest_match(struct ht_matchfinder * const restrict mf,
|
||||
const u8 ** const restrict in_base_p,
|
||||
const u8 * const restrict in_next,
|
||||
ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
|
||||
const u8 ** const in_base_p,
|
||||
const u8 * const in_next,
|
||||
const u32 max_len,
|
||||
const u32 nice_len,
|
||||
u32 * const restrict next_hash,
|
||||
u32 * const restrict offset_ret)
|
||||
u32 * const next_hash,
|
||||
u32 * const offset_ret)
|
||||
{
|
||||
u32 best_len = 0;
|
||||
const u8 *best_matchptr = in_next;
|
||||
|
@ -195,12 +195,12 @@ out:
|
|||
}
|
||||
|
||||
static forceinline void
|
||||
ht_matchfinder_skip_bytes(struct ht_matchfinder * const restrict mf,
|
||||
const u8 ** const restrict in_base_p,
|
||||
ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
|
||||
const u8 ** const in_base_p,
|
||||
const u8 *in_next,
|
||||
const u8 * const in_end,
|
||||
const u32 count,
|
||||
u32 * const restrict next_hash)
|
||||
u32 * const next_hash)
|
||||
{
|
||||
s32 cur_pos = in_next - *in_base_p;
|
||||
u32 hash;
|
||||
|
|
14
src/3rdparty/libdeflate/lib/lib_common.h
vendored
14
src/3rdparty/libdeflate/lib/lib_common.h
vendored
|
@ -60,8 +60,22 @@ void *memmove(void *dest, const void *src, size_t n);
|
|||
|
||||
int memcmp(const void *s1, const void *s2, size_t n);
|
||||
#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
|
||||
|
||||
#undef LIBDEFLATE_ENABLE_ASSERTIONS
|
||||
#else
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Runtime assertion support. Don't enable this in production builds; it may
|
||||
* hurt performance significantly.
|
||||
*/
|
||||
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
|
||||
/*
 * Failure handler called by ASSERT().  Receives the stringified expression
 * and the source file and line of the failing assertion.
 */
void libdeflate_assertion_failed(const char *expr, const char *file, int line);
|
||||
/*
 * Assertions enabled: evaluate 'expr' and call the failure handler if it is
 * false.
 * NOTE(review): this expands to a bare brace block rather than
 * do { } while (0), so "if (c) ASSERT(x); else ..." would not parse as
 * intended -- keep ASSERT() as a standalone statement.
 */
#define ASSERT(expr) { if (unlikely(!(expr))) \
|
||||
libdeflate_assertion_failed(#expr, __FILE__, __LINE__); }
|
||||
#else
|
||||
/*
 * Assertions disabled: 'expr' still appears in the expansion (so it must
 * remain valid code), but its value is discarded.
 */
#define ASSERT(expr) (void)(expr)
|
||||
#endif
|
||||
|
||||
#endif /* LIB_LIB_COMMON_H */
|
||||
|
|
11
src/3rdparty/libdeflate/lib/utils.c
vendored
11
src/3rdparty/libdeflate/lib/utils.c
vendored
|
@ -140,3 +140,14 @@ memcmp(const void *s1, const void *s2, size_t n)
|
|||
return 0;
|
||||
}
|
||||
#endif /* FREESTANDING */
|
||||
|
||||
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
/*
 * Report a failed runtime assertion and terminate the program.
 *
 * @expr: text of the expression that evaluated to false
 * @file: source file containing the assertion
 * @line: line number of the assertion
 *
 * Prints a one-line message to stderr and then calls abort(), so this
 * function does not return.
 */
void
|
||||
libdeflate_assertion_failed(const char *expr, const char *file, int line)
|
||||
{
|
||||
fprintf(stderr, "Assertion failed: %s at %s:%d\n", expr, file, line);
|
||||
abort();
|
||||
}
|
||||
#endif /* LIBDEFLATE_ENABLE_ASSERTIONS */
|
||||
|
|
6
src/3rdparty/libdeflate/libdeflate.h
vendored
6
src/3rdparty/libdeflate/libdeflate.h
vendored
|
@ -10,8 +10,8 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
#define LIBDEFLATE_VERSION_MAJOR 1
|
||||
#define LIBDEFLATE_VERSION_MINOR 9
|
||||
#define LIBDEFLATE_VERSION_STRING "1.9"
|
||||
#define LIBDEFLATE_VERSION_MINOR 11
|
||||
#define LIBDEFLATE_VERSION_STRING "1.11"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
@ -31,7 +31,7 @@ extern "C" {
|
|||
# define LIBDEFLATEEXPORT
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) && !defined(_WIN64)
|
||||
#if defined(_WIN32) && !defined(_WIN64) && defined(LIBDEFLATE_DLL)
|
||||
# define LIBDEFLATEAPI_ABI __stdcall
|
||||
#else
|
||||
# define LIBDEFLATEAPI_ABI
|
||||
|
|
Loading…
Add table
Reference in a new issue