update bundled libdeflate to v1.11

Signed-off-by: Ivailo Monev <xakepa10@gmail.com>
This commit is contained in:
Ivailo Monev 2022-05-27 12:55:53 +03:00
parent 319e717ac0
commit 7d932a6541
13 changed files with 818 additions and 650 deletions

View file

@ -1,2 +1,2 @@
This is Git checkout 3cc3608e9c340e4996dff3d0633acf2ec537e12a
This is Git checkout b01537448e8eaf0803e38bdba5acef1d1c8effba
from https://github.com/ebiggers/libdeflate that has not been modified.

View file

@ -122,15 +122,30 @@
# endif
/*
* Determine whether CRC32 intrinsics are supported.
* Determine whether ARM CRC32 intrinsics are supported.
*
* With gcc r274827 or later (gcc 10.1+, 9.3+, or 8.4+), or with clang,
* they work as expected. (Well, not quite. There's still a bug, but we
* have to work around it later when including arm_acle.h.)
* This support has been affected by several gcc bugs, which we must avoid
* by only allowing gcc versions that have the corresponding fixes. First,
* gcc commit 943766d37ae4 ("[arm] Fix use of CRC32 intrinsics with Armv8-a
* and hard-float"), i.e. gcc 8.4+, 9.3+, 10.1+, or 11+, is needed.
* Second, gcc commit c1cdabe3aab8 ("arm: reorder assembler architecture
* directives [PR101723]"), i.e. gcc 9.5+, 10.4+, 11.3+, or 12+, is needed
* when binutils is 2.34 or later, due to
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104439. We use the second
* set of prerequisites, as they are stricter and we have no way to detect
* the binutils version in C source without requiring a configure script.
*
* Yet another gcc bug makes arm_acle.h sometimes not define the crc
* functions even when the corresponding builtins are available. However,
* we work around this later when including arm_acle.h.
*
* Things are a bit easier with clang -- we can just check whether the
* crc builtins are available. However, clang's arm_acle.h is broken in
* the same way as gcc's, which we work around later in the same way.
*/
# if GCC_PREREQ(10, 1) || \
(GCC_PREREQ(9, 3) && !GCC_PREREQ(10, 0)) || \
(GCC_PREREQ(8, 4) && !GCC_PREREQ(9, 0)) || \
# if GCC_PREREQ(11, 3) || \
(GCC_PREREQ(10, 4) && !GCC_PREREQ(11, 0)) || \
(GCC_PREREQ(9, 5) && !GCC_PREREQ(10, 0)) || \
(defined(__clang__) && __has_builtin(__builtin_arm_crc32b))
# define COMPILER_SUPPORTS_CRC32_TARGET_INTRINSICS 1
# endif
@ -199,3 +214,22 @@ typedef char __v64qi __attribute__((__vector_size__(64)));
#define bsr64(n) (63 - __builtin_clzll(n))
#define bsf32(n) __builtin_ctz(n)
#define bsf64(n) __builtin_ctzll(n)
#if defined(__arm__) && \
(__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__)))
/*
 * Reverse the order of the bits in the 32-bit value @v using the ARM 'rbit'
 * instruction.  This variant is selected for 32-bit ARM when the enclosing
 * preprocessor condition holds (ARMv7 or later, or ARMv6T2).
 *
 * Returns the bit-reversed value.
 */
static forceinline unsigned int
rbit32(unsigned int v)
{
/* %0 is the output register and %1 the input register; both are bound to v. */
__asm__("rbit %0, %1\n" : "=r" (v) : "r" (v));
return v;
}
#define rbit32 rbit32
#elif defined(__aarch64__)
/*
 * Reverse the order of the bits in the 32-bit value @v using the AArch64
 * 'rbit' instruction.
 *
 * Returns the bit-reversed value.
 */
static forceinline unsigned int
rbit32(unsigned int v)
{
/*
 * The 'w' operand modifier selects the 32-bit (W) view of each register, so
 * the reversal is done on 32 bits rather than on the full 64-bit register.
 */
__asm__("rbit %w0, %w1\n" : "=r" (v) : "r" (v));
return v;
}
#define rbit32 rbit32
#endif /* __aarch64__ */

View file

@ -26,13 +26,8 @@
*/
/*
* ARM processors don't have a standard way for unprivileged programs to detect
* processor features. But, on Linux we can read the AT_HWCAP and AT_HWCAP2
* values from /proc/self/auxv.
*
* Ideally we'd use the C library function getauxval(), but it's not guaranteed
* to be available: it was only added to glibc in 2.16, and in Android it was
* added to API level 18 for ARM and level 21 for AArch64.
* ARM CPUs don't have a standard way for unprivileged programs to detect CPU
* features. But an OS-specific way can be used when available.
*/
#include "../cpu_features_common.h" /* must be included first */
@ -40,6 +35,16 @@
#if ARM_CPU_FEATURES_ENABLED
#ifdef __linux__
/*
* On Linux, arm32 and arm64 CPU features can be detected by reading the
* AT_HWCAP and AT_HWCAP2 values from /proc/self/auxv.
*
* Ideally we'd use the C library function getauxval(), but it's not guaranteed
* to be available: it was only added to glibc in 2.16, and in Android it was
* added to API level 18 for arm32 and level 21 for arm64.
*/
#include <errno.h>
#include <fcntl.h>
#include <string.h>
@ -48,8 +53,6 @@
#define AT_HWCAP 16
#define AT_HWCAP2 26
volatile u32 _cpu_features = 0;
static void scan_auxv(unsigned long *hwcap, unsigned long *hwcap2)
{
int fd;
@ -92,13 +95,7 @@ out:
close(fd);
}
static const struct cpu_feature arm_cpu_feature_table[] = {
{ARM_CPU_FEATURE_NEON, "neon"},
{ARM_CPU_FEATURE_PMULL, "pmull"},
{ARM_CPU_FEATURE_CRC32, "crc32"},
};
void setup_cpu_features(void)
static u32 get_arm_cpu_features(void)
{
u32 features = 0;
unsigned long hwcap = 0;
@ -123,6 +120,56 @@ void setup_cpu_features(void)
if (hwcap & (1 << 7)) /* HWCAP_CRC32 */
features |= ARM_CPU_FEATURE_CRC32;
#endif
return features;
}
#elif defined(__APPLE__)
/* On Apple platforms, arm64 CPU features can be detected via sysctlbyname(). */
#include <sys/types.h>
#include <sys/sysctl.h>
/*
 * Table pairing each sysctl name that get_arm_cpu_features() queries with the
 * ARM_CPU_FEATURE_* flag to set when that sysctl reports the value 1.  Two
 * different names map to ARM_CPU_FEATURE_NEON; a match on either one sets it.
 */
static const struct {
const char *name; /* sysctl name passed to sysctlbyname() */
u32 feature; /* ARM_CPU_FEATURE_* flag associated with this sysctl */
} feature_sysctls[] = {
{ "hw.optional.neon", ARM_CPU_FEATURE_NEON },
{ "hw.optional.AdvSIMD", ARM_CPU_FEATURE_NEON },
{ "hw.optional.arm.FEAT_PMULL", ARM_CPU_FEATURE_PMULL },
{ "hw.optional.armv8_crc32", ARM_CPU_FEATURE_CRC32 },
};
/*
 * Detect the supported ARM CPU features on Apple platforms.
 *
 * Every sysctl listed in feature_sysctls[] is queried with sysctlbyname().  A
 * feature flag is set only if the query succeeds, the returned value has
 * exactly the size of a u32, and the value is exactly 1.
 *
 * Returns the bitwise OR of the detected ARM_CPU_FEATURE_* flags.
 */
static u32 get_arm_cpu_features(void)
{
	u32 detected = 0;
	size_t idx;

	for (idx = 0; idx < ARRAY_LEN(feature_sysctls); idx++) {
		u32 value = 0;
		size_t len = sizeof(value);

		/* A failed query is treated as "feature not present". */
		if (sysctlbyname(feature_sysctls[idx].name, &value, &len,
				 NULL, 0) != 0)
			continue;

		/* Only a u32-sized result equal to 1 counts as supported. */
		if (len != sizeof(value) || value != 1)
			continue;

		detected |= feature_sysctls[idx].feature;
	}
	return detected;
}
#else
#error "unhandled case"
#endif
/*
 * Associates each ARM_CPU_FEATURE_* flag with a short name string.
 * setup_cpu_features() passes this table to
 * disable_cpu_features_for_testing() together with the detected feature mask.
 */
static const struct cpu_feature arm_cpu_feature_table[] = {
{ARM_CPU_FEATURE_NEON, "neon"},
{ARM_CPU_FEATURE_PMULL, "pmull"},
{ARM_CPU_FEATURE_CRC32, "crc32"},
};
volatile u32 _cpu_features = 0;
void setup_cpu_features(void)
{
u32 features = get_arm_cpu_features();
disable_cpu_features_for_testing(&features, arm_cpu_feature_table,
ARRAY_LEN(arm_cpu_feature_table));

View file

@ -8,7 +8,8 @@
#include "../lib_common.h"
#if (defined(__arm__) || defined(__aarch64__)) && \
defined(__linux__) && \
(defined(__linux__) || \
(defined(__aarch64__) && defined(__APPLE__))) && \
COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE && \
!defined(FREESTANDING)
# define ARM_CPU_FEATURES_ENABLED 1

View file

@ -80,13 +80,17 @@ crc32_arm(u32 remainder, const u8 *p, size_t size)
size--;
}
while (size >= 32) {
while (size >= 64) {
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 0)));
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 1)));
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 2)));
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 3)));
p += 32;
size -= 32;
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 4)));
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 5)));
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 6)));
remainder = __crc32d(remainder, le64_bswap(*((u64 *)p + 7)));
p += 64;
size -= 64;
}
while (size >= 8) {

View file

@ -139,14 +139,14 @@ bt_right_child(struct bt_matchfinder *mf, s32 node)
/* Advance the binary tree matchfinder by one byte, optionally recording
* matches. @record_matches should be a compile-time constant. */
static forceinline struct lz_match *
bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
const u8 * const restrict in_base,
bt_matchfinder_advance_one_byte(struct bt_matchfinder * const mf,
const u8 * const in_base,
const ptrdiff_t cur_pos,
const u32 max_len,
const u32 nice_len,
const u32 max_search_depth,
u32 * const restrict next_hashes,
struct lz_match * restrict lz_matchptr,
u32 * const next_hashes,
struct lz_match *lz_matchptr,
const bool record_matches)
{
const u8 *in_next = in_base + cur_pos;

File diff suppressed because it is too large Load diff

View file

@ -313,7 +313,7 @@ do { \
/*
* Read a 16-bit value from the input. This must have been preceded by a call
* to ALIGN_INPUT(), and the caller must have already checked for overrun.
* to ALIGN_INPUT(), and the caller must have already checked for overread.
*/
#define READ_U16() (tmp16 = get_unaligned_le16(in_next), in_next += 2, tmp16)
@ -886,9 +886,8 @@ copy_word_unaligned(const void *src, void *dst)
*****************************************************************************/
typedef enum libdeflate_result (*decompress_func_t)
(struct libdeflate_decompressor * restrict d,
const void * restrict in, size_t in_nbytes,
void * restrict out, size_t out_nbytes_avail,
(struct libdeflate_decompressor *d,
const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail,
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);
#undef DEFAULT_IMPL
@ -906,18 +905,16 @@ typedef enum libdeflate_result (*decompress_func_t)
#ifdef DISPATCH
static enum libdeflate_result
dispatch(struct libdeflate_decompressor * restrict d,
const void * restrict in, size_t in_nbytes,
void * restrict out, size_t out_nbytes_avail,
dispatch(struct libdeflate_decompressor *d,
const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail,
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);
static volatile decompress_func_t decompress_impl = dispatch;
/* Choose the fastest implementation at runtime */
static enum libdeflate_result
dispatch(struct libdeflate_decompressor * restrict d,
const void * restrict in, size_t in_nbytes,
void * restrict out, size_t out_nbytes_avail,
dispatch(struct libdeflate_decompressor *d,
const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail,
size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
{
decompress_func_t f = arch_select_decompress_func();
@ -943,9 +940,9 @@ dispatch(struct libdeflate_decompressor * restrict d,
* at runtime.
*/
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor * restrict d,
const void * restrict in, size_t in_nbytes,
void * restrict out, size_t out_nbytes_avail,
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *d,
const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail,
size_t *actual_in_nbytes_ret,
size_t *actual_out_nbytes_ret)
{
@ -954,9 +951,9 @@ libdeflate_deflate_decompress_ex(struct libdeflate_decompressor * restrict d,
}
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
libdeflate_deflate_decompress(struct libdeflate_decompressor * restrict d,
const void * restrict in, size_t in_nbytes,
void * restrict out, size_t out_nbytes_avail,
libdeflate_deflate_decompress(struct libdeflate_decompressor *d,
const void *in, size_t in_nbytes,
void *out, size_t out_nbytes_avail,
size_t *actual_out_nbytes_ret)
{
return libdeflate_deflate_decompress_ex(d, in, in_nbytes,

View file

@ -181,15 +181,15 @@ hc_matchfinder_slide_window(struct hc_matchfinder *mf)
* 'best_len' was found.
*/
static forceinline u32
hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
const u8 ** const restrict in_base_p,
const u8 * const restrict in_next,
hc_matchfinder_longest_match(struct hc_matchfinder * const mf,
const u8 ** const in_base_p,
const u8 * const in_next,
u32 best_len,
const u32 max_len,
const u32 nice_len,
const u32 max_search_depth,
u32 * const restrict next_hashes,
u32 * const restrict offset_ret)
u32 * const next_hashes,
u32 * const offset_ret)
{
u32 depth_remaining = max_search_depth;
const u8 *best_matchptr = in_next;
@ -359,12 +359,12 @@ out:
* the sequence beginning at @in_next + @count.
*/
static forceinline void
hc_matchfinder_skip_bytes(struct hc_matchfinder * const restrict mf,
const u8 ** const restrict in_base_p,
hc_matchfinder_skip_bytes(struct hc_matchfinder * const mf,
const u8 ** const in_base_p,
const u8 *in_next,
const u8 * const in_end,
const u32 count,
u32 * const restrict next_hashes)
u32 * const next_hashes)
{
u32 cur_pos;
u32 hash3, hash4;

View file

@ -75,13 +75,13 @@ ht_matchfinder_slide_window(struct ht_matchfinder *mf)
/* Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES */
static forceinline u32
ht_matchfinder_longest_match(struct ht_matchfinder * const restrict mf,
const u8 ** const restrict in_base_p,
const u8 * const restrict in_next,
ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
const u8 ** const in_base_p,
const u8 * const in_next,
const u32 max_len,
const u32 nice_len,
u32 * const restrict next_hash,
u32 * const restrict offset_ret)
u32 * const next_hash,
u32 * const offset_ret)
{
u32 best_len = 0;
const u8 *best_matchptr = in_next;
@ -195,12 +195,12 @@ out:
}
static forceinline void
ht_matchfinder_skip_bytes(struct ht_matchfinder * const restrict mf,
const u8 ** const restrict in_base_p,
ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
const u8 ** const in_base_p,
const u8 *in_next,
const u8 * const in_end,
const u32 count,
u32 * const restrict next_hash)
u32 * const next_hash)
{
s32 cur_pos = in_next - *in_base_p;
u32 hash;

View file

@ -60,8 +60,22 @@ void *memmove(void *dest, const void *src, size_t n);
int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
#undef LIBDEFLATE_ENABLE_ASSERTIONS
#else
#include <string.h>
#endif
/*
* Runtime assertion support. Don't enable this in production builds; it may
* hurt performance significantly.
*/
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
/* Reports the failed expression with its file and line, then aborts. */
void libdeflate_assertion_failed(const char *expr, const char *file, int line);
/*
 * ASSERT(expr): when assertions are enabled, evaluate @expr once and call
 * libdeflate_assertion_failed() if it is false.
 *
 * The body is wrapped in do { ... } while (0) so that 'ASSERT(x);' is a single
 * statement.  With a bare '{ ... }' block, the trailing ';' would terminate an
 * enclosing 'if' and make a following 'else' a syntax error.
 */
#define ASSERT(expr) \
	do { \
		if (unlikely(!(expr))) \
			libdeflate_assertion_failed(#expr, __FILE__, __LINE__); \
	} while (0)
#else
/*
 * ASSERT(expr): when assertions are disabled, @expr is still evaluated (so
 * any side effects are kept) but its result is discarded.  The expansion is
 * fully parenthesized so that it cannot combine with surrounding operators.
 */
#define ASSERT(expr) ((void)(expr))
#endif
#endif /* LIB_LIB_COMMON_H */

View file

@ -140,3 +140,14 @@ memcmp(const void *s1, const void *s2, size_t n)
return 0;
}
#endif /* FREESTANDING */
#ifdef LIBDEFLATE_ENABLE_ASSERTIONS
#include <stdio.h>
#include <stdlib.h>
/*
 * Report a failed runtime assertion and terminate the program.
 *
 * @expr: text of the expression that evaluated to false
 * @file: name of the source file containing the assertion
 * @line: line number of the assertion within @file
 *
 * Writes a one-line diagnostic to stderr and then calls abort(), so this
 * function does not return.
 */
void
libdeflate_assertion_failed(const char *expr, const char *file, int line)
{
	fprintf(stderr,
		"Assertion failed: %s at %s:%d\n",
		expr, file, line);
	abort();
}
#endif /* LIBDEFLATE_ENABLE_ASSERTIONS */

View file

@ -10,8 +10,8 @@ extern "C" {
#endif
#define LIBDEFLATE_VERSION_MAJOR 1
#define LIBDEFLATE_VERSION_MINOR 9
#define LIBDEFLATE_VERSION_STRING "1.9"
#define LIBDEFLATE_VERSION_MINOR 11
#define LIBDEFLATE_VERSION_STRING "1.11"
#include <stddef.h>
#include <stdint.h>
@ -31,7 +31,7 @@ extern "C" {
# define LIBDEFLATEEXPORT
#endif
#if defined(_WIN32) && !defined(_WIN64)
#if defined(_WIN32) && !defined(_WIN64) && defined(LIBDEFLATE_DLL)
# define LIBDEFLATEAPI_ABI __stdcall
#else
# define LIBDEFLATEAPI_ABI