Mercurial > public > mercurial-scm > hg
diff contrib/python-zstandard/zstd/common/zstd_internal.h @ 43994:de7838053207
zstandard: vendor python-zstandard 0.13.0
Version 0.13.0 of the package was just released. It contains
an upgraded zstd C library which can result in some performance
wins, official support for Python 3.8, and a blackened code base.
There were no meaningful code or functionality changes in this
release of python-zstandard: just reformatting and an upgraded
zstd library version. So the diff seems much larger than what it
is.
Files were added without modifications.
The clang-format-ignorelist file was updated to reflect a new
header file in the zstd distribution.
# no-check-commit because 3rd party code has different style guidelines
Differential Revision: https://phab.mercurial-scm.org/D7770
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sat, 28 Dec 2019 09:55:45 -0800 |
parents | 69de49c4e39c |
children |
line wrap: on
line diff
--- a/contrib/python-zstandard/zstd/common/zstd_internal.h Fri Dec 27 18:54:57 2019 -0500 +++ b/contrib/python-zstandard/zstd/common/zstd_internal.h Sat Dec 28 09:55:45 2019 -0800 @@ -197,79 +197,56 @@ static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); } #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } -#define WILDCOPY_OVERLENGTH 8 -#define VECLEN 16 +#define WILDCOPY_OVERLENGTH 32 +#define WILDCOPY_VECLEN 16 typedef enum { ZSTD_no_overlap, - ZSTD_overlap_src_before_dst, + ZSTD_overlap_src_before_dst /* ZSTD_overlap_dst_before_src, */ } ZSTD_overlap_e; /*! ZSTD_wildcopy() : - * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */ + * Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. + * The src buffer must be before the dst buffer. + */ MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE -void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) +void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) { ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; BYTE* const oend = op + length; - assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); - if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) { - do - COPY8(op, ip) - while (op < oend); - } - else { - if ((length & 8) == 0) - COPY8(op, ip); - do { - COPY16(op, ip); - } - while (op < oend); - } -} - -/*! ZSTD_wildcopy_16min() : - * same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */ -MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE -void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) -{ - ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + length; + assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); - assert(length >= 8); - assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); - - if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) { - do - COPY8(op, ip) - while (op < oend); - } - else { - if ((length & 8) == 0) - COPY8(op, ip); - do { + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. */ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first two COPY16() calls because the copy length is + * almost certain to be short, so the branches have different + * probabilities. + * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%. + * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%. + */ COPY16(op, ip); - } - while (op < oend); + COPY16(op, ip); + if (op >= oend) return; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); } } -MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */ -{ - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - BYTE* const oend = (BYTE*)dstEnd; - do - COPY8(op, ip) - while (op < oend); -} - /*-******************************************* * Private declarations @@ -323,7 +300,7 @@ _BitScanReverse(&r, val); return (unsigned)r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return 31 - __builtin_clz(val); + return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ return 31 - __CLZ(val); # else /* Software version */