Mercurial > public > mercurial-scm > hg
comparison contrib/python-zstandard/zstd/common/entropy_common.c @ 30434:2e484bdea8c4
zstd: vendor zstd 1.1.1
zstd is a new compression format and it is awesome, yielding
higher compression ratios and significantly faster compression
and decompression operations compared to zlib (our current
compression engine of choice) across the board.
We want zstd to be a 1st class citizen in Mercurial and to eventually
be the preferred compression format for various operations.
This patch starts the formal process of supporting zstd by vendoring
a copy of zstd. Why do we need to vendor zstd? Good question.
First, zstd is relatively new and not widely available yet. If we
didn't vendor zstd or distribute it with Mercurial, most users likely
wouldn't have zstd installed or even available to install. What good
is a feature if you can't use it? Vendoring and distributing the zstd
sources gives us the highest likelihood that zstd will be available to
Mercurial installs.
Second, the Python bindings to zstd (which will be vendored in a
separate changeset) make use of zstd APIs that are only available
via static linking. One reason they are only available via static
linking is that they are unstable and could change at any time.
While it might be possible for the Python bindings to attempt to
talk to different versions of the zstd C library, the safest thing to
do is link against a specific, known-working version of zstd. This
is why the Python zstd bindings themselves vendor zstd and why we
must as well. This also explains why the added files are in a
"python-zstandard" directory.
The added files are from the 1.1.1 release of zstd (Git commit
4c0b44f8ced84c4c8edfa07b564d31e4fa3e8885 from
https://github.com/facebook/zstd) and are added without modifications.
Not all files from the zstd "distribution" have been added. Notably
missing are files to support interacting with "legacy," pre-1.0
versions of zstd. The decision of which files to include is made by
the upstream python-zstandard project (which I'm the author of). The
files in this commit are a snapshot of the files from the 0.5.0
release of that project, Git commit
e637c1b214d5f869cf8116c550dcae23ec13b677 from
https://github.com/indygreg/python-zstandard.
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Thu, 10 Nov 2016 21:45:29 -0800 |
parents | |
children | b54a2984cdd4 |
comparison
equal
deleted
inserted
replaced
30433:96f2f50d923f | 30434:2e484bdea8c4 |
---|---|
1 /* | |
2 Common functions of New Generation Entropy library | |
3 Copyright (C) 2016, Yann Collet. | |
4 | |
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |
6 | |
7 Redistribution and use in source and binary forms, with or without | |
8 modification, are permitted provided that the following conditions are | |
9 met: | |
10 | |
11 * Redistributions of source code must retain the above copyright | |
12 notice, this list of conditions and the following disclaimer. | |
13 * Redistributions in binary form must reproduce the above | |
14 copyright notice, this list of conditions and the following disclaimer | |
15 in the documentation and/or other materials provided with the | |
16 distribution. | |
17 | |
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 | |
30 You can contact the author at : | |
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy | |
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c | |
33 *************************************************************************** */ | |
34 | |
35 /* ************************************* | |
36 * Dependencies | |
37 ***************************************/ | |
38 #include "mem.h" | |
39 #include "error_private.h" /* ERR_*, ERROR */ | |
40 #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ | |
41 #include "fse.h" | |
42 #define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ | |
43 #include "huf.h" | |
44 | |
45 | |
46 /*-**************************************** | |
47 * FSE Error Management | |
48 ******************************************/ | |
/* FSE_isError() :
 * Forwards `code` to ERR_isError() and hands back its verdict unchanged
 * (non-zero is treated by callers in this file as "this is an error code"). */
unsigned FSE_isError(size_t code)
{
    unsigned const verdict = ERR_isError(code);
    return verdict;
}
50 | |
/* FSE_getErrorName() :
 * Forwards `code` to ERR_getErrorName() and returns the string it provides. */
const char* FSE_getErrorName(size_t code)
{
    const char* const label = ERR_getErrorName(code);
    return label;
}
52 | |
53 | |
54 /* ************************************************************** | |
55 * HUF Error Management | |
56 ****************************************************************/ | |
/* HUF_isError() :
 * Forwards `code` to ERR_isError() and hands back its verdict unchanged
 * (same underlying check as the FSE variant in this file). */
unsigned HUF_isError(size_t code)
{
    unsigned const verdict = ERR_isError(code);
    return verdict;
}
58 | |
/* HUF_getErrorName() :
 * Forwards `code` to ERR_getErrorName() and returns the string it provides. */
const char* HUF_getErrorName(size_t code)
{
    const char* const label = ERR_getErrorName(code);
    return label;
}
60 | |
61 | |
62 /*-************************************************************** | |
63 * FSE NCount encoding-decoding | |
64 ****************************************************************/ | |
/* FSE_abs() :
 * Magnitude of `a`. The negation happens at `int` width (integer promotion)
 * and the result is narrowed back to `short`. */
static short FSE_abs(short a)
{
    if (a < 0) return (short)(-a);
    return a;
}
66 | |
/*! FSE_readNCount() :
    Decodes a table of normalized symbol counts from `headerBuffer`.
    `normalizedCounter` : destination; receives one value per decoded symbol
                          (indices 0 .. final *maxSVPtr).
    `maxSVPtr`          : in  = highest symbol index the caller accepts,
                          out = index of the last symbol actually decoded (charnum-1).
    `tableLogPtr`       : out = table log read from the low 4 bits of the header
                          plus FSE_MIN_TABLELOG.
    `hbSize`            : bytes available in `headerBuffer`; fewer than 4 is rejected.
    @return : number of header bytes consumed (ip - istart), or an error code :
              srcSize_wrong, tableLog_tooLarge, maxSymbolValue_tooSmall, corruption_detected.
*/
67 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, | |
68 const void* headerBuffer, size_t hbSize) | |
69 { | |
70 const BYTE* const istart = (const BYTE*) headerBuffer; | |
71 const BYTE* const iend = istart + hbSize; | |
72 const BYTE* ip = istart; | |
/* nbBits    : first the table log, then (after the ++ below) the widest bit width used to read one count */
73 int nbBits; | |
/* remaining : starts at (1<<tableLog)+1 and shrinks by |count| per symbol; must end at exactly 1 */
74 int remaining; | |
/* threshold : power of 2 tracking `remaining`; halved (with nbBits) while remaining < threshold */
75 int threshold; | |
/* bitStream : current 32-bit window of not-yet-consumed header bits */
76 U32 bitStream; | |
/* bitCount  : number of bits already consumed relative to `ip` */
77 int bitCount; | |
/* charnum   : index of the next symbol to be written */
78 unsigned charnum = 0; | |
/* previous0 : non-zero when the last stored count was 0, which switches on the zero-run decoding below */
79 int previous0 = 0; | |
80 | |
/* the decoder always loads 4 bytes at a time, so shorter inputs are refused up front */
81 if (hbSize < 4) return ERROR(srcSize_wrong); | |
82 bitStream = MEM_readLE32(ip); | |
83 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ | |
84 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); | |
85 bitStream >>= 4; | |
86 bitCount = 4; | |
87 *tableLogPtr = nbBits; | |
88 remaining = (1<<nbBits)+1; | |
89 threshold = 1<<nbBits; | |
90 nbBits++; | |
91 | |
/* both conditions are 0/1 comparison results, combined with bitwise & rather than && */
92 while ((remaining>1) & (charnum<=*maxSVPtr)) { | |
93 if (previous0) { | |
/* zero-run : n0 accumulates the index just past the run of zero-count symbols */
94 unsigned n0 = charnum; | |
/* every 16-bit group of all ones stands for 24 more zero-count symbols */
95 while ((bitStream & 0xFFFF) == 0xFFFF) { | |
96 n0 += 24; | |
/* reload from 2 bytes further only while that 4-byte read still fits in the buffer;
   otherwise keep consuming from the window already loaded */
97 if (ip < iend-5) { | |
98 ip += 2; | |
99 bitStream = MEM_readLE32(ip) >> bitCount; | |
100 } else { | |
101 bitStream >>= 16; | |
102 bitCount += 16; | |
103 } } | |
/* every 2-bit field equal to 3 stands for 3 more zero-count symbols */
104 while ((bitStream & 3) == 3) { | |
105 n0 += 3; | |
106 bitStream >>= 2; | |
107 bitCount += 2; | |
108 } | |
/* final 2-bit field (0..2) closes the run; its bits are counted here but not yet shifted out */
109 n0 += bitStream & 3; | |
110 bitCount += 2; | |
111 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); | |
112 while (charnum < n0) normalizedCounter[charnum++] = 0; | |
/* advance by whole consumed bytes when a 4-byte reload stays inside the buffer
   (NOTE(review): the first test presumably relies on bitCount>>3 being at most 3 - confirm);
   otherwise only drop the closing 2-bit field from the current window */
113 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { | |
114 ip += bitCount>>3; | |
115 bitCount &= 7; | |
116 bitStream = MEM_readLE32(ip) >> bitCount; | |
117 } else { | |
118 bitStream >>= 2; | |
119 } } | |
/* max : number of values that can be coded on nbBits-1 bits for the current threshold/remaining */
120 { short const max = (short)((2*threshold-1)-remaining); | |
121 short count; | |
122 | |
/* short form : value below `max` costs nbBits-1 bits;
   long form  : value costs nbBits bits, and the upper range is shifted down by `max` */
123 if ((bitStream & (threshold-1)) < (U32)max) { | |
124 count = (short)(bitStream & (threshold-1)); | |
125 bitCount += nbBits-1; | |
126 } else { | |
127 count = (short)(bitStream & (2*threshold-1)); | |
128 if (count >= threshold) count -= max; | |
129 bitCount += nbBits; | |
130 } | |
131 | |
/* stored value is the decoded value minus 1, so -1 is a legal count; it still consumes 1 from `remaining` */
132 count--; /* extra accuracy */ | |
133 remaining -= FSE_abs(count); | |
134 normalizedCounter[charnum++] = count; | |
135 previous0 = !count; | |
/* shrink the read width as the budget left to distribute gets smaller */
136 while (remaining < threshold) { | |
137 nbBits--; | |
138 threshold >>= 1; | |
139 } | |
140 | |
/* same in-bounds test as above; near the end of the buffer, pin ip to the last
   4-byte window and carry the extra offset in bitCount instead */
141 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { | |
142 ip += bitCount>>3; | |
143 bitCount &= 7; | |
144 } else { | |
145 bitCount -= (int)(8 * (iend - 4 - ip)); | |
146 ip = iend - 4; | |
147 } | |
/* shift amount is masked to 0..31; an over-long bitCount is reported after the loop */
148 bitStream = MEM_readLE32(ip) >> (bitCount & 31); | |
149 } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ | |
/* the counts must use up the budget exactly, and no more bits than the last window held */
150 if (remaining != 1) return ERROR(corruption_detected); | |
151 if (bitCount > 32) return ERROR(corruption_detected); | |
152 *maxSVPtr = charnum-1; | |
153 | |
/* round the consumed bits up to a whole byte to get the header size */
154 ip += (bitCount+7)>>3; | |
155 return ip-istart; | |
156 } | |
157 | |
158 | |
159 /*! HUF_readStats() : | |
160 Read compact Huffman tree, saved by HUF_writeCTable(). | |
161 `huffWeight` is destination buffer. | |
162 @return : size read from `src` , or an error Code . | |
163 Note : Needed by HUF_readCTable() and HUF_readDTableX?() . | |
164 */ | |
165 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, | |
166 U32* nbSymbolsPtr, U32* tableLogPtr, | |
167 const void* src, size_t srcSize) | |
168 { | |
169 U32 weightTotal; | |
170 const BYTE* ip = (const BYTE*) src; | |
171 size_t iSize; | |
172 size_t oSize; | |
173 | |
174 if (!srcSize) return ERROR(srcSize_wrong); | |
175 iSize = ip[0]; | |
176 /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ | |
177 | |
178 if (iSize >= 128) { /* special header */ | |
179 oSize = iSize - 127; | |
180 iSize = ((oSize+1)/2); | |
181 if (iSize+1 > srcSize) return ERROR(srcSize_wrong); | |
182 if (oSize >= hwSize) return ERROR(corruption_detected); | |
183 ip += 1; | |
184 { U32 n; | |
185 for (n=0; n<oSize; n+=2) { | |
186 huffWeight[n] = ip[n/2] >> 4; | |
187 huffWeight[n+1] = ip[n/2] & 15; | |
188 } } } | |
189 else { /* header compressed with FSE (normal case) */ | |
190 if (iSize+1 > srcSize) return ERROR(srcSize_wrong); | |
191 oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */ | |
192 if (FSE_isError(oSize)) return oSize; | |
193 } | |
194 | |
195 /* collect weight stats */ | |
196 memset(rankStats, 0, (HUF_TABLELOG_ABSOLUTEMAX + 1) * sizeof(U32)); | |
197 weightTotal = 0; | |
198 { U32 n; for (n=0; n<oSize; n++) { | |
199 if (huffWeight[n] >= HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected); | |
200 rankStats[huffWeight[n]]++; | |
201 weightTotal += (1 << huffWeight[n]) >> 1; | |
202 } } | |
203 if (weightTotal == 0) return ERROR(corruption_detected); | |
204 | |
205 /* get last non-null symbol weight (implied, total must be 2^n) */ | |
206 { U32 const tableLog = BIT_highbit32(weightTotal) + 1; | |
207 if (tableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected); | |
208 *tableLogPtr = tableLog; | |
209 /* determine last weight */ | |
210 { U32 const total = 1 << tableLog; | |
211 U32 const rest = total - weightTotal; | |
212 U32 const verif = 1 << BIT_highbit32(rest); | |
213 U32 const lastWeight = BIT_highbit32(rest) + 1; | |
214 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ | |
215 huffWeight[oSize] = (BYTE)lastWeight; | |
216 rankStats[lastWeight]++; | |
217 } } | |
218 | |
219 /* check tree construction validity */ | |
220 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ | |
221 | |
222 /* results */ | |
223 *nbSymbolsPtr = (U32)(oSize+1); | |
224 return iSize+1; | |
225 } |