Mercurial > public > mercurial-scm > hg
annotate contrib/python-zstandard/zstd/dictBuilder/cover.c @ 48674:f7086f6173f8 stable
dirstate-v2: rename the configuration to enable the format
The rename of the old experimental name was overlooked before the 6.0 release.
We rename everything to use the new name (and keep the released name as an alias
for compatibility).
Differential Revision: https://phab.mercurial-scm.org/D12129
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 01 Feb 2022 16:36:20 +0100 |
parents | de7838053207 |
children |
rev | line source |
---|---|
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1 /* |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
3 * All rights reserved. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
4 * |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
5 * This source code is licensed under both the BSD-style license (found in the |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
7 * in the COPYING file in the root directory of this source tree). |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
8 * You may select, at your option, one of the above-listed licenses. |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
9 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
10 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
11 /* ***************************************************************************** |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
12 * Constructs a dictionary using a heuristic based on the following paper: |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
13 * |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
14 * Liao, Petri, Moffat, Wirth |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
15 * Effective Construction of Relative Lempel-Ziv Dictionaries |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
16 * Published in WWW 2016. |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
17 * |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
18 * Adapted from code originally written by @ot (Giuseppe Ottaviano). |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
19 ******************************************************************************/ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
20 |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
21 /*-************************************* |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
22 * Dependencies |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
23 ***************************************/ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
24 #include <stdio.h> /* fprintf */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
25 #include <stdlib.h> /* malloc, free, qsort */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
26 #include <string.h> /* memset */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
27 #include <time.h> /* clock */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
28 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
29 #include "mem.h" /* read */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
30 #include "pool.h" |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
31 #include "threading.h" |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
32 #include "cover.h" |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
33 #include "zstd_internal.h" /* includes zstd.h */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
34 #ifndef ZDICT_STATIC_LINKING_ONLY |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
35 #define ZDICT_STATIC_LINKING_ONLY |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
36 #endif |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
37 #include "zdict.h" |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
38 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
39 /*-************************************* |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
40 * Constants |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
41 ***************************************/ |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
42 #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
43 #define DEFAULT_SPLITPOINT 1.0 |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
44 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
45 /*-************************************* |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
46 * Console display |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
47 ***************************************/ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
48 static int g_displayLevel = 2; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
49 #define DISPLAY(...) \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
50 { \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
51 fprintf(stderr, __VA_ARGS__); \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
52 fflush(stderr); \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
53 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
54 #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
55 if (displayLevel >= l) { \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
56 DISPLAY(__VA_ARGS__); \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
57 } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
58 #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
59 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
60 #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
61 if (displayLevel >= l) { \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
62 if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
63 g_time = clock(); \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
64 DISPLAY(__VA_ARGS__); \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
65 } \ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
66 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
67 #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
68 static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
69 static clock_t g_time = 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
70 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
71 /*-************************************* |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
72 * Hash table |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
73 *************************************** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
74 * A small specialized hash map for storing activeDmers. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
75 * The map does not resize, so if it becomes full it will loop forever. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
76 * Thus, the map must be large enough to store every value. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
77 * The map implements linear probing and keeps its load less than 0.5. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
78 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
79 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
80 #define MAP_EMPTY_VALUE ((U32)-1) |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
81 typedef struct COVER_map_pair_t_s { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
82 U32 key; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
83 U32 value; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
84 } COVER_map_pair_t; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
85 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
86 typedef struct COVER_map_s { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
87 COVER_map_pair_t *data; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
88 U32 sizeLog; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
89 U32 size; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
90 U32 sizeMask; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
91 } COVER_map_t; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
92 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
93 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
94 * Clear the map. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
95 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
96 static void COVER_map_clear(COVER_map_t *map) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
97 memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t)); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
98 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
99 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
100 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
101 * Initializes a map of the given size. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
102 * Returns 1 on success and 0 on failure. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
103 * The map must be destroyed with COVER_map_destroy(). |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
104 * The map is only guaranteed to be large enough to hold size elements. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
105 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
106 static int COVER_map_init(COVER_map_t *map, U32 size) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
107 map->sizeLog = ZSTD_highbit32(size) + 2; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
108 map->size = (U32)1 << map->sizeLog; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
109 map->sizeMask = map->size - 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
110 map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t)); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
111 if (!map->data) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
112 map->sizeLog = 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
113 map->size = 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
114 return 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
115 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
116 COVER_map_clear(map); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
117 return 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
118 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
119 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
120 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
121 * Internal hash function |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
122 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
123 static const U32 prime4bytes = 2654435761U; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
124 static U32 COVER_map_hash(COVER_map_t *map, U32 key) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
125 return (key * prime4bytes) >> (32 - map->sizeLog); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
126 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
127 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
128 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
129 * Helper function that returns the index that a key should be placed into. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
130 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
131 static U32 COVER_map_index(COVER_map_t *map, U32 key) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
132 const U32 hash = COVER_map_hash(map, key); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
133 U32 i; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
134 for (i = hash;; i = (i + 1) & map->sizeMask) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
135 COVER_map_pair_t *pos = &map->data[i]; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
136 if (pos->value == MAP_EMPTY_VALUE) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
137 return i; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
138 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
139 if (pos->key == key) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
140 return i; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
141 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
142 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
143 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
144 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
145 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
146 * Returns the pointer to the value for key. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
147 * If key is not in the map, it is inserted and the value is set to 0. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
148 * The map must not be full. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
149 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
150 static U32 *COVER_map_at(COVER_map_t *map, U32 key) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
151 COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)]; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
152 if (pos->value == MAP_EMPTY_VALUE) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
153 pos->key = key; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
154 pos->value = 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
155 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
156 return &pos->value; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
157 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
158 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
159 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
160 * Deletes key from the map if present. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
161 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
162 static void COVER_map_remove(COVER_map_t *map, U32 key) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
163 U32 i = COVER_map_index(map, key); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
164 COVER_map_pair_t *del = &map->data[i]; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
165 U32 shift = 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
166 if (del->value == MAP_EMPTY_VALUE) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
167 return; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
168 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
169 for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
170 COVER_map_pair_t *const pos = &map->data[i]; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
171 /* If the position is empty we are done */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
172 if (pos->value == MAP_EMPTY_VALUE) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
173 del->value = MAP_EMPTY_VALUE; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
174 return; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
175 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
176 /* If pos can be moved to del do so */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
177 if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
178 del->key = pos->key; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
179 del->value = pos->value; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
180 del = pos; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
181 shift = 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
182 } else { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
183 ++shift; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
184 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
185 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
186 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
187 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
188 /** |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
189 * Destroys a map that is inited with COVER_map_init(). |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
190 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
191 static void COVER_map_destroy(COVER_map_t *map) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
192 if (map->data) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
193 free(map->data); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
194 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
195 map->data = NULL; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
196 map->size = 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
197 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
198 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
199 /*-************************************* |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
200 * Context |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
201 ***************************************/ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
202 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
203 typedef struct { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
204 const BYTE *samples; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
205 size_t *offsets; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
206 const size_t *samplesSizes; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
207 size_t nbSamples; |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
208 size_t nbTrainSamples; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
209 size_t nbTestSamples; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
210 U32 *suffix; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
211 size_t suffixSize; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
212 U32 *freqs; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
213 U32 *dmerAt; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
214 unsigned d; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
215 } COVER_ctx_t; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
216 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
217 /* We need a global context for qsort... */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
218 static COVER_ctx_t *g_ctx = NULL; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
219 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
220 /*-************************************* |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
221 * Helper functions |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
222 ***************************************/ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
223 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
224 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
225 * Returns the sum of the sample sizes. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
226 */ |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
227 size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) { |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
228 size_t sum = 0; |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
229 unsigned i; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
230 for (i = 0; i < nbSamples; ++i) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
231 sum += samplesSizes[i]; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
232 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
233 return sum; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
234 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
235 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
236 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
237 * Returns -1 if the dmer at lp is less than the dmer at rp. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
238 * Returns 0 if the dmers at lp and rp are equal. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
239 * Returns 1 if the dmer at lp is greater than the dmer at rp. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
240 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
241 static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) { |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
242 U32 const lhs = *(U32 const *)lp; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
243 U32 const rhs = *(U32 const *)rp; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
244 return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
245 } |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
246 /** |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
247 * Faster version for d <= 8. |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
248 */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
249 static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
250 U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
251 U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
252 U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
253 if (lhs < rhs) { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
254 return -1; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
255 } |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
256 return (lhs > rhs); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
257 } |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
258 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
259 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
260 * Same as COVER_cmp() except ties are broken by pointer value |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
261 * NOTE: g_ctx must be set to call this function. A global is required because |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
262 * qsort doesn't take an opaque pointer. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
263 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
264 static int COVER_strict_cmp(const void *lp, const void *rp) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
265 int result = COVER_cmp(g_ctx, lp, rp); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
266 if (result == 0) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
267 result = lp < rp ? -1 : 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
268 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
269 return result; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
270 } |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
271 /** |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
272 * Faster version for d <= 8. |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
273 */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
274 static int COVER_strict_cmp8(const void *lp, const void *rp) { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
275 int result = COVER_cmp8(g_ctx, lp, rp); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
276 if (result == 0) { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
277 result = lp < rp ? -1 : 1; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
278 } |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
279 return result; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
280 } |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
281 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
282 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
283 * Returns the first pointer in [first, last) whose element does not compare |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
284 * less than value. If no such element exists it returns last. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
285 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
286 static const size_t *COVER_lower_bound(const size_t *first, const size_t *last, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
287 size_t value) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
288 size_t count = last - first; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
289 while (count != 0) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
290 size_t step = count / 2; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
291 const size_t *ptr = first; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
292 ptr += step; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
293 if (*ptr < value) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
294 first = ++ptr; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
295 count -= step + 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
296 } else { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
297 count = step; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
298 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
299 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
300 return first; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
301 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
302 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
303 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
304 * Generic groupBy function. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
305 * Groups an array sorted by cmp into groups with equivalent values. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
306 * Calls grp for each group. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
307 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
308 static void |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
309 COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
310 int (*cmp)(COVER_ctx_t *, const void *, const void *), |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
311 void (*grp)(COVER_ctx_t *, const void *, const void *)) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
312 const BYTE *ptr = (const BYTE *)data; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
313 size_t num = 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
314 while (num < count) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
315 const BYTE *grpEnd = ptr + size; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
316 ++num; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
317 while (num < count && cmp(ctx, ptr, grpEnd) == 0) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
318 grpEnd += size; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
319 ++num; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
320 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
321 grp(ctx, ptr, grpEnd); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
322 ptr = grpEnd; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
323 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
324 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
325 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
326 /*-************************************* |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
327 * Cover functions |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
328 ***************************************/ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
329 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
330 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
331 * Called on each group of positions with the same dmer. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
332 * Counts the frequency of each dmer and saves it in the suffix array. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
333 * Fills `ctx->dmerAt`. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
334 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
335 static void COVER_group(COVER_ctx_t *ctx, const void *group, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
336 const void *groupEnd) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
337 /* The group consists of all the positions with the same first d bytes. */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
338 const U32 *grpPtr = (const U32 *)group; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
339 const U32 *grpEnd = (const U32 *)groupEnd; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
340 /* The dmerId is how we will reference this dmer. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
341 * This allows us to map the whole dmer space to a much smaller space, the |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
342 * size of the suffix array. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
343 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
344 const U32 dmerId = (U32)(grpPtr - ctx->suffix); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
345 /* Count the number of samples this dmer shows up in */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
346 U32 freq = 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
347 /* Details */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
348 const size_t *curOffsetPtr = ctx->offsets; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
349 const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
350 /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
351 * different sample than the last. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
352 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
353 size_t curSampleEnd = ctx->offsets[0]; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
354 for (; grpPtr != grpEnd; ++grpPtr) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
355 /* Save the dmerId for this position so we can get back to it. */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
356 ctx->dmerAt[*grpPtr] = dmerId; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
357 /* Dictionaries only help for the first reference to the dmer. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
358 * After that zstd can reference the match from the previous reference. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
359 * So only count each dmer once for each sample it is in. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
360 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
361 if (*grpPtr < curSampleEnd) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
362 continue; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
363 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
364 freq += 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
365 /* Binary search to find the end of the sample *grpPtr is in. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
366 * In the common case that grpPtr + 1 == grpEnd we can skip the binary |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
367 * search because the loop is over. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
368 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
369 if (grpPtr + 1 != grpEnd) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
370 const size_t *sampleEndPtr = |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
371 COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
372 curSampleEnd = *sampleEndPtr; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
373 curOffsetPtr = sampleEndPtr + 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
374 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
375 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
376 /* At this point we are never going to look at this segment of the suffix |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
377 * array again. We take advantage of this fact to save memory. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
378 * We store the frequency of the dmer in the first position of the group, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
379 * which is dmerId. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
380 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
381 ctx->suffix[dmerId] = freq; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
382 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
383 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
384 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
385 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
386 * Selects the best segment in an epoch. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
387 * Segments are scored according to the function: |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
388 * |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
389 * Let F(d) be the frequency of dmer d. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
390 * Let S_i be the dmer at position i of segment S which has length k. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
391 * |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
392 * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
393 * |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
394 * Once the dmer d is in the dictionary we set F(d) = 0. |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
395 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
396 static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
397 COVER_map_t *activeDmers, U32 begin, |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
398 U32 end, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
399 ZDICT_cover_params_t parameters) { |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
400 /* Constants */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
401 const U32 k = parameters.k; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
402 const U32 d = parameters.d; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
403 const U32 dmersInK = k - d + 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
404 /* Try each segment (activeSegment) and save the best (bestSegment) */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
405 COVER_segment_t bestSegment = {0, 0, 0}; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
406 COVER_segment_t activeSegment; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
407 /* Reset the activeDmers in the segment */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
408 COVER_map_clear(activeDmers); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
409 /* The activeSegment starts at the beginning of the epoch. */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
410 activeSegment.begin = begin; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
411 activeSegment.end = begin; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
412 activeSegment.score = 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
413 /* Slide the activeSegment through the whole epoch. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
414 * Save the best segment in bestSegment. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
415 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
416 while (activeSegment.end < end) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
417 /* The dmerId for the dmer at the next position */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
418 U32 newDmer = ctx->dmerAt[activeSegment.end]; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
419 /* The entry in activeDmers for this dmerId */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
420 U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
421 /* If the dmer isn't already present in the segment add its score. */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
422 if (*newDmerOcc == 0) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
423 /* The paper suggests using the L-0.5 norm, but experiments show that it |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
424 * doesn't help. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
425 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
426 activeSegment.score += freqs[newDmer]; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
427 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
428 /* Add the dmer to the segment */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
429 activeSegment.end += 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
430 *newDmerOcc += 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
431 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
432 /* If the window is now too large, drop the first position */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
433 if (activeSegment.end - activeSegment.begin == dmersInK + 1) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
434 U32 delDmer = ctx->dmerAt[activeSegment.begin]; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
435 U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
436 activeSegment.begin += 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
437 *delDmerOcc -= 1; |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
438 /* If this is the last occurrence of the dmer, subtract its score */ |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
439 if (*delDmerOcc == 0) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
440 COVER_map_remove(activeDmers, delDmer); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
441 activeSegment.score -= freqs[delDmer]; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
442 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
443 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
444 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
445 /* If this segment is the best so far save it */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
446 if (activeSegment.score > bestSegment.score) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
447 bestSegment = activeSegment; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
448 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
449 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
450 { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
451 /* Trim off the zero frequency head and tail from the segment. */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
452 U32 newBegin = bestSegment.end; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
453 U32 newEnd = bestSegment.begin; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
454 U32 pos; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
455 for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
456 U32 freq = freqs[ctx->dmerAt[pos]]; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
457 if (freq != 0) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
458 newBegin = MIN(newBegin, pos); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
459 newEnd = pos + 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
460 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
461 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
462 bestSegment.begin = newBegin; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
463 bestSegment.end = newEnd; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
464 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
465 { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
466 /* Zero out the frequency of each dmer covered by the chosen segment. */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
467 U32 pos; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
468 for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
469 freqs[ctx->dmerAt[pos]] = 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
470 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
471 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
472 return bestSegment; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
473 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
474 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
475 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
476 * Check the validity of the parameters. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
477 * Returns non-zero if the parameters are valid and 0 otherwise. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
478 */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
479 static int COVER_checkParameters(ZDICT_cover_params_t parameters, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
480 size_t maxDictSize) { |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
481 /* k and d are required parameters */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
482 if (parameters.d == 0 || parameters.k == 0) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
483 return 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
484 } |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
485 /* k <= maxDictSize */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
486 if (parameters.k > maxDictSize) { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
487 return 0; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
488 } |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
489 /* d <= k */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
490 if (parameters.d > parameters.k) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
491 return 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
492 } |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
493 /* 0 < splitPoint <= 1 */ |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
494 if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){ |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
495 return 0; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
496 } |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
497 return 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
498 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
499 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
500 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
501 * Clean up a context initialized with `COVER_ctx_init()`. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
502 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
503 static void COVER_ctx_destroy(COVER_ctx_t *ctx) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
504 if (!ctx) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
505 return; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
506 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
507 if (ctx->suffix) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
508 free(ctx->suffix); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
509 ctx->suffix = NULL; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
510 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
511 if (ctx->freqs) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
512 free(ctx->freqs); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
513 ctx->freqs = NULL; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
514 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
515 if (ctx->dmerAt) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
516 free(ctx->dmerAt); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
517 ctx->dmerAt = NULL; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
518 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
519 if (ctx->offsets) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
520 free(ctx->offsets); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
521 ctx->offsets = NULL; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
522 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
523 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
524 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
525 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
526 * Prepare a context for dictionary building. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
527 * The context is only dependent on the parameter `d` and can used multiple |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
528 * times. |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
529 * Returns 0 on success or error code on error. |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
530 * The context must be destroyed with `COVER_ctx_destroy()`. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
531 */ |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
532 static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
533 const size_t *samplesSizes, unsigned nbSamples, |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
534 unsigned d, double splitPoint) { |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
535 const BYTE *const samples = (const BYTE *)samplesBuffer; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
536 const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
537 /* Split samples into testing and training sets */ |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
538 const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
539 const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
540 const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
541 const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
542 /* Checks */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
543 if (totalSamplesSize < MAX(d, sizeof(U64)) || |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
544 totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
545 DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
546 (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
547 return ERROR(srcSize_wrong); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
548 } |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
549 /* Check if there are at least 5 training samples */ |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
550 if (nbTrainSamples < 5) { |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
551 DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
552 return ERROR(srcSize_wrong); |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
553 } |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
554 /* Check if there's testing sample */ |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
555 if (nbTestSamples < 1) { |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
556 DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
557 return ERROR(srcSize_wrong); |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
558 } |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
559 /* Zero the context */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
560 memset(ctx, 0, sizeof(*ctx)); |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
561 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
562 (unsigned)trainingSamplesSize); |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
563 DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
564 (unsigned)testSamplesSize); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
565 ctx->samples = samples; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
566 ctx->samplesSizes = samplesSizes; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
567 ctx->nbSamples = nbSamples; |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
568 ctx->nbTrainSamples = nbTrainSamples; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
569 ctx->nbTestSamples = nbTestSamples; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
570 /* Partial suffix array */ |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
571 ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
572 ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
573 /* Maps index to the dmerID */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
574 ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
575 /* The offsets of each file */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
576 ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
577 if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
578 DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
579 COVER_ctx_destroy(ctx); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
580 return ERROR(memory_allocation); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
581 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
582 ctx->freqs = NULL; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
583 ctx->d = d; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
584 |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
585 /* Fill offsets from the samplesSizes */ |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
586 { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
587 U32 i; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
588 ctx->offsets[0] = 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
589 for (i = 1; i <= nbSamples; ++i) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
590 ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
591 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
592 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
593 DISPLAYLEVEL(2, "Constructing partial suffix array\n"); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
594 { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
595 /* suffix is a partial suffix array. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
596 * It only sorts suffixes by their first parameters.d bytes. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
597 * The sort is stable, so each dmer group is sorted by position in input. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
598 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
599 U32 i; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
600 for (i = 0; i < ctx->suffixSize; ++i) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
601 ctx->suffix[i] = i; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
602 } |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
603 /* qsort doesn't take an opaque pointer, so pass as a global. |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
604 * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is. |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
605 */ |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
606 g_ctx = ctx; |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
607 #if defined(__OpenBSD__) |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
608 mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32), |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
609 (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
610 #else |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
611 qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
612 (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
613 #endif |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
614 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
615 DISPLAYLEVEL(2, "Computing frequencies\n"); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
616 /* For each dmer group (group of positions with the same first d bytes): |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
617 * 1. For each position we set dmerAt[position] = dmerID. The dmerID is |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
618 * (groupBeginPtr - suffix). This allows us to go from position to |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
619 * dmerID so we can look up values in freq. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
620 * 2. We calculate how many samples the dmer occurs in and save it in |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
621 * freqs[dmerId]. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
622 */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
623 COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
624 (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
625 ctx->freqs = ctx->suffix; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
626 ctx->suffix = NULL; |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
627 return 0; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
628 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
629 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
630 void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
631 { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
632 const double ratio = (double)nbDmers / maxDictSize; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
633 if (ratio >= 10) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
634 return; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
635 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
636 LOCALDISPLAYLEVEL(displayLevel, 1, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
637 "WARNING: The maximum dictionary size %u is too large " |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
638 "compared to the source size %u! " |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
639 "size(source)/size(dictionary) = %f, but it should be >= " |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
640 "10! This may lead to a subpar dictionary! We recommend " |
43994
de7838053207
zstandard: vendor python-zstandard 0.13.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42937
diff
changeset
|
641 "training on sources at least 10x, and preferably 100x " |
de7838053207
zstandard: vendor python-zstandard 0.13.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42937
diff
changeset
|
642 "the size of the dictionary! \n", (U32)maxDictSize, |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
643 (U32)nbDmers, ratio); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
644 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
645 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
646 COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
647 U32 nbDmers, U32 k, U32 passes) |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
648 { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
649 const U32 minEpochSize = k * 10; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
650 COVER_epoch_info_t epochs; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
651 epochs.num = MAX(1, maxDictSize / k / passes); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
652 epochs.size = nbDmers / epochs.num; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
653 if (epochs.size >= minEpochSize) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
654 assert(epochs.size * epochs.num <= nbDmers); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
655 return epochs; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
656 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
657 epochs.size = MIN(minEpochSize, nbDmers); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
658 epochs.num = nbDmers / epochs.size; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
659 assert(epochs.size * epochs.num <= nbDmers); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
660 return epochs; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
661 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
662 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
663 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
664 * Given the prepared context build the dictionary. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
665 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
666 static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
667 COVER_map_t *activeDmers, void *dictBuffer, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
668 size_t dictBufferCapacity, |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
669 ZDICT_cover_params_t parameters) { |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
670 BYTE *const dict = (BYTE *)dictBuffer; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
671 size_t tail = dictBufferCapacity; |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
672 /* Divide the data into epochs. We will select one segment from each epoch. */ |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
673 const COVER_epoch_info_t epochs = COVER_computeEpochs( |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
674 (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
675 const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3)); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
676 size_t zeroScoreRun = 0; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
677 size_t epoch; |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
678 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
679 (U32)epochs.num, (U32)epochs.size); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
680 /* Loop through the epochs until there are no more segments or the dictionary |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
681 * is full. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
682 */ |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
683 for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
684 const U32 epochBegin = (U32)(epoch * epochs.size); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
685 const U32 epochEnd = epochBegin + epochs.size; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
686 size_t segmentSize; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
687 /* Select a segment */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
688 COVER_segment_t segment = COVER_selectSegment( |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
689 ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
690 /* If the segment covers no dmers, then we are out of content. |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
691 * There may be new content in other epochs, so continue for some time. |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
692 */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
693 if (segment.score == 0) { |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
694 if (++zeroScoreRun >= maxZeroScoreRun) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
695 break; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
696 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
697 continue; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
698 } |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
699 zeroScoreRun = 0; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
700 /* Trim the segment if necessary and if it is too small then we are done */ |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
701 segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
702 if (segmentSize < parameters.d) { |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
703 break; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
704 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
705 /* We fill the dictionary from the back to allow the best segments to be |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
706 * referenced with the smallest offsets. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
707 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
708 tail -= segmentSize; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
709 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
710 DISPLAYUPDATE( |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
711 2, "\r%u%% ", |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
712 (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
713 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
714 DISPLAYLEVEL(2, "\r%79s\r", ""); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
715 return tail; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
716 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
717 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
718 ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
719 void *dictBuffer, size_t dictBufferCapacity, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
720 const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
721 ZDICT_cover_params_t parameters) |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
722 { |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
723 BYTE* const dict = (BYTE*)dictBuffer; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
724 COVER_ctx_t ctx; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
725 COVER_map_t activeDmers; |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
726 parameters.splitPoint = 1.0; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
727 /* Initialize global data */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
728 g_displayLevel = parameters.zParams.notificationLevel; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
729 /* Checks */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
730 if (!COVER_checkParameters(parameters, dictBufferCapacity)) { |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
731 DISPLAYLEVEL(1, "Cover parameters incorrect\n"); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
732 return ERROR(parameter_outOfBound); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
733 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
734 if (nbSamples == 0) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
735 DISPLAYLEVEL(1, "Cover must have at least one input file\n"); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
736 return ERROR(srcSize_wrong); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
737 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
738 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
739 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
740 ZDICT_DICTSIZE_MIN); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
741 return ERROR(dstSize_tooSmall); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
742 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
743 /* Initialize context and activeDmers */ |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
744 { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
745 size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
746 parameters.d, parameters.splitPoint); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
747 if (ZSTD_isError(initVal)) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
748 return initVal; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
749 } |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
750 } |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
751 COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
752 if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
753 DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
754 COVER_ctx_destroy(&ctx); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
755 return ERROR(memory_allocation); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
756 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
757 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
758 DISPLAYLEVEL(2, "Building dictionary\n"); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
759 { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
760 const size_t tail = |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
761 COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
762 dictBufferCapacity, parameters); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
763 const size_t dictionarySize = ZDICT_finalizeDictionary( |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
764 dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
765 samplesBuffer, samplesSizes, nbSamples, parameters.zParams); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
766 if (!ZSTD_isError(dictionarySize)) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
767 DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
768 (unsigned)dictionarySize); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
769 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
770 COVER_ctx_destroy(&ctx); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
771 COVER_map_destroy(&activeDmers); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
772 return dictionarySize; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
773 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
774 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
775 |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
776 |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
777 |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
778 size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
779 const size_t *samplesSizes, const BYTE *samples, |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
780 size_t *offsets, |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
781 size_t nbTrainSamples, size_t nbSamples, |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
782 BYTE *const dict, size_t dictBufferCapacity) { |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
783 size_t totalCompressedSize = ERROR(GENERIC); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
784 /* Pointers */ |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
785 ZSTD_CCtx *cctx; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
786 ZSTD_CDict *cdict; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
787 void *dst; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
788 /* Local variables */ |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
789 size_t dstCapacity; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
790 size_t i; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
791 /* Allocate dst with enough space to compress the maximum sized sample */ |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
792 { |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
793 size_t maxSampleSize = 0; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
794 i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
795 for (; i < nbSamples; ++i) { |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
796 maxSampleSize = MAX(samplesSizes[i], maxSampleSize); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
797 } |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
798 dstCapacity = ZSTD_compressBound(maxSampleSize); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
799 dst = malloc(dstCapacity); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
800 } |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
801 /* Create the cctx and cdict */ |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
802 cctx = ZSTD_createCCtx(); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
803 cdict = ZSTD_createCDict(dict, dictBufferCapacity, |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
804 parameters.zParams.compressionLevel); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
805 if (!dst || !cctx || !cdict) { |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
806 goto _compressCleanup; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
807 } |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
808 /* Compress each sample and sum their sizes (or error) */ |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
809 totalCompressedSize = dictBufferCapacity; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
810 i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
811 for (; i < nbSamples; ++i) { |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
812 const size_t size = ZSTD_compress_usingCDict( |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
813 cctx, dst, dstCapacity, samples + offsets[i], |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
814 samplesSizes[i], cdict); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
815 if (ZSTD_isError(size)) { |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
816 totalCompressedSize = size; |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
817 goto _compressCleanup; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
818 } |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
819 totalCompressedSize += size; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
820 } |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
821 _compressCleanup: |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
822 ZSTD_freeCCtx(cctx); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
823 ZSTD_freeCDict(cdict); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
824 if (dst) { |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
825 free(dst); |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
826 } |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
827 return totalCompressedSize; |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
828 } |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
829 |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
830 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
831 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
832 * Initialize the `COVER_best_t`. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
833 */ |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
834 void COVER_best_init(COVER_best_t *best) { |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
835 if (best==NULL) return; /* compatible with init on NULL */ |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
836 (void)ZSTD_pthread_mutex_init(&best->mutex, NULL); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
837 (void)ZSTD_pthread_cond_init(&best->cond, NULL); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
838 best->liveJobs = 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
839 best->dict = NULL; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
840 best->dictSize = 0; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
841 best->compressedSize = (size_t)-1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
842 memset(&best->parameters, 0, sizeof(best->parameters)); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
843 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
844 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
845 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
846 * Wait until liveJobs == 0. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
847 */ |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
848 void COVER_best_wait(COVER_best_t *best) { |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
849 if (!best) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
850 return; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
851 } |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
852 ZSTD_pthread_mutex_lock(&best->mutex); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
853 while (best->liveJobs != 0) { |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
854 ZSTD_pthread_cond_wait(&best->cond, &best->mutex); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
855 } |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
856 ZSTD_pthread_mutex_unlock(&best->mutex); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
857 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
858 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
859 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
860 * Call COVER_best_wait() and then destroy the COVER_best_t. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
861 */ |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
862 void COVER_best_destroy(COVER_best_t *best) { |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
863 if (!best) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
864 return; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
865 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
866 COVER_best_wait(best); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
867 if (best->dict) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
868 free(best->dict); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
869 } |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
870 ZSTD_pthread_mutex_destroy(&best->mutex); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
871 ZSTD_pthread_cond_destroy(&best->cond); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
872 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
873 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
874 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
875 * Called when a thread is about to be launched. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
876 * Increments liveJobs. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
877 */ |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
878 void COVER_best_start(COVER_best_t *best) { |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
879 if (!best) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
880 return; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
881 } |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
882 ZSTD_pthread_mutex_lock(&best->mutex); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
883 ++best->liveJobs; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
884 ZSTD_pthread_mutex_unlock(&best->mutex); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
885 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
886 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
887 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
888 * Called when a thread finishes executing, both on error or success. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
889 * Decrements liveJobs and signals any waiting threads if liveJobs == 0. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
890 * If this dictionary is the best so far save it and its parameters. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
891 */ |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
892 void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
893 COVER_dictSelection_t selection) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
894 void* dict = selection.dictContent; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
895 size_t compressedSize = selection.totalCompressedSize; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
896 size_t dictSize = selection.dictSize; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
897 if (!best) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
898 return; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
899 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
900 { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
901 size_t liveJobs; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
902 ZSTD_pthread_mutex_lock(&best->mutex); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
903 --best->liveJobs; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
904 liveJobs = best->liveJobs; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
905 /* If the new dictionary is better */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
906 if (compressedSize < best->compressedSize) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
907 /* Allocate space if necessary */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
908 if (!best->dict || best->dictSize < dictSize) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
909 if (best->dict) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
910 free(best->dict); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
911 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
912 best->dict = malloc(dictSize); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
913 if (!best->dict) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
914 best->compressedSize = ERROR(GENERIC); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
915 best->dictSize = 0; |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
916 ZSTD_pthread_cond_signal(&best->cond); |
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
917 ZSTD_pthread_mutex_unlock(&best->mutex); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
918 return; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
919 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
920 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
921 /* Save the dictionary, parameters, and size */ |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
922 if (dict) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
923 memcpy(best->dict, dict, dictSize); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
924 best->dictSize = dictSize; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
925 best->parameters = parameters; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
926 best->compressedSize = compressedSize; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
927 } |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
928 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
929 if (liveJobs == 0) { |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
930 ZSTD_pthread_cond_broadcast(&best->cond); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
931 } |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
932 ZSTD_pthread_mutex_unlock(&best->mutex); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
933 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
934 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
935 |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
936 COVER_dictSelection_t COVER_dictSelectionError(size_t error) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
937 COVER_dictSelection_t selection = { NULL, 0, error }; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
938 return selection; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
939 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
940 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
941 unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
942 return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
943 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
944 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
945 void COVER_dictSelectionFree(COVER_dictSelection_t selection){ |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
946 free(selection.dictContent); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
947 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
948 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
949 COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
950 size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
951 size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
952 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
953 size_t largestDict = 0; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
954 size_t largestCompressed = 0; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
955 BYTE* customDictContentEnd = customDictContent + dictContentSize; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
956 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
957 BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
958 BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
959 double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
960 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
961 if (!largestDictbuffer || !candidateDictBuffer) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
962 free(largestDictbuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
963 free(candidateDictBuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
964 return COVER_dictSelectionError(dictContentSize); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
965 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
966 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
967 /* Initial dictionary size and compressed size */ |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
968 memcpy(largestDictbuffer, customDictContent, dictContentSize); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
969 dictContentSize = ZDICT_finalizeDictionary( |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
970 largestDictbuffer, dictContentSize, customDictContent, dictContentSize, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
971 samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
972 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
973 if (ZDICT_isError(dictContentSize)) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
974 free(largestDictbuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
975 free(candidateDictBuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
976 return COVER_dictSelectionError(dictContentSize); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
977 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
978 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
979 totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
980 samplesBuffer, offsets, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
981 nbCheckSamples, nbSamples, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
982 largestDictbuffer, dictContentSize); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
983 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
984 if (ZSTD_isError(totalCompressedSize)) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
985 free(largestDictbuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
986 free(candidateDictBuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
987 return COVER_dictSelectionError(totalCompressedSize); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
988 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
989 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
990 if (params.shrinkDict == 0) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
991 COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
992 free(candidateDictBuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
993 return selection; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
994 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
995 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
996 largestDict = dictContentSize; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
997 largestCompressed = totalCompressedSize; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
998 dictContentSize = ZDICT_DICTSIZE_MIN; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
999 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1000 /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */ |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1001 while (dictContentSize < largestDict) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1002 memcpy(candidateDictBuffer, largestDictbuffer, largestDict); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1003 dictContentSize = ZDICT_finalizeDictionary( |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1004 candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1005 samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1006 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1007 if (ZDICT_isError(dictContentSize)) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1008 free(largestDictbuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1009 free(candidateDictBuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1010 return COVER_dictSelectionError(dictContentSize); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1011 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1012 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1013 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1014 totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1015 samplesBuffer, offsets, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1016 nbCheckSamples, nbSamples, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1017 candidateDictBuffer, dictContentSize); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1018 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1019 if (ZSTD_isError(totalCompressedSize)) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1020 free(largestDictbuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1021 free(candidateDictBuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1022 return COVER_dictSelectionError(totalCompressedSize); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1023 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1024 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1025 if (totalCompressedSize <= largestCompressed * regressionTolerance) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1026 COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize }; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1027 free(largestDictbuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1028 return selection; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1029 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1030 dictContentSize *= 2; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1031 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1032 dictContentSize = largestDict; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1033 totalCompressedSize = largestCompressed; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1034 { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1035 COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1036 free(candidateDictBuffer); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1037 return selection; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1038 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1039 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1040 |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1041 /** |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1042 * Parameters for COVER_tryParameters(). |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1043 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1044 typedef struct COVER_tryParameters_data_s { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1045 const COVER_ctx_t *ctx; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1046 COVER_best_t *best; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1047 size_t dictBufferCapacity; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1048 ZDICT_cover_params_t parameters; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1049 } COVER_tryParameters_data_t; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1050 |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1051 /** |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
1052 * Tries a set of parameters and updates the COVER_best_t with the results. |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1053 * This function is thread safe if zstd is compiled with multithreaded support. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1054 * It takes its parameters as an *OWNING* opaque pointer to support threading. |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1055 */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1056 static void COVER_tryParameters(void *opaque) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1057 /* Save parameters as local variables */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1058 COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1059 const COVER_ctx_t *const ctx = data->ctx; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1060 const ZDICT_cover_params_t parameters = data->parameters; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1061 size_t dictBufferCapacity = data->dictBufferCapacity; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1062 size_t totalCompressedSize = ERROR(GENERIC); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1063 /* Allocate space for hash table, dict, and freqs */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1064 COVER_map_t activeDmers; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1065 BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1066 COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1067 U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1068 if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1069 DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1070 goto _cleanup; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1071 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1072 if (!dict || !freqs) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1073 DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1074 goto _cleanup; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1075 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1076 /* Copy the frequencies because we need to modify them */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1077 memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32)); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1078 /* Build the dictionary */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1079 { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1080 const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1081 dictBufferCapacity, parameters); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1082 selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1083 ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1084 totalCompressedSize); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1085 |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1086 if (COVER_dictSelectionIsError(selection)) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1087 DISPLAYLEVEL(1, "Failed to select dictionary\n"); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1088 goto _cleanup; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1089 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1090 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1091 _cleanup: |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1092 free(dict); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1093 COVER_best_finish(data->best, parameters, selection); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1094 free(data); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1095 COVER_map_destroy(&activeDmers); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1096 COVER_dictSelectionFree(selection); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1097 if (freqs) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1098 free(freqs); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1099 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1100 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1101 |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1102 ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1103 void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1104 const size_t *samplesSizes, unsigned nbSamples, |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1105 ZDICT_cover_params_t *parameters) { |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1106 /* constants */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1107 const unsigned nbThreads = parameters->nbThreads; |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
1108 const double splitPoint = |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
1109 parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1110 const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1111 const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1112 const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1113 const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1114 const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1115 const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1116 const unsigned kIterations = |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1117 (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1118 const unsigned shrinkDict = 0; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1119 /* Local variables */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1120 const int displayLevel = parameters->zParams.notificationLevel; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1121 unsigned iteration = 1; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1122 unsigned d; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1123 unsigned k; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1124 COVER_best_t best; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1125 POOL_ctx *pool = NULL; |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1126 int warned = 0; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1127 |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1128 /* Checks */ |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
1129 if (splitPoint <= 0 || splitPoint > 1) { |
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
1130 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1131 return ERROR(parameter_outOfBound); |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
1132 } |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1133 if (kMinK < kMaxD || kMaxK < kMinK) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1134 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1135 return ERROR(parameter_outOfBound); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1136 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1137 if (nbSamples == 0) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1138 DISPLAYLEVEL(1, "Cover must have at least one input file\n"); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1139 return ERROR(srcSize_wrong); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1140 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1141 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1142 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1143 ZDICT_DICTSIZE_MIN); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1144 return ERROR(dstSize_tooSmall); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1145 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1146 if (nbThreads > 1) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1147 pool = POOL_create(nbThreads, 1); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1148 if (!pool) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1149 return ERROR(memory_allocation); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1150 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1151 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1152 /* Initialization */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1153 COVER_best_init(&best); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1154 /* Turn down global display level to clean up display at level 2 and below */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1155 g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1156 /* Loop through d first because each new value needs a new context */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1157 LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1158 kIterations); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1159 for (d = kMinD; d <= kMaxD; d += 2) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1160 /* Initialize the context for this value of d */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1161 COVER_ctx_t ctx; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1162 LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1163 { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1164 const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1165 if (ZSTD_isError(initVal)) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1166 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1167 COVER_best_destroy(&best); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1168 POOL_free(pool); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1169 return initVal; |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1170 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1171 } |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1172 if (!warned) { |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1173 COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); |
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1174 warned = 1; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1175 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1176 /* Loop through k reusing the same context */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1177 for (k = kMinK; k <= kMaxK; k += kStepSize) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1178 /* Prepare the arguments */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1179 COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc( |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1180 sizeof(COVER_tryParameters_data_t)); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1181 LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1182 if (!data) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1183 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1184 COVER_best_destroy(&best); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1185 COVER_ctx_destroy(&ctx); |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1186 POOL_free(pool); |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1187 return ERROR(memory_allocation); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1188 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1189 data->ctx = &ctx; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1190 data->best = &best; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1191 data->dictBufferCapacity = dictBufferCapacity; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1192 data->parameters = *parameters; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1193 data->parameters.k = k; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1194 data->parameters.d = d; |
40121
73fef626dae3
zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents:
37495
diff
changeset
|
1195 data->parameters.splitPoint = splitPoint; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1196 data->parameters.steps = kSteps; |
42937
69de49c4e39c
zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents:
42070
diff
changeset
|
1197 data->parameters.shrinkDict = shrinkDict; |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1198 data->parameters.zParams.notificationLevel = g_displayLevel; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1199 /* Check the parameters */ |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1200 if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1201 DISPLAYLEVEL(1, "Cover parameters incorrect\n"); |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1202 free(data); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1203 continue; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1204 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1205 /* Call the function and pass ownership of data to it */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1206 COVER_best_start(&best); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1207 if (pool) { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1208 POOL_add(pool, &COVER_tryParameters, data); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1209 } else { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1210 COVER_tryParameters(data); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1211 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1212 /* Print status */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1213 LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", |
42070
675775c33ab6
zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents:
40121
diff
changeset
|
1214 (unsigned)((iteration * 100) / kIterations)); |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1215 ++iteration; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1216 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1217 COVER_best_wait(&best); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1218 COVER_ctx_destroy(&ctx); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1219 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1220 LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1221 /* Fill the output buffer and parameters with output of the best parameters */ |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1222 { |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1223 const size_t dictSize = best.dictSize; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1224 if (ZSTD_isError(best.compressedSize)) { |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1225 const size_t compressedSize = best.compressedSize; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1226 COVER_best_destroy(&best); |
37495
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1227 POOL_free(pool); |
b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30895
diff
changeset
|
1228 return compressedSize; |
30895
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1229 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1230 *parameters = best.parameters; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1231 memcpy(dictBuffer, best.dict, dictSize); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1232 COVER_best_destroy(&best); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1233 POOL_free(pool); |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1234 return dictSize; |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1235 } |
c32454d69b85
zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff
changeset
|
1236 } |