annotate contrib/python-zstandard/zstd/dictBuilder/cover.c @ 48674:f7086f6173f8 stable

dirstate-v2: rename the configuration to enable the format The rename of the old experimental name was overlooked before the 6.0 release. We rename everything to use the new name (and keep the released name as an alias for compatibility). Differential Revision: https://phab.mercurial-scm.org/D12129
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 01 Feb 2022 16:36:20 +0100
parents de7838053207
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1 /*
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
3 * All rights reserved.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
4 *
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
5 * This source code is licensed under both the BSD-style license (found in the
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
7 * in the COPYING file in the root directory of this source tree).
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
8 * You may select, at your option, one of the above-listed licenses.
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
9 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
10
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
11 /* *****************************************************************************
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
12 * Constructs a dictionary using a heuristic based on the following paper:
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
13 *
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
14 * Liao, Petri, Moffat, Wirth
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
15 * Effective Construction of Relative Lempel-Ziv Dictionaries
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
16 * Published in WWW 2016.
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
17 *
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
18 * Adapted from code originally written by @ot (Giuseppe Ottaviano).
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
19 ******************************************************************************/
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
20
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
21 /*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
22 * Dependencies
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
23 ***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
24 #include <stdio.h> /* fprintf */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
25 #include <stdlib.h> /* malloc, free, qsort */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
26 #include <string.h> /* memset */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
27 #include <time.h> /* clock */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
28
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
29 #include "mem.h" /* read */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
30 #include "pool.h"
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
31 #include "threading.h"
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
32 #include "cover.h"
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
33 #include "zstd_internal.h" /* includes zstd.h */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
34 #ifndef ZDICT_STATIC_LINKING_ONLY
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
35 #define ZDICT_STATIC_LINKING_ONLY
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
36 #endif
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
37 #include "zdict.h"
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
38
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
39 /*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
40 * Constants
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
41 ***************************************/
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
42 #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
43 #define DEFAULT_SPLITPOINT 1.0
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
44
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
45 /*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
46 * Console display
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
47 ***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
48 static int g_displayLevel = 2;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
49 #define DISPLAY(...) \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
50 { \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
51 fprintf(stderr, __VA_ARGS__); \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
52 fflush(stderr); \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
53 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
54 #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
55 if (displayLevel >= l) { \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
56 DISPLAY(__VA_ARGS__); \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
57 } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
58 #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
59
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
60 #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
61 if (displayLevel >= l) { \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
62 if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
63 g_time = clock(); \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
64 DISPLAY(__VA_ARGS__); \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
65 } \
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
66 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
67 #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
68 static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
69 static clock_t g_time = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
70
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
71 /*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
72 * Hash table
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
73 ***************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
74 * A small specialized hash map for storing activeDmers.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
75 * The map does not resize, so if it becomes full it will loop forever.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
76 * Thus, the map must be large enough to store every value.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
77 * The map implements linear probing and keeps its load less than 0.5.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
78 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
79
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
80 #define MAP_EMPTY_VALUE ((U32)-1)
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
81 typedef struct COVER_map_pair_t_s {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
82 U32 key;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
83 U32 value;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
84 } COVER_map_pair_t;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
85
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
86 typedef struct COVER_map_s {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
87 COVER_map_pair_t *data;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
88 U32 sizeLog;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
89 U32 size;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
90 U32 sizeMask;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
91 } COVER_map_t;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
92
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
93 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
94 * Clear the map.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
95 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
96 static void COVER_map_clear(COVER_map_t *map) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
97 memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
98 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
99
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
100 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
101 * Initializes a map of the given size.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
102 * Returns 1 on success and 0 on failure.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
103 * The map must be destroyed with COVER_map_destroy().
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
104 * The map is only guaranteed to be large enough to hold size elements.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
105 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
106 static int COVER_map_init(COVER_map_t *map, U32 size) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
107 map->sizeLog = ZSTD_highbit32(size) + 2;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
108 map->size = (U32)1 << map->sizeLog;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
109 map->sizeMask = map->size - 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
110 map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
111 if (!map->data) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
112 map->sizeLog = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
113 map->size = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
114 return 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
115 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
116 COVER_map_clear(map);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
117 return 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
118 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
119
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
120 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
121 * Internal hash function
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
122 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
123 static const U32 prime4bytes = 2654435761U;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
124 static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
125 return (key * prime4bytes) >> (32 - map->sizeLog);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
126 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
127
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
128 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
129 * Helper function that returns the index that a key should be placed into.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
130 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
131 static U32 COVER_map_index(COVER_map_t *map, U32 key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
132 const U32 hash = COVER_map_hash(map, key);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
133 U32 i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
134 for (i = hash;; i = (i + 1) & map->sizeMask) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
135 COVER_map_pair_t *pos = &map->data[i];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
136 if (pos->value == MAP_EMPTY_VALUE) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
137 return i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
138 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
139 if (pos->key == key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
140 return i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
141 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
142 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
143 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
144
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
145 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
146 * Returns the pointer to the value for key.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
147 * If key is not in the map, it is inserted and the value is set to 0.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
148 * The map must not be full.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
149 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
150 static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
151 COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
152 if (pos->value == MAP_EMPTY_VALUE) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
153 pos->key = key;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
154 pos->value = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
155 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
156 return &pos->value;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
157 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
158
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
159 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
160 * Deletes key from the map if present.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
161 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
162 static void COVER_map_remove(COVER_map_t *map, U32 key) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
163 U32 i = COVER_map_index(map, key);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
164 COVER_map_pair_t *del = &map->data[i];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
165 U32 shift = 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
166 if (del->value == MAP_EMPTY_VALUE) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
167 return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
168 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
169 for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
170 COVER_map_pair_t *const pos = &map->data[i];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
171 /* If the position is empty we are done */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
172 if (pos->value == MAP_EMPTY_VALUE) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
173 del->value = MAP_EMPTY_VALUE;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
174 return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
175 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
176 /* If pos can be moved to del do so */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
177 if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
178 del->key = pos->key;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
179 del->value = pos->value;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
180 del = pos;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
181 shift = 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
182 } else {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
183 ++shift;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
184 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
185 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
186 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
187
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
188 /**
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
189 * Destroys a map that is inited with COVER_map_init().
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
190 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
191 static void COVER_map_destroy(COVER_map_t *map) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
192 if (map->data) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
193 free(map->data);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
194 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
195 map->data = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
196 map->size = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
197 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
198
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
199 /*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
200 * Context
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
201 ***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
202
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
203 typedef struct {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
204 const BYTE *samples;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
205 size_t *offsets;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
206 const size_t *samplesSizes;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
207 size_t nbSamples;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
208 size_t nbTrainSamples;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
209 size_t nbTestSamples;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
210 U32 *suffix;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
211 size_t suffixSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
212 U32 *freqs;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
213 U32 *dmerAt;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
214 unsigned d;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
215 } COVER_ctx_t;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
216
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
217 /* We need a global context for qsort... */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
218 static COVER_ctx_t *g_ctx = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
219
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
220 /*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
221 * Helper functions
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
222 ***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
223
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
224 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
225 * Returns the sum of the sample sizes.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
226 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
227 size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
228 size_t sum = 0;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
229 unsigned i;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
230 for (i = 0; i < nbSamples; ++i) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
231 sum += samplesSizes[i];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
232 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
233 return sum;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
234 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
235
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
236 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
237 * Returns -1 if the dmer at lp is less than the dmer at rp.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
238 * Return 0 if the dmers at lp and rp are equal.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
239 * Returns 1 if the dmer at lp is greater than the dmer at rp.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
240 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
241 static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
242 U32 const lhs = *(U32 const *)lp;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
243 U32 const rhs = *(U32 const *)rp;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
244 return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
245 }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
246 /**
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
247 * Faster version for d <= 8.
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
248 */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
249 static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
250 U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
251 U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
252 U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
253 if (lhs < rhs) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
254 return -1;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
255 }
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
256 return (lhs > rhs);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
257 }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
258
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
259 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
260 * Same as COVER_cmp() except ties are broken by pointer value
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
261 * NOTE: g_ctx must be set to call this function. A global is required because
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
262 * qsort doesn't take an opaque pointer.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
263 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
264 static int COVER_strict_cmp(const void *lp, const void *rp) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
265 int result = COVER_cmp(g_ctx, lp, rp);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
266 if (result == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
267 result = lp < rp ? -1 : 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
268 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
269 return result;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
270 }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
271 /**
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
272 * Faster version for d <= 8.
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
273 */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
274 static int COVER_strict_cmp8(const void *lp, const void *rp) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
275 int result = COVER_cmp8(g_ctx, lp, rp);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
276 if (result == 0) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
277 result = lp < rp ? -1 : 1;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
278 }
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
279 return result;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
280 }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
281
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
282 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
283 * Returns the first pointer in [first, last) whose element does not compare
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
284 * less than value. If no such element exists it returns last.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
285 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
286 static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
287 size_t value) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
288 size_t count = last - first;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
289 while (count != 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
290 size_t step = count / 2;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
291 const size_t *ptr = first;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
292 ptr += step;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
293 if (*ptr < value) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
294 first = ++ptr;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
295 count -= step + 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
296 } else {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
297 count = step;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
298 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
299 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
300 return first;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
301 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
302
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
303 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
304 * Generic groupBy function.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
305 * Groups an array sorted by cmp into groups with equivalent values.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
306 * Calls grp for each group.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
307 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
308 static void
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
309 COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
310 int (*cmp)(COVER_ctx_t *, const void *, const void *),
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
311 void (*grp)(COVER_ctx_t *, const void *, const void *)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
312 const BYTE *ptr = (const BYTE *)data;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
313 size_t num = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
314 while (num < count) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
315 const BYTE *grpEnd = ptr + size;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
316 ++num;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
317 while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
318 grpEnd += size;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
319 ++num;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
320 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
321 grp(ctx, ptr, grpEnd);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
322 ptr = grpEnd;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
323 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
324 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
325
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
326 /*-*************************************
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
327 * Cover functions
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
328 ***************************************/
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
329
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
330 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
331 * Called on each group of positions with the same dmer.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
332 * Counts the frequency of each dmer and saves it in the suffix array.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
333 * Fills `ctx->dmerAt`.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
334 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
335 static void COVER_group(COVER_ctx_t *ctx, const void *group,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
336 const void *groupEnd) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
337 /* The group consists of all the positions with the same first d bytes. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
338 const U32 *grpPtr = (const U32 *)group;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
339 const U32 *grpEnd = (const U32 *)groupEnd;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
340 /* The dmerId is how we will reference this dmer.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
341 * This allows us to map the whole dmer space to a much smaller space, the
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
342 * size of the suffix array.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
343 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
344 const U32 dmerId = (U32)(grpPtr - ctx->suffix);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
345 /* Count the number of samples this dmer shows up in */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
346 U32 freq = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
347 /* Details */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
348 const size_t *curOffsetPtr = ctx->offsets;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
349 const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
350 /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
351 * different sample than the last.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
352 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
353 size_t curSampleEnd = ctx->offsets[0];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
354 for (; grpPtr != grpEnd; ++grpPtr) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
355 /* Save the dmerId for this position so we can get back to it. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
356 ctx->dmerAt[*grpPtr] = dmerId;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
357 /* Dictionaries only help for the first reference to the dmer.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
358 * After that zstd can reference the match from the previous reference.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
359 * So only count each dmer once for each sample it is in.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
360 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
361 if (*grpPtr < curSampleEnd) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
362 continue;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
363 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
364 freq += 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
365 /* Binary search to find the end of the sample *grpPtr is in.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
366 * In the common case that grpPtr + 1 == grpEnd we can skip the binary
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
367 * search because the loop is over.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
368 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
369 if (grpPtr + 1 != grpEnd) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
370 const size_t *sampleEndPtr =
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
371 COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
372 curSampleEnd = *sampleEndPtr;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
373 curOffsetPtr = sampleEndPtr + 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
374 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
375 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
376 /* At this point we are never going to look at this segment of the suffix
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
377 * array again. We take advantage of this fact to save memory.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
378 * We store the frequency of the dmer in the first position of the group,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
379 * which is dmerId.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
380 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
381 ctx->suffix[dmerId] = freq;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
382 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
383
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
384
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
385 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
386 * Selects the best segment in an epoch.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
387 * Segments of are scored according to the function:
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
388 *
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
389 * Let F(d) be the frequency of dmer d.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
390 * Let S_i be the dmer at position i of segment S which has length k.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
391 *
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
392 * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
393 *
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
394 * Once the dmer d is in the dictionary we set F(d) = 0.
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
395 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
396 static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
397 COVER_map_t *activeDmers, U32 begin,
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
398 U32 end,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
399 ZDICT_cover_params_t parameters) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
400 /* Constants */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
401 const U32 k = parameters.k;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
402 const U32 d = parameters.d;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
403 const U32 dmersInK = k - d + 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
404 /* Try each segment (activeSegment) and save the best (bestSegment) */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
405 COVER_segment_t bestSegment = {0, 0, 0};
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
406 COVER_segment_t activeSegment;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
407 /* Reset the activeDmers in the segment */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
408 COVER_map_clear(activeDmers);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
409 /* The activeSegment starts at the beginning of the epoch. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
410 activeSegment.begin = begin;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
411 activeSegment.end = begin;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
412 activeSegment.score = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
413 /* Slide the activeSegment through the whole epoch.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
414 * Save the best segment in bestSegment.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
415 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
416 while (activeSegment.end < end) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
417 /* The dmerId for the dmer at the next position */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
418 U32 newDmer = ctx->dmerAt[activeSegment.end];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
419 /* The entry in activeDmers for this dmerId */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
420 U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
421 /* If the dmer isn't already present in the segment add its score. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
422 if (*newDmerOcc == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
423 /* The paper suggest using the L-0.5 norm, but experiments show that it
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
424 * doesn't help.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
425 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
426 activeSegment.score += freqs[newDmer];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
427 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
428 /* Add the dmer to the segment */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
429 activeSegment.end += 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
430 *newDmerOcc += 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
431
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
432 /* If the window is now too large, drop the first position */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
433 if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
434 U32 delDmer = ctx->dmerAt[activeSegment.begin];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
435 U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
436 activeSegment.begin += 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
437 *delDmerOcc -= 1;
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
438 /* If this is the last occurrence of the dmer, subtract its score */
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
439 if (*delDmerOcc == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
440 COVER_map_remove(activeDmers, delDmer);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
441 activeSegment.score -= freqs[delDmer];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
442 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
443 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
444
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
445 /* If this segment is the best so far save it */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
446 if (activeSegment.score > bestSegment.score) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
447 bestSegment = activeSegment;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
448 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
449 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
450 {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
451 /* Trim off the zero frequency head and tail from the segment. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
452 U32 newBegin = bestSegment.end;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
453 U32 newEnd = bestSegment.begin;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
454 U32 pos;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
455 for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
456 U32 freq = freqs[ctx->dmerAt[pos]];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
457 if (freq != 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
458 newBegin = MIN(newBegin, pos);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
459 newEnd = pos + 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
460 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
461 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
462 bestSegment.begin = newBegin;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
463 bestSegment.end = newEnd;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
464 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
465 {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
466 /* Zero out the frequency of each dmer covered by the chosen segment. */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
467 U32 pos;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
468 for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
469 freqs[ctx->dmerAt[pos]] = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
470 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
471 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
472 return bestSegment;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
473 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
474
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
475 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
476 * Check the validity of the parameters.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
477 * Returns non-zero if the parameters are valid and 0 otherwise.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
478 */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
479 static int COVER_checkParameters(ZDICT_cover_params_t parameters,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
480 size_t maxDictSize) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
481 /* k and d are required parameters */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
482 if (parameters.d == 0 || parameters.k == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
483 return 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
484 }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
485 /* k <= maxDictSize */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
486 if (parameters.k > maxDictSize) {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
487 return 0;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
488 }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
489 /* d <= k */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
490 if (parameters.d > parameters.k) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
491 return 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
492 }
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
493 /* 0 < splitPoint <= 1 */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
494 if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
495 return 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
496 }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
497 return 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
498 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
499
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
500 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
501 * Clean up a context initialized with `COVER_ctx_init()`.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
502 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
503 static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
504 if (!ctx) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
505 return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
506 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
507 if (ctx->suffix) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
508 free(ctx->suffix);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
509 ctx->suffix = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
510 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
511 if (ctx->freqs) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
512 free(ctx->freqs);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
513 ctx->freqs = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
514 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
515 if (ctx->dmerAt) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
516 free(ctx->dmerAt);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
517 ctx->dmerAt = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
518 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
519 if (ctx->offsets) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
520 free(ctx->offsets);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
521 ctx->offsets = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
522 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
523 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
524
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
525 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
526 * Prepare a context for dictionary building.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
527 * The context is only dependent on the parameter `d` and can used multiple
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
528 * times.
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
529 * Returns 0 on success or error code on error.
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
530 * The context must be destroyed with `COVER_ctx_destroy()`.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
531 */
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
532 static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
533 const size_t *samplesSizes, unsigned nbSamples,
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
534 unsigned d, double splitPoint) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
535 const BYTE *const samples = (const BYTE *)samplesBuffer;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
536 const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
537 /* Split samples into testing and training sets */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
538 const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
539 const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
540 const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
541 const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
542 /* Checks */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
543 if (totalSamplesSize < MAX(d, sizeof(U64)) ||
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
544 totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
545 DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
546 (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
547 return ERROR(srcSize_wrong);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
548 }
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
549 /* Check if there are at least 5 training samples */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
550 if (nbTrainSamples < 5) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
551 DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
552 return ERROR(srcSize_wrong);
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
553 }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
554 /* Check if there's testing sample */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
555 if (nbTestSamples < 1) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
556 DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
557 return ERROR(srcSize_wrong);
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
558 }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
559 /* Zero the context */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
560 memset(ctx, 0, sizeof(*ctx));
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
561 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
562 (unsigned)trainingSamplesSize);
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
563 DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
564 (unsigned)testSamplesSize);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
565 ctx->samples = samples;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
566 ctx->samplesSizes = samplesSizes;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
567 ctx->nbSamples = nbSamples;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
568 ctx->nbTrainSamples = nbTrainSamples;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
569 ctx->nbTestSamples = nbTestSamples;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
570 /* Partial suffix array */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
571 ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
572 ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
573 /* Maps index to the dmerID */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
574 ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
575 /* The offsets of each file */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
576 ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
577 if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
578 DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
579 COVER_ctx_destroy(ctx);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
580 return ERROR(memory_allocation);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
581 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
582 ctx->freqs = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
583 ctx->d = d;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
584
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
585 /* Fill offsets from the samplesSizes */
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
586 {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
587 U32 i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
588 ctx->offsets[0] = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
589 for (i = 1; i <= nbSamples; ++i) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
590 ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
591 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
592 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
593 DISPLAYLEVEL(2, "Constructing partial suffix array\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
594 {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
595 /* suffix is a partial suffix array.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
596 * It only sorts suffixes by their first parameters.d bytes.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
597 * The sort is stable, so each dmer group is sorted by position in input.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
598 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
599 U32 i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
600 for (i = 0; i < ctx->suffixSize; ++i) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
601 ctx->suffix[i] = i;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
602 }
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
603 /* qsort doesn't take an opaque pointer, so pass as a global.
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
604 * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
605 */
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
606 g_ctx = ctx;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
607 #if defined(__OpenBSD__)
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
608 mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
609 (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
610 #else
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
611 qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
612 (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
613 #endif
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
614 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
615 DISPLAYLEVEL(2, "Computing frequencies\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
616 /* For each dmer group (group of positions with the same first d bytes):
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
617 * 1. For each position we set dmerAt[position] = dmerID. The dmerID is
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
618 * (groupBeginPtr - suffix). This allows us to go from position to
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
619 * dmerID so we can look up values in freq.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
620 * 2. We calculate how many samples the dmer occurs in and save it in
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
621 * freqs[dmerId].
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
622 */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
623 COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
624 (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
625 ctx->freqs = ctx->suffix;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
626 ctx->suffix = NULL;
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
627 return 0;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
628 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
629
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
630 void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
631 {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
632 const double ratio = (double)nbDmers / maxDictSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
633 if (ratio >= 10) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
634 return;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
635 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
636 LOCALDISPLAYLEVEL(displayLevel, 1,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
637 "WARNING: The maximum dictionary size %u is too large "
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
638 "compared to the source size %u! "
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
639 "size(source)/size(dictionary) = %f, but it should be >= "
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
640 "10! This may lead to a subpar dictionary! We recommend "
43994
de7838053207 zstandard: vendor python-zstandard 0.13.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42937
diff changeset
641 "training on sources at least 10x, and preferably 100x "
de7838053207 zstandard: vendor python-zstandard 0.13.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42937
diff changeset
642 "the size of the dictionary! \n", (U32)maxDictSize,
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
643 (U32)nbDmers, ratio);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
644 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
645
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
646 COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
647 U32 nbDmers, U32 k, U32 passes)
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
648 {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
649 const U32 minEpochSize = k * 10;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
650 COVER_epoch_info_t epochs;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
651 epochs.num = MAX(1, maxDictSize / k / passes);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
652 epochs.size = nbDmers / epochs.num;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
653 if (epochs.size >= minEpochSize) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
654 assert(epochs.size * epochs.num <= nbDmers);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
655 return epochs;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
656 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
657 epochs.size = MIN(minEpochSize, nbDmers);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
658 epochs.num = nbDmers / epochs.size;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
659 assert(epochs.size * epochs.num <= nbDmers);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
660 return epochs;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
661 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
662
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
663 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
664 * Given the prepared context build the dictionary.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
665 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
666 static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
667 COVER_map_t *activeDmers, void *dictBuffer,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
668 size_t dictBufferCapacity,
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
669 ZDICT_cover_params_t parameters) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
670 BYTE *const dict = (BYTE *)dictBuffer;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
671 size_t tail = dictBufferCapacity;
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
672 /* Divide the data into epochs. We will select one segment from each epoch. */
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
673 const COVER_epoch_info_t epochs = COVER_computeEpochs(
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
674 (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
675 const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3));
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
676 size_t zeroScoreRun = 0;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
677 size_t epoch;
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
678 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
679 (U32)epochs.num, (U32)epochs.size);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
680 /* Loop through the epochs until there are no more segments or the dictionary
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
681 * is full.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
682 */
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
683 for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
684 const U32 epochBegin = (U32)(epoch * epochs.size);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
685 const U32 epochEnd = epochBegin + epochs.size;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
686 size_t segmentSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
687 /* Select a segment */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
688 COVER_segment_t segment = COVER_selectSegment(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
689 ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
690 /* If the segment covers no dmers, then we are out of content.
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
691 * There may be new content in other epochs, for continue for some time.
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
692 */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
693 if (segment.score == 0) {
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
694 if (++zeroScoreRun >= maxZeroScoreRun) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
695 break;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
696 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
697 continue;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
698 }
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
699 zeroScoreRun = 0;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
700 /* Trim the segment if necessary and if it is too small then we are done */
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
701 segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
702 if (segmentSize < parameters.d) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
703 break;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
704 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
705 /* We fill the dictionary from the back to allow the best segments to be
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
706 * referenced with the smallest offsets.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
707 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
708 tail -= segmentSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
709 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
710 DISPLAYUPDATE(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
711 2, "\r%u%% ",
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
712 (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
713 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
714 DISPLAYLEVEL(2, "\r%79s\r", "");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
715 return tail;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
716 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
717
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
718 ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
719 void *dictBuffer, size_t dictBufferCapacity,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
720 const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
721 ZDICT_cover_params_t parameters)
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
722 {
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
723 BYTE* const dict = (BYTE*)dictBuffer;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
724 COVER_ctx_t ctx;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
725 COVER_map_t activeDmers;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
726 parameters.splitPoint = 1.0;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
727 /* Initialize global data */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
728 g_displayLevel = parameters.zParams.notificationLevel;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
729 /* Checks */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
730 if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
731 DISPLAYLEVEL(1, "Cover parameters incorrect\n");
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
732 return ERROR(parameter_outOfBound);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
733 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
734 if (nbSamples == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
735 DISPLAYLEVEL(1, "Cover must have at least one input file\n");
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
736 return ERROR(srcSize_wrong);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
737 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
738 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
739 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
740 ZDICT_DICTSIZE_MIN);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
741 return ERROR(dstSize_tooSmall);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
742 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
743 /* Initialize context and activeDmers */
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
744 {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
745 size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
746 parameters.d, parameters.splitPoint);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
747 if (ZSTD_isError(initVal)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
748 return initVal;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
749 }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
750 }
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
751 COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
752 if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
753 DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
754 COVER_ctx_destroy(&ctx);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
755 return ERROR(memory_allocation);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
756 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
757
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
758 DISPLAYLEVEL(2, "Building dictionary\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
759 {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
760 const size_t tail =
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
761 COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
762 dictBufferCapacity, parameters);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
763 const size_t dictionarySize = ZDICT_finalizeDictionary(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
764 dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
765 samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
766 if (!ZSTD_isError(dictionarySize)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
767 DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
768 (unsigned)dictionarySize);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
769 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
770 COVER_ctx_destroy(&ctx);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
771 COVER_map_destroy(&activeDmers);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
772 return dictionarySize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
773 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
774 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
775
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
776
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
777
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
778 size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
779 const size_t *samplesSizes, const BYTE *samples,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
780 size_t *offsets,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
781 size_t nbTrainSamples, size_t nbSamples,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
782 BYTE *const dict, size_t dictBufferCapacity) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
783 size_t totalCompressedSize = ERROR(GENERIC);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
784 /* Pointers */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
785 ZSTD_CCtx *cctx;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
786 ZSTD_CDict *cdict;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
787 void *dst;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
788 /* Local variables */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
789 size_t dstCapacity;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
790 size_t i;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
791 /* Allocate dst with enough space to compress the maximum sized sample */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
792 {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
793 size_t maxSampleSize = 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
794 i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
795 for (; i < nbSamples; ++i) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
796 maxSampleSize = MAX(samplesSizes[i], maxSampleSize);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
797 }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
798 dstCapacity = ZSTD_compressBound(maxSampleSize);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
799 dst = malloc(dstCapacity);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
800 }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
801 /* Create the cctx and cdict */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
802 cctx = ZSTD_createCCtx();
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
803 cdict = ZSTD_createCDict(dict, dictBufferCapacity,
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
804 parameters.zParams.compressionLevel);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
805 if (!dst || !cctx || !cdict) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
806 goto _compressCleanup;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
807 }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
808 /* Compress each sample and sum their sizes (or error) */
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
809 totalCompressedSize = dictBufferCapacity;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
810 i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
811 for (; i < nbSamples; ++i) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
812 const size_t size = ZSTD_compress_usingCDict(
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
813 cctx, dst, dstCapacity, samples + offsets[i],
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
814 samplesSizes[i], cdict);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
815 if (ZSTD_isError(size)) {
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
816 totalCompressedSize = size;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
817 goto _compressCleanup;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
818 }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
819 totalCompressedSize += size;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
820 }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
821 _compressCleanup:
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
822 ZSTD_freeCCtx(cctx);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
823 ZSTD_freeCDict(cdict);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
824 if (dst) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
825 free(dst);
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
826 }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
827 return totalCompressedSize;
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
828 }
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
829
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
830
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
831 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
832 * Initialize the `COVER_best_t`.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
833 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
834 void COVER_best_init(COVER_best_t *best) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
835 if (best==NULL) return; /* compatible with init on NULL */
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
836 (void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
837 (void)ZSTD_pthread_cond_init(&best->cond, NULL);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
838 best->liveJobs = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
839 best->dict = NULL;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
840 best->dictSize = 0;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
841 best->compressedSize = (size_t)-1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
842 memset(&best->parameters, 0, sizeof(best->parameters));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
843 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
844
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
845 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
846 * Wait until liveJobs == 0.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
847 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
848 void COVER_best_wait(COVER_best_t *best) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
849 if (!best) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
850 return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
851 }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
852 ZSTD_pthread_mutex_lock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
853 while (best->liveJobs != 0) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
854 ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
855 }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
856 ZSTD_pthread_mutex_unlock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
857 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
858
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
859 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
860 * Call COVER_best_wait() and then destroy the COVER_best_t.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
861 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
862 void COVER_best_destroy(COVER_best_t *best) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
863 if (!best) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
864 return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
865 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
866 COVER_best_wait(best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
867 if (best->dict) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
868 free(best->dict);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
869 }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
870 ZSTD_pthread_mutex_destroy(&best->mutex);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
871 ZSTD_pthread_cond_destroy(&best->cond);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
872 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
873
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
874 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
875 * Called when a thread is about to be launched.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
876 * Increments liveJobs.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
877 */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
878 void COVER_best_start(COVER_best_t *best) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
879 if (!best) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
880 return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
881 }
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
882 ZSTD_pthread_mutex_lock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
883 ++best->liveJobs;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
884 ZSTD_pthread_mutex_unlock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
885 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
886
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
887 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
888 * Called when a thread finishes executing, both on error or success.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
889 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
890 * If this dictionary is the best so far save it and its parameters.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
891 */
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
892 void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
893 COVER_dictSelection_t selection) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
894 void* dict = selection.dictContent;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
895 size_t compressedSize = selection.totalCompressedSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
896 size_t dictSize = selection.dictSize;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
897 if (!best) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
898 return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
899 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
900 {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
901 size_t liveJobs;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
902 ZSTD_pthread_mutex_lock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
903 --best->liveJobs;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
904 liveJobs = best->liveJobs;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
905 /* If the new dictionary is better */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
906 if (compressedSize < best->compressedSize) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
907 /* Allocate space if necessary */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
908 if (!best->dict || best->dictSize < dictSize) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
909 if (best->dict) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
910 free(best->dict);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
911 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
912 best->dict = malloc(dictSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
913 if (!best->dict) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
914 best->compressedSize = ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
915 best->dictSize = 0;
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
916 ZSTD_pthread_cond_signal(&best->cond);
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
917 ZSTD_pthread_mutex_unlock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
918 return;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
919 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
920 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
921 /* Save the dictionary, parameters, and size */
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
922 if (dict) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
923 memcpy(best->dict, dict, dictSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
924 best->dictSize = dictSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
925 best->parameters = parameters;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
926 best->compressedSize = compressedSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
927 }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
928 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
929 if (liveJobs == 0) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
930 ZSTD_pthread_cond_broadcast(&best->cond);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
931 }
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
932 ZSTD_pthread_mutex_unlock(&best->mutex);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
933 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
934 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
935
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
936 COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
937 COVER_dictSelection_t selection = { NULL, 0, error };
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
938 return selection;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
939 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
940
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
941 unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
942 return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
943 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
944
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
945 void COVER_dictSelectionFree(COVER_dictSelection_t selection){
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
946 free(selection.dictContent);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
947 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
948
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
949 COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
950 size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
951 size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
952
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
953 size_t largestDict = 0;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
954 size_t largestCompressed = 0;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
955 BYTE* customDictContentEnd = customDictContent + dictContentSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
956
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
957 BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
958 BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
959 double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
960
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
961 if (!largestDictbuffer || !candidateDictBuffer) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
962 free(largestDictbuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
963 free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
964 return COVER_dictSelectionError(dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
965 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
966
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
967 /* Initial dictionary size and compressed size */
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
968 memcpy(largestDictbuffer, customDictContent, dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
969 dictContentSize = ZDICT_finalizeDictionary(
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
970 largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
971 samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
972
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
973 if (ZDICT_isError(dictContentSize)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
974 free(largestDictbuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
975 free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
976 return COVER_dictSelectionError(dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
977 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
978
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
979 totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
980 samplesBuffer, offsets,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
981 nbCheckSamples, nbSamples,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
982 largestDictbuffer, dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
983
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
984 if (ZSTD_isError(totalCompressedSize)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
985 free(largestDictbuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
986 free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
987 return COVER_dictSelectionError(totalCompressedSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
988 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
989
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
990 if (params.shrinkDict == 0) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
991 COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
992 free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
993 return selection;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
994 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
995
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
996 largestDict = dictContentSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
997 largestCompressed = totalCompressedSize;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
998 dictContentSize = ZDICT_DICTSIZE_MIN;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
999
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1000 /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1001 while (dictContentSize < largestDict) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1002 memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1003 dictContentSize = ZDICT_finalizeDictionary(
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1004 candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1005 samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1006
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1007 if (ZDICT_isError(dictContentSize)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1008 free(largestDictbuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1009 free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1010 return COVER_dictSelectionError(dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1011
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1012 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1013
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1014 totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1015 samplesBuffer, offsets,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1016 nbCheckSamples, nbSamples,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1017 candidateDictBuffer, dictContentSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1018
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1019 if (ZSTD_isError(totalCompressedSize)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1020 free(largestDictbuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1021 free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1022 return COVER_dictSelectionError(totalCompressedSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1023 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1024
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1025 if (totalCompressedSize <= largestCompressed * regressionTolerance) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1026 COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1027 free(largestDictbuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1028 return selection;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1029 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1030 dictContentSize *= 2;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1031 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1032 dictContentSize = largestDict;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1033 totalCompressedSize = largestCompressed;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1034 {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1035 COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1036 free(candidateDictBuffer);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1037 return selection;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1038 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1039 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1040
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1041 /**
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1042 * Parameters for COVER_tryParameters().
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1043 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1044 typedef struct COVER_tryParameters_data_s {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1045 const COVER_ctx_t *ctx;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1046 COVER_best_t *best;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1047 size_t dictBufferCapacity;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1048 ZDICT_cover_params_t parameters;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1049 } COVER_tryParameters_data_t;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1050
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1051 /**
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
1052 * Tries a set of parameters and updates the COVER_best_t with the results.
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1053 * This function is thread safe if zstd is compiled with multithreaded support.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1054 * It takes its parameters as an *OWNING* opaque pointer to support threading.
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1055 */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1056 static void COVER_tryParameters(void *opaque) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1057 /* Save parameters as local variables */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1058 COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1059 const COVER_ctx_t *const ctx = data->ctx;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1060 const ZDICT_cover_params_t parameters = data->parameters;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1061 size_t dictBufferCapacity = data->dictBufferCapacity;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1062 size_t totalCompressedSize = ERROR(GENERIC);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1063 /* Allocate space for hash table, dict, and freqs */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1064 COVER_map_t activeDmers;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1065 BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1066 COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1067 U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1068 if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1069 DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1070 goto _cleanup;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1071 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1072 if (!dict || !freqs) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1073 DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1074 goto _cleanup;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1075 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1076 /* Copy the frequencies because we need to modify them */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1077 memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1078 /* Build the dictionary */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1079 {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1080 const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1081 dictBufferCapacity, parameters);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1082 selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1083 ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1084 totalCompressedSize);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1085
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1086 if (COVER_dictSelectionIsError(selection)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1087 DISPLAYLEVEL(1, "Failed to select dictionary\n");
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1088 goto _cleanup;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1089 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1090 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1091 _cleanup:
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1092 free(dict);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1093 COVER_best_finish(data->best, parameters, selection);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1094 free(data);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1095 COVER_map_destroy(&activeDmers);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1096 COVER_dictSelectionFree(selection);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1097 if (freqs) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1098 free(freqs);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1099 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1100 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1101
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1102 ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1103 void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1104 const size_t *samplesSizes, unsigned nbSamples,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1105 ZDICT_cover_params_t *parameters) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1106 /* constants */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1107 const unsigned nbThreads = parameters->nbThreads;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
1108 const double splitPoint =
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
1109 parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1110 const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1111 const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1112 const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1113 const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1114 const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1115 const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1116 const unsigned kIterations =
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1117 (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1118 const unsigned shrinkDict = 0;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1119 /* Local variables */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1120 const int displayLevel = parameters->zParams.notificationLevel;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1121 unsigned iteration = 1;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1122 unsigned d;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1123 unsigned k;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1124 COVER_best_t best;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1125 POOL_ctx *pool = NULL;
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1126 int warned = 0;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1127
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1128 /* Checks */
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
1129 if (splitPoint <= 0 || splitPoint > 1) {
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
1130 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1131 return ERROR(parameter_outOfBound);
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
1132 }
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1133 if (kMinK < kMaxD || kMaxK < kMinK) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1134 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1135 return ERROR(parameter_outOfBound);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1136 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1137 if (nbSamples == 0) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1138 DISPLAYLEVEL(1, "Cover must have at least one input file\n");
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1139 return ERROR(srcSize_wrong);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1140 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1141 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1142 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1143 ZDICT_DICTSIZE_MIN);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1144 return ERROR(dstSize_tooSmall);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1145 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1146 if (nbThreads > 1) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1147 pool = POOL_create(nbThreads, 1);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1148 if (!pool) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1149 return ERROR(memory_allocation);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1150 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1151 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1152 /* Initialization */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1153 COVER_best_init(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1154 /* Turn down global display level to clean up display at level 2 and below */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1155 g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1156 /* Loop through d first because each new value needs a new context */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1157 LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1158 kIterations);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1159 for (d = kMinD; d <= kMaxD; d += 2) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1160 /* Initialize the context for this value of d */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1161 COVER_ctx_t ctx;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1162 LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1163 {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1164 const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1165 if (ZSTD_isError(initVal)) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1166 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1167 COVER_best_destroy(&best);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1168 POOL_free(pool);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1169 return initVal;
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1170 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1171 }
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1172 if (!warned) {
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1173 COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1174 warned = 1;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1175 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1176 /* Loop through k reusing the same context */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1177 for (k = kMinK; k <= kMaxK; k += kStepSize) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1178 /* Prepare the arguments */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1179 COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1180 sizeof(COVER_tryParameters_data_t));
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1181 LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1182 if (!data) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1183 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1184 COVER_best_destroy(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1185 COVER_ctx_destroy(&ctx);
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1186 POOL_free(pool);
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1187 return ERROR(memory_allocation);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1188 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1189 data->ctx = &ctx;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1190 data->best = &best;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1191 data->dictBufferCapacity = dictBufferCapacity;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1192 data->parameters = *parameters;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1193 data->parameters.k = k;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1194 data->parameters.d = d;
40121
73fef626dae3 zstandard: vendor python-zstandard 0.10.1
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37495
diff changeset
1195 data->parameters.splitPoint = splitPoint;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1196 data->parameters.steps = kSteps;
42937
69de49c4e39c zstandard: vendor python-zstandard 0.12
Gregory Szorc <gregory.szorc@gmail.com>
parents: 42070
diff changeset
1197 data->parameters.shrinkDict = shrinkDict;
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1198 data->parameters.zParams.notificationLevel = g_displayLevel;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1199 /* Check the parameters */
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1200 if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1201 DISPLAYLEVEL(1, "Cover parameters incorrect\n");
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1202 free(data);
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1203 continue;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1204 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1205 /* Call the function and pass ownership of data to it */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1206 COVER_best_start(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1207 if (pool) {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1208 POOL_add(pool, &COVER_tryParameters, data);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1209 } else {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1210 COVER_tryParameters(data);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1211 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1212 /* Print status */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1213 LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
42070
675775c33ab6 zstandard: vendor python-zstandard 0.11
Gregory Szorc <gregory.szorc@gmail.com>
parents: 40121
diff changeset
1214 (unsigned)((iteration * 100) / kIterations));
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1215 ++iteration;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1216 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1217 COVER_best_wait(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1218 COVER_ctx_destroy(&ctx);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1219 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1220 LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1221 /* Fill the output buffer and parameters with output of the best parameters */
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1222 {
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1223 const size_t dictSize = best.dictSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1224 if (ZSTD_isError(best.compressedSize)) {
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1225 const size_t compressedSize = best.compressedSize;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1226 COVER_best_destroy(&best);
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1227 POOL_free(pool);
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30895
diff changeset
1228 return compressedSize;
30895
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1229 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1230 *parameters = best.parameters;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1231 memcpy(dictBuffer, best.dict, dictSize);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1232 COVER_best_destroy(&best);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1233 POOL_free(pool);
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1234 return dictSize;
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1235 }
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
1236 }