Mercurial > public > mercurial-scm > hg-stable
annotate mercurial/cext/base85.c @ 39883:3e896b51aa5d
storageutil: move metadata parsing and packing from revlog (API)
Parsing and writing of revision text metadata is likely identical
across storage backends. Let's move the code out of revlog so we
don't need to import the revlog module in order to use it.
Differential Revision: https://phab.mercurial-scm.org/D4754
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Mon, 24 Sep 2018 14:31:31 -0700 |
parents | 91477b123700 |
children | 763b45bc4483 |
rev | line source |
---|---|
3283 | 1 /* |
2 base85 codec | |
3 | |
4 Copyright 2006 Brendan Cully <brendan@kublai.com> | |
5 | |
6 This software may be used and distributed according to the terms of | |
7 the GNU General Public License, incorporated herein by reference. | |
8 | |
9 Largely based on git's implementation | |
10 */ | |
11 | |
16837
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
12 #define PY_SSIZE_T_CLEAN |
3283 | 13 #include <Python.h> |
14 | |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
15 #include "util.h" |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
16 |
36264
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
/*
 * The base85 alphabet. A symbol's index in this string is its digit value
 * (0..84).
 */
static const char b85chars[] =
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";

/*
 * Reverse lookup table indexed by byte value: holds (digit value + 1) for
 * bytes that belong to the alphabet and 0 for every other byte, so that
 * "entry - 1" is negative for bytes that are not base85 symbols.
 */
static char b85dec[256];

/*
 * Populate b85dec from b85chars. Must run before b85dec is consulted.
 */
static void b85prep(void)
{
	/*
	 * sizeof() counts the string literal's terminating NUL. Leave it out,
	 * otherwise byte 0 would be registered as a valid symbol with the
	 * out-of-range digit value 85.
	 */
	const size_t nsymbols = sizeof(b85chars) - 1;
	size_t i;

	memset(b85dec, 0, sizeof(b85dec));
	for (i = 0; i < nsymbols; i++) {
		/* index as unsigned so the subscript can never be negative */
		b85dec[(unsigned char)b85chars[i]] = (char)(i + 1);
	}
}
30 | |
27060
4613a89bea42
base85: clean up function definition style
Augie Fackler <augie@google.com>
parents:
26074
diff
changeset
|
31 static PyObject *b85encode(PyObject *self, PyObject *args) |
3283 | 32 { |
33 const unsigned char *text; | |
34 PyObject *out; | |
35 char *dst; | |
16837
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
36 Py_ssize_t len, olen, i; |
3283 | 37 unsigned int acc, val, ch; |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
38 int pad = 0; |
3283 | 39 |
36649
186c6df3a373
py3: bulk-replace 'const char*' format specifier passed to PyArg_ParseTuple*()
Yuya Nishihara <yuya@tcha.org>
parents:
36264
diff
changeset
|
40 if (!PyArg_ParseTuple(args, PY23("s#|i", "y#|i"), &text, &len, &pad)) |
3283 | 41 return NULL; |
42 | |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
43 if (pad) |
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
44 olen = ((len + 3) / 4 * 5) - 3; |
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
45 else { |
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
46 olen = len % 4; |
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
47 if (olen) |
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
48 olen++; |
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
49 olen += len / 4 * 5; |
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
50 } |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
51 if (!(out = PyBytes_FromStringAndSize(NULL, olen + 3))) |
3283 | 52 return NULL; |
53 | |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
54 dst = PyBytes_AsString(out); |
3283 | 55 |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
56 while (len) { |
3283 | 57 acc = 0; |
58 for (i = 24; i >= 0; i -= 8) { | |
59 ch = *text++; | |
60 acc |= ch << i; | |
61 if (--len == 0) | |
62 break; | |
63 } | |
64 for (i = 4; i >= 0; i--) { | |
65 val = acc % 85; | |
66 acc /= 85; | |
67 dst[i] = b85chars[val]; | |
68 } | |
69 dst += 5; | |
70 } | |
71 | |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
72 if (!pad) |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
73 _PyBytes_Resize(&out, olen); |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
74 |
3283 | 75 return out; |
76 } | |
77 | |
27060
4613a89bea42
base85: clean up function definition style
Augie Fackler <augie@google.com>
parents:
26074
diff
changeset
|
78 static PyObject *b85decode(PyObject *self, PyObject *args) |
3283 | 79 { |
38783
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
80 PyObject *out = NULL; |
3283 | 81 const char *text; |
82 char *dst; | |
16837
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
83 Py_ssize_t len, i, j, olen, cap; |
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
84 int c; |
3283 | 85 unsigned int acc; |
86 | |
36649
186c6df3a373
py3: bulk-replace 'const char*' format specifier passed to PyArg_ParseTuple*()
Yuya Nishihara <yuya@tcha.org>
parents:
36264
diff
changeset
|
87 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &len)) |
3283 | 88 return NULL; |
89 | |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
90 olen = len / 5 * 4; |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
91 i = len % 5; |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
92 if (i) |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
93 olen += i - 1; |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
94 if (!(out = PyBytes_FromStringAndSize(NULL, olen))) |
3283 | 95 return NULL; |
96 | |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
97 dst = PyBytes_AsString(out); |
3283 | 98 |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
99 i = 0; |
34438
ce26a13869fb
cext: move braces for control statements to same line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
32407
diff
changeset
|
100 while (i < len) { |
3283 | 101 acc = 0; |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
102 cap = len - i - 1; |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
103 if (cap > 4) |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
104 cap = 4; |
34438
ce26a13869fb
cext: move braces for control statements to same line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
32407
diff
changeset
|
105 for (j = 0; j < cap; i++, j++) { |
3283 | 106 c = b85dec[(int)*text++] - 1; |
38783
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
107 if (c < 0) { |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
108 PyErr_Format( |
36264
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
109 PyExc_ValueError, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
110 "bad base85 character at position %d", |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
111 (int)i); |
38783
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
112 goto bail; |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
113 } |
3283 | 114 acc = acc * 85 + c; |
115 } | |
34438
ce26a13869fb
cext: move braces for control statements to same line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
32407
diff
changeset
|
116 if (i++ < len) { |
3283 | 117 c = b85dec[(int)*text++] - 1; |
38783
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
118 if (c < 0) { |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
119 PyErr_Format( |
36264
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
120 PyExc_ValueError, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
121 "bad base85 character at position %d", |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
122 (int)i); |
38783
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
123 goto bail; |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
124 } |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
125 /* overflow detection: 0xffffffff == "|NsC0", |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
126 * "|NsC" == 0x03030303 */ |
38783
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
127 if (acc > 0x03030303 || (acc *= 85) > 0xffffffff - c) { |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
128 PyErr_Format( |
36264
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
129 PyExc_ValueError, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
130 "bad base85 sequence at position %d", |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
131 (int)i); |
38783
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
132 goto bail; |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
133 } |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
134 acc += c; |
3283 | 135 } |
136 | |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
137 cap = olen < 4 ? olen : 4; |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
138 olen -= cap; |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
139 for (j = 0; j < 4 - cap; j++) |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
140 acc *= 85; |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
141 if (cap && cap < 4) |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
142 acc += 0xffffff >> (cap - 1) * 8; |
34438
ce26a13869fb
cext: move braces for control statements to same line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
32407
diff
changeset
|
143 for (j = 0; j < cap; j++) { |
3283 | 144 acc = (acc << 8) | (acc >> 24); |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
145 *dst++ = acc; |
3283 | 146 } |
147 } | |
148 | |
149 return out; | |
38783
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
150 bail: |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
151 Py_XDECREF(out); |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36649
diff
changeset
|
152 return NULL; |
3283 | 153 } |
154 | |
155 static char base85_doc[] = "Base85 Data Encoding"; | |
156 | |
157 static PyMethodDef methods[] = { | |
36264
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
158 {"b85encode", b85encode, METH_VARARGS, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
159 "Encode text in base85.\n\n" |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
160 "If the second parameter is true, pad the result to a multiple of " |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
161 "five characters.\n"}, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
162 {"b85decode", b85decode, METH_VARARGS, "Decode base85 text.\n"}, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
163 {NULL, NULL}, |
3283 | 164 }; |
165 | |
32395
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
166 static const int version = 1; |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
167 |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
168 #ifdef IS_PY3K |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
169 static struct PyModuleDef base85_module = { |
36264
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34438
diff
changeset
|
170 PyModuleDef_HEAD_INIT, "base85", base85_doc, -1, methods, |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
171 }; |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
172 |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
173 PyMODINIT_FUNC PyInit_base85(void) |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
174 { |
32395
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
175 PyObject *m; |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
176 b85prep(); |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
177 |
32395
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
178 m = PyModule_Create(&base85_module); |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
179 PyModule_AddIntConstant(m, "version", version); |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
180 return m; |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
181 } |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
182 #else |
3283 | 183 PyMODINIT_FUNC initbase85(void) |
184 { | |
32395
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
185 PyObject *m; |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
186 m = Py_InitModule3("base85", methods, base85_doc); |
3283 | 187 |
188 b85prep(); | |
32395
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
189 PyModule_AddIntConstant(m, "version", version); |
3283 | 190 } |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
191 #endif |