Mercurial > public > mercurial-scm > hg
annotate mercurial/cext/charencode.c @ 33757:e9996bd7203f
cext: split character encoding functions to new compilation unit
This extracts charencode.c from parsers.c, which seems big enough for me
to hesitate to add new JSON functions. Still charencode.o is linked to
parsers.so to avoid duplication of binary codes.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Mon, 31 Jul 2017 22:28:27 +0900 |
parents | mercurial/cext/parsers.c@5866ba5e9c48 |
children | 0f4ac3b6dee4 |
rev | line source |
---|---|
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
1 /* |
33757
e9996bd7203f
cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
2 charencode.c - miscellaneous character encoding |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
3 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
5 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
6 This software may be used and distributed according to the terms of |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
7 the GNU General Public License, incorporated herein by reference. |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
8 */ |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
9 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
10 #include <Python.h> |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
11 |
11361
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
12 #include "util.h" |
20742
3681de20b0a7
parsers: fail fast if Python has wrong minor version (issue4110)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20555
diff
changeset
|
13 |
32386
7640584e697c
cext: mark constant variables
Yuya Nishihara <yuya@tcha.org>
parents:
32384
diff
changeset
|
14 static const char lowertable[128] = { |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
15 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
16 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
17 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
18 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
19 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
20 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
21 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
22 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
23 '\x40', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
24 '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
25 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
26 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
27 '\x78', '\x79', '\x7a', /* X-Z */ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
28 '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
29 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
30 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
31 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
32 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
33 }; |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
34 |
32386
7640584e697c
cext: mark constant variables
Yuya Nishihara <yuya@tcha.org>
parents:
32384
diff
changeset
|
35 static const char uppertable[128] = { |
24577
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
36 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
37 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
38 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
39 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
40 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
41 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
42 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
43 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
44 '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
45 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
46 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
47 '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
48 '\x60', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
49 '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
50 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
51 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
52 '\x58', '\x59', '\x5a', /* x-z */ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
53 '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
54 }; |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
55 |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
56 /* |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
57 * Turn a hex-encoded string into binary. |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
58 */ |
24214
a5f1bccd2996
manifest.c: new extension code to lazily parse manifests
Augie Fackler <augie@google.com>
parents:
24032
diff
changeset
|
59 PyObject *unhexlify(const char *str, int len) |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
60 { |
7092
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
61 PyObject *ret; |
6395
3f0294536b24
fix const annotation warning
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
6389
diff
changeset
|
62 char *d; |
16617
4fb16743049d
parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents:
16616
diff
changeset
|
63 int i; |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
64 |
11361
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
65 ret = PyBytes_FromStringAndSize(NULL, len / 2); |
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
66 |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
67 if (!ret) |
7092
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
68 return NULL; |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
69 |
11361
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
70 d = PyBytes_AsString(ret); |
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
71 |
16617
4fb16743049d
parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents:
16616
diff
changeset
|
72 for (i = 0; i < len;) { |
4fb16743049d
parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents:
16616
diff
changeset
|
73 int hi = hexdigit(str, i++); |
4fb16743049d
parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents:
16616
diff
changeset
|
74 int lo = hexdigit(str, i++); |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
75 *d++ = (hi << 4) | lo; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
76 } |
7091
12b35ae03365
parsers: clean up whitespace
Matt Mackall <mpm@selenic.com>
parents:
6395
diff
changeset
|
77 |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
78 return ret; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
79 } |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
80 |
24576
fe173106e7fe
parsers: make _asciilower a generic _asciitransform function
Siddharth Agarwal <sid0@fb.com>
parents:
24575
diff
changeset
|
81 static inline PyObject *_asciitransform(PyObject *str_obj, |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
82 const char table[128], |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
83 PyObject *fallback_fn) |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
84 { |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
85 char *str, *newstr; |
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
86 Py_ssize_t i, len; |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
87 PyObject *newobj = NULL; |
24575
a62e957413f7
parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents:
24574
diff
changeset
|
88 PyObject *ret = NULL; |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
89 |
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
90 str = PyBytes_AS_STRING(str_obj); |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
91 len = PyBytes_GET_SIZE(str_obj); |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
92 |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
93 newobj = PyBytes_FromStringAndSize(NULL, len); |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
94 if (!newobj) |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
95 goto quit; |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
96 |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
97 newstr = PyBytes_AS_STRING(newobj); |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
98 |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
99 for (i = 0; i < len; i++) { |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
100 char c = str[i]; |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
101 if (c & 0x80) { |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
102 if (fallback_fn != NULL) { |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
103 ret = PyObject_CallFunctionObjArgs(fallback_fn, |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
104 str_obj, NULL); |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
105 } else { |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
106 PyObject *err = PyUnicodeDecodeError_Create( |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
107 "ascii", str, len, i, (i + 1), |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
108 "unexpected code byte"); |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
109 PyErr_SetObject(PyExc_UnicodeDecodeError, err); |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
110 Py_XDECREF(err); |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
111 } |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
112 goto quit; |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
113 } |
24576
fe173106e7fe
parsers: make _asciilower a generic _asciitransform function
Siddharth Agarwal <sid0@fb.com>
parents:
24575
diff
changeset
|
114 newstr[i] = table[(unsigned char)c]; |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
115 } |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
116 |
24575
a62e957413f7
parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents:
24574
diff
changeset
|
117 ret = newobj; |
a62e957413f7
parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents:
24574
diff
changeset
|
118 Py_INCREF(ret); |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
119 quit: |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
120 Py_XDECREF(newobj); |
24575
a62e957413f7
parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents:
24574
diff
changeset
|
121 return ret; |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
122 } |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
123 |
33757
e9996bd7203f
cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
124 PyObject *asciilower(PyObject *self, PyObject *args) |
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
125 { |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
126 PyObject *str_obj; |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
127 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj)) |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
128 return NULL; |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
129 return _asciitransform(str_obj, lowertable, NULL); |
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
130 } |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
131 |
33757
e9996bd7203f
cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
132 PyObject *asciiupper(PyObject *self, PyObject *args) |
24577
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
133 { |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
134 PyObject *str_obj; |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
135 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj)) |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
136 return NULL; |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
137 return _asciitransform(str_obj, uppertable, NULL); |
24577
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
138 } |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
139 |
33757
e9996bd7203f
cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
140 PyObject *make_file_foldmap(PyObject *self, PyObject *args) |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
141 { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
142 PyObject *dmap, *spec_obj, *normcase_fallback; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
143 PyObject *file_foldmap = NULL; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
144 enum normcase_spec spec; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
145 PyObject *k, *v; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
146 dirstateTupleObject *tuple; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
147 Py_ssize_t pos = 0; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
148 const char *table; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
149 |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
150 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap", |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
151 &PyDict_Type, &dmap, |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
152 &PyInt_Type, &spec_obj, |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
153 &PyFunction_Type, &normcase_fallback)) |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
154 goto quit; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
155 |
24622
1e05f11619bb
parsers.c: avoid implicit conversion loses integer precision warning
Andr? Sintzoff <andre.sintzoff@gmail.com>
parents:
24609
diff
changeset
|
156 spec = (int)PyInt_AS_LONG(spec_obj); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
157 switch (spec) { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
158 case NORMCASE_LOWER: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
159 table = lowertable; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
160 break; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
161 case NORMCASE_UPPER: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
162 table = uppertable; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
163 break; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
164 case NORMCASE_OTHER: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
165 table = NULL; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
166 break; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
167 default: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
168 PyErr_SetString(PyExc_TypeError, "invalid normcasespec"); |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
169 goto quit; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
170 } |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
171 |
25583
ce64c9ab19f2
parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents:
25582
diff
changeset
|
172 /* Add some more entries to deal with additions outside this |
ce64c9ab19f2
parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents:
25582
diff
changeset
|
173 function. */ |
ce64c9ab19f2
parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents:
25582
diff
changeset
|
174 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
175 if (file_foldmap == NULL) |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
176 goto quit; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
177 |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
178 while (PyDict_Next(dmap, &pos, &k, &v)) { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
179 if (!dirstate_tuple_check(v)) { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
180 PyErr_SetString(PyExc_TypeError, |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
181 "expected a dirstate tuple"); |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
182 goto quit; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
183 } |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
184 |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
185 tuple = (dirstateTupleObject *)v; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
186 if (tuple->state != 'r') { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
187 PyObject *normed; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
188 if (table != NULL) { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
189 normed = _asciitransform(k, table, |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
190 normcase_fallback); |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
191 } else { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
192 normed = PyObject_CallFunctionObjArgs( |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
193 normcase_fallback, k, NULL); |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
194 } |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
195 |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
196 if (normed == NULL) |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
197 goto quit; |
26049
b1634b7804c7
parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents:
26048
diff
changeset
|
198 if (PyDict_SetItem(file_foldmap, normed, k) == -1) { |
b1634b7804c7
parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents:
26048
diff
changeset
|
199 Py_DECREF(normed); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
200 goto quit; |
26049
b1634b7804c7
parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents:
26048
diff
changeset
|
201 } |
b1634b7804c7
parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents:
26048
diff
changeset
|
202 Py_DECREF(normed); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
203 } |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
204 } |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
205 return file_foldmap; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
206 quit: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
207 Py_XDECREF(file_foldmap); |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
208 return NULL; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
209 } |