Mercurial > public > mercurial-scm > hg-stable
annotate mercurial/parsers.c @ 7093:16bafcebd3d1
dirstate: C parsing extension
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Sun, 12 Oct 2008 15:21:08 -0500 |
parents | fb3fc27617a2 |
children | 1ca878d7b849 |
rev | line source |
---|---|
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
1 /* |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
2 parsers.c - efficient content parsing |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
3 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
5 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
6 This software may be used and distributed according to the terms of |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
7 the GNU General Public License, incorporated herein by reference. |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
8 */ |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
9 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
10 #include <Python.h> |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
11 #include <ctype.h> |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
12 #include <string.h> |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
13 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
14 static int hexdigit(char c) |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
15 { |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
16 if (c >= '0' && c <= '9') |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
17 return c - '0'; |
7092
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
18 if (c >= 'a' && c <= 'f') |
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
19 return c - 'a' + 10; |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
20 if (c >= 'A' && c <= 'F') |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
21 return c - 'A' + 10; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
22 |
7092
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
23 PyErr_SetString(PyExc_ValueError, "input contains non-hex character"); |
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
24 return 0; |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
25 } |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
26 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
27 /* |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
28 * Turn a hex-encoded string into binary. |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
29 */ |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
30 static PyObject *unhexlify(const char *str, int len) |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
31 { |
7092
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
32 PyObject *ret; |
6395
3f0294536b24
fix const annotation warning
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
6389
diff
changeset
|
33 const char *c; |
3f0294536b24
fix const annotation warning
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
6389
diff
changeset
|
34 char *d; |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
35 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
36 ret = PyString_FromStringAndSize(NULL, len / 2); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
37 if (!ret) |
7092
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
38 return NULL; |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
39 |
7092
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
40 d = PyString_AS_STRING(ret); |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
41 for (c = str; c < str + len;) { |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
42 int hi = hexdigit(*c++); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
43 int lo = hexdigit(*c++); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
44 *d++ = (hi << 4) | lo; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
45 } |
7091
12b35ae03365
parsers: clean up whitespace
Matt Mackall <mpm@selenic.com>
parents:
6395
diff
changeset
|
46 |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
47 return ret; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
48 } |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
49 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
50 /* |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
51 * This code assumes that a manifest is stitched together with newline |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
52 * ('\n') characters. |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
53 */ |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
54 static PyObject *parse_manifest(PyObject *self, PyObject *args) |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
55 { |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
56 PyObject *mfdict, *fdict; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
57 char *str, *cur, *start, *zero; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
58 int len; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
59 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
60 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest", |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
61 &PyDict_Type, &mfdict, |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
62 &PyDict_Type, &fdict, |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
63 &str, &len)) |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
64 goto quit; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
65 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
66 for (start = cur = str, zero = NULL; cur < str + len; cur++) { |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
67 PyObject *file = NULL, *node = NULL; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
68 PyObject *flags = NULL; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
69 int nlen; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
70 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
71 if (!*cur) { |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
72 zero = cur; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
73 continue; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
74 } |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
75 else if (*cur != '\n') |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
76 continue; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
77 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
78 if (!zero) { |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
79 PyErr_SetString(PyExc_ValueError, |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
80 "manifest entry has no separator"); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
81 goto quit; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
82 } |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
83 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
84 file = PyString_FromStringAndSize(start, zero - start); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
85 if (!file) |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
86 goto bail; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
87 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
88 nlen = cur - zero - 1; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
89 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
90 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
91 if (!node) |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
92 goto bail; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
93 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
94 if (nlen > 40) { |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
95 PyObject *flags; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
96 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
97 flags = PyString_FromStringAndSize(zero + 41, |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
98 nlen - 40); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
99 if (!flags) |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
100 goto bail; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
101 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
102 if (PyDict_SetItem(fdict, file, flags) == -1) |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
103 goto bail; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
104 } |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
105 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
106 if (PyDict_SetItem(mfdict, file, node) == -1) |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
107 goto bail; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
108 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
109 start = cur + 1; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
110 zero = NULL; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
111 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
112 Py_XDECREF(flags); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
113 Py_XDECREF(node); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
114 Py_XDECREF(file); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
115 continue; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
116 bail: |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
117 Py_XDECREF(flags); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
118 Py_XDECREF(node); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
119 Py_XDECREF(file); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
120 goto quit; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
121 } |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
122 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
123 if (len > 0 && *(cur - 1) != '\n') { |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
124 PyErr_SetString(PyExc_ValueError, |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
125 "manifest contains trailing garbage"); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
126 goto quit; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
127 } |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
128 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
129 Py_INCREF(Py_None); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
130 return Py_None; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
131 quit: |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
132 return NULL; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
133 } |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
134 |
7093
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
135 #ifdef _WIN32 |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
136 # ifdef _MSC_VER |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
137 /* msvc 6.0 has problems */ |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
138 # define inline __inline |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
139 typedef unsigned long uint32_t; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
140 # else |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
141 # include <stdint.h> |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
142 # endif |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
143 static uint32_t ntohl(uint32_t x) |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
144 { |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
145 return ((x & 0x000000ffUL) << 24) | |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
146 ((x & 0x0000ff00UL) << 8) | |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
147 ((x & 0x00ff0000UL) >> 8) | |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
148 ((x & 0xff000000UL) >> 24); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
149 } |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
150 #else |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
151 /* not windows */ |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
152 # include <sys/types.h> |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
153 # if defined __BEOS__ && !defined __HAIKU__ |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
154 # include <ByteOrder.h> |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
155 # else |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
156 # include <arpa/inet.h> |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
157 # endif |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
158 # include <inttypes.h> |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
159 #endif |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
160 |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
161 static PyObject *parse_dirstate(PyObject *self, PyObject *args) |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
162 { |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
163 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
164 PyObject *fname = NULL, *cname = NULL, *entry = NULL; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
165 char *str, *cur, *end, *cpos; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
166 int state, mode, size, mtime, flen; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
167 int len; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
168 char decode[16]; /* for alignment */ |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
169 |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
170 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate", |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
171 &PyDict_Type, &dmap, |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
172 &PyDict_Type, &cmap, |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
173 &str, &len)) |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
174 goto quit; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
175 |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
176 /* read parents */ |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
177 if (len < 40) |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
178 goto quit; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
179 |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
180 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
181 if (!parents) |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
182 goto quit; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
183 |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
184 /* read filenames */ |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
185 cur = str + 40; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
186 end = str + len; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
187 |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
188 while (cur < end - 17) { |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
189 /* unpack header */ |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
190 state = *cur; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
191 memcpy(decode, cur + 1, 16); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
192 mode = ntohl(*(uint32_t *)(decode)); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
193 size = ntohl(*(uint32_t *)(decode + 4)); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
194 mtime = ntohl(*(uint32_t *)(decode + 8)); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
195 flen = ntohl(*(uint32_t *)(decode + 12)); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
196 cur += 17; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
197 if (cur + flen > end) |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
198 goto quit; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
199 |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
200 entry = Py_BuildValue("ciii", state, mode, size, mtime); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
201 PyObject_GC_UnTrack(entry); /* don't waste time with this */ |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
202 if (!entry) |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
203 goto quit; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
204 |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
205 cpos = memchr(cur, 0, flen); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
206 if (cpos) { |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
207 fname = PyString_FromStringAndSize(cur, cpos - cur); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
208 cname = PyString_FromStringAndSize(cpos + 1, |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
209 flen - (cpos - cur) - 1); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
210 if (!fname || !cname || |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
211 PyDict_SetItem(cmap, fname, cname) == -1 || |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
212 PyDict_SetItem(dmap, fname, entry) == -1) |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
213 goto quit; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
214 Py_DECREF(cname); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
215 } else { |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
216 fname = PyString_FromStringAndSize(cur, flen); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
217 if (!fname || |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
218 PyDict_SetItem(dmap, fname, entry) == -1) |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
219 goto quit; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
220 } |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
221 cur += flen; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
222 Py_DECREF(fname); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
223 Py_DECREF(entry); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
224 fname = cname = entry = NULL; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
225 } |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
226 |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
227 ret = parents; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
228 Py_INCREF(ret); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
229 quit: |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
230 Py_XDECREF(fname); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
231 Py_XDECREF(cname); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
232 Py_XDECREF(entry); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
233 Py_XDECREF(parents); |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
234 return ret; |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
235 } |
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
236 |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
237 static char parsers_doc[] = "Efficient content parsing."; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
238 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
239 static PyMethodDef methods[] = { |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
240 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, |
7093
16bafcebd3d1
dirstate: C parsing extension
Matt Mackall <mpm@selenic.com>
parents:
7092
diff
changeset
|
241 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"}, |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
242 {NULL, NULL} |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
243 }; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
244 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
245 PyMODINIT_FUNC initparsers(void) |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
246 { |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
247 Py_InitModule3("parsers", methods, parsers_doc); |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
248 } |