Mercurial > public > mercurial-scm > hg
diff mercurial/cext/charencode.c @ 33757:e9996bd7203f
cext: split character encoding functions to new compilation unit
This extracts charencode.c from parsers.c, which has grown large enough that I
hesitate to add new JSON functions to it. charencode.o is still linked into
parsers.so to avoid duplicating binary code.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Mon, 31 Jul 2017 22:28:27 +0900 |
parents | mercurial/cext/parsers.c@5866ba5e9c48 |
children | 0f4ac3b6dee4 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/cext/charencode.c Mon Jul 31 22:28:27 2017 +0900 @@ -0,0 +1,209 @@ +/* + charencode.c - miscellaneous character encoding + + Copyright 2008 Matt Mackall <mpm@selenic.com> and others + + This software may be used and distributed according to the terms of + the GNU General Public License, incorporated herein by reference. +*/ + +#include <Python.h> + +#include "util.h" + +static const char lowertable[128] = { + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', + '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */ + '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */ + '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */ + '\x78', '\x79', '\x7a', /* X-Z */ + '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', + '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', + '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', + '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' +}; + +static const char uppertable[128] = { + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', 
'\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', + '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', + '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', + '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', + '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */ + '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */ + '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */ + '\x58', '\x59', '\x5a', /* x-z */ + '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' +}; + +/* + * Turn a hex-encoded string into binary. + */ +PyObject *unhexlify(const char *str, int len) +{ + PyObject *ret; + char *d; + int i; + + ret = PyBytes_FromStringAndSize(NULL, len / 2); + + if (!ret) + return NULL; + + d = PyBytes_AsString(ret); + + for (i = 0; i < len;) { + int hi = hexdigit(str, i++); + int lo = hexdigit(str, i++); + *d++ = (hi << 4) | lo; + } + + return ret; +} + +static inline PyObject *_asciitransform(PyObject *str_obj, + const char table[128], + PyObject *fallback_fn) +{ + char *str, *newstr; + Py_ssize_t i, len; + PyObject *newobj = NULL; + PyObject *ret = NULL; + + str = PyBytes_AS_STRING(str_obj); + len = PyBytes_GET_SIZE(str_obj); + + newobj = PyBytes_FromStringAndSize(NULL, len); + if (!newobj) + goto quit; + + newstr = PyBytes_AS_STRING(newobj); + + for (i = 0; i < len; i++) { + char c = str[i]; + if (c & 0x80) { + if (fallback_fn != NULL) { + ret = PyObject_CallFunctionObjArgs(fallback_fn, + str_obj, NULL); + } else { + PyObject *err = PyUnicodeDecodeError_Create( + "ascii", str, len, i, (i + 1), + "unexpected code byte"); + PyErr_SetObject(PyExc_UnicodeDecodeError, err); + Py_XDECREF(err); + } + goto quit; + } + newstr[i] = table[(unsigned char)c]; + } + + ret = newobj; + Py_INCREF(ret); +quit: + Py_XDECREF(newobj); + 
return ret; +} + +PyObject *asciilower(PyObject *self, PyObject *args) +{ + PyObject *str_obj; + if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj)) + return NULL; + return _asciitransform(str_obj, lowertable, NULL); +} + +PyObject *asciiupper(PyObject *self, PyObject *args) +{ + PyObject *str_obj; + if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj)) + return NULL; + return _asciitransform(str_obj, uppertable, NULL); +} + +PyObject *make_file_foldmap(PyObject *self, PyObject *args) +{ + PyObject *dmap, *spec_obj, *normcase_fallback; + PyObject *file_foldmap = NULL; + enum normcase_spec spec; + PyObject *k, *v; + dirstateTupleObject *tuple; + Py_ssize_t pos = 0; + const char *table; + + if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap", + &PyDict_Type, &dmap, + &PyInt_Type, &spec_obj, + &PyFunction_Type, &normcase_fallback)) + goto quit; + + spec = (int)PyInt_AS_LONG(spec_obj); + switch (spec) { + case NORMCASE_LOWER: + table = lowertable; + break; + case NORMCASE_UPPER: + table = uppertable; + break; + case NORMCASE_OTHER: + table = NULL; + break; + default: + PyErr_SetString(PyExc_TypeError, "invalid normcasespec"); + goto quit; + } + + /* Add some more entries to deal with additions outside this + function. 
*/ + file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11); + if (file_foldmap == NULL) + goto quit; + + while (PyDict_Next(dmap, &pos, &k, &v)) { + if (!dirstate_tuple_check(v)) { + PyErr_SetString(PyExc_TypeError, + "expected a dirstate tuple"); + goto quit; + } + + tuple = (dirstateTupleObject *)v; + if (tuple->state != 'r') { + PyObject *normed; + if (table != NULL) { + normed = _asciitransform(k, table, + normcase_fallback); + } else { + normed = PyObject_CallFunctionObjArgs( + normcase_fallback, k, NULL); + } + + if (normed == NULL) + goto quit; + if (PyDict_SetItem(file_foldmap, normed, k) == -1) { + Py_DECREF(normed); + goto quit; + } + Py_DECREF(normed); + } + } + return file_foldmap; +quit: + Py_XDECREF(file_foldmap); + return NULL; +}