Mercurial > public > mercurial-scm > hg-stable
diff mercurial/cext/charencode.c @ 33944:f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Most strings are ASCII. Let's optimize for it.
Using uint64_t is slightly faster than uint32_t on a 64-bit system, but there
isn't a huge difference.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 23 Apr 2017 12:59:42 +0900 |
parents | 2c37f9dabc32 |
children | e97be042fa1b |
line wrap: on
line diff
--- a/mercurial/cext/charencode.c Sun Apr 23 14:47:52 2017 +0900
+++ b/mercurial/cext/charencode.c Sun Apr 23 12:59:42 2017 +0900
@@ -12,6 +12,7 @@
 #include <assert.h>
 
 #include "charencode.h"
+#include "compat.h"
 #include "util.h"
 
 #ifdef IS_PY3K
@@ -125,6 +126,29 @@
 	return ret;
 }
 
+PyObject *isasciistr(PyObject *self, PyObject *args)
+{
+	const char *buf;
+	Py_ssize_t i, len;
+	if (!PyArg_ParseTuple(args, "s#:isasciistr", &buf, &len))
+		return NULL;
+	i = 0;
+	/* char array in PyStringObject should be at least 4-byte aligned */
+	if (((uintptr_t)buf & 3) == 0) {
+		const uint32_t *p = (const uint32_t *)buf;
+		for (; i < len / 4; i++) {
+			if (p[i] & 0x80808080U)
+				Py_RETURN_FALSE;
+		}
+		i *= 4;
+	}
+	for (; i < len; i++) {
+		if (buf[i] & 0x80)
+			Py_RETURN_FALSE;
+	}
+	Py_RETURN_TRUE;
+}
+
 static inline PyObject *_asciitransform(PyObject *str_obj,
 					  const char table[128],
 					  PyObject *fallback_fn)