diff mercurial/cext/charencode.c @ 33944:f4433f2713d0

encoding: add function to test if a str consists of ASCII characters. Most strings are ASCII, so let's optimize for that case. Using uint64_t is slightly faster than uint32_t on a 64-bit system, but the difference isn't huge.
author Yuya Nishihara <yuya@tcha.org>
date Sun, 23 Apr 2017 12:59:42 +0900
parents 2c37f9dabc32
children e97be042fa1b
line wrap: on
line diff
--- a/mercurial/cext/charencode.c	Sun Apr 23 14:47:52 2017 +0900
+++ b/mercurial/cext/charencode.c	Sun Apr 23 12:59:42 2017 +0900
@@ -12,6 +12,7 @@
 #include <assert.h>
 
 #include "charencode.h"
+#include "compat.h"
 #include "util.h"
 
 #ifdef IS_PY3K
@@ -125,6 +126,29 @@
 	return ret;
 }
 
+/* isasciistr(s): return True if no byte of s has its high bit set (an
+ * empty s counts as ASCII), else False; NULL if the args fail to parse */
+PyObject *isasciistr(PyObject *self, PyObject *args)
+{
+	const char *buf;
+	Py_ssize_t i, len;
+	if (!PyArg_ParseTuple(args, "s#:isasciistr", &buf, &len))
+		return NULL;
+	/* test 4 bytes per step; memcpy() makes the load valid for any
+	 * alignment and avoids reading char data through a uint32_t cast */
+	for (i = 0; i <= len - 4; i += 4) {
+		uint32_t word;
+		memcpy(&word, buf + i, sizeof(word));
+		if (word & 0x80808080U)
+			Py_RETURN_FALSE;
+	}
+	for (; i < len; i++) {
+		if (buf[i] & 0x80)
+			Py_RETURN_FALSE;
+	}
+	Py_RETURN_TRUE;
+}
+
 static inline PyObject *_asciitransform(PyObject *str_obj,
 					const char table[128],
 					PyObject *fallback_fn)