diff mercurial/parsers.c @ 24803:e89f909edffa stable 3.4-rc

merge default into stable for 3.4 freeze
author Matt Mackall <mpm@selenic.com>
date Thu, 16 Apr 2015 20:57:51 -0500
parents f2fd087a75ef
children b3142ea2a0d4
line wrap: on
line diff
--- a/mercurial/parsers.c	Thu Apr 16 22:33:53 2015 +0900
+++ b/mercurial/parsers.c	Thu Apr 16 20:57:51 2015 -0500
@@ -56,6 +56,27 @@
 	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
 };
 
+static char uppertable[128] = {
+	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
+	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
+	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
+	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
+	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
+	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
+	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
+	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
+	'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
+	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
+	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
+	'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
+	'\x60',
+		'\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
+	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
+	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
+	'\x58', '\x59', '\x5a', 					/* x-z */
+				'\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
+};
+
 static inline int hexdigit(const char *p, Py_ssize_t off)
 {
 	int8_t val = hextable[(unsigned char)p[off]];
@@ -71,7 +92,7 @@
 /*
  * Turn a hex-encoded string into binary.
  */
-static PyObject *unhexlify(const char *str, int len)
+PyObject *unhexlify(const char *str, int len)
 {
 	PyObject *ret;
 	char *d;
@@ -93,14 +114,17 @@
 	return ret;
 }
 
-static PyObject *asciilower(PyObject *self, PyObject *args)
+static inline PyObject *_asciitransform(PyObject *str_obj,
+					const char table[128],
+					PyObject *fallback_fn)
 {
 	char *str, *newstr;
-	int i, len;
+	Py_ssize_t i, len;
 	PyObject *newobj = NULL;
+	PyObject *ret = NULL;
 
-	if (!PyArg_ParseTuple(args, "s#", &str, &len))
-		goto quit;
+	str = PyBytes_AS_STRING(str_obj);
+	len = PyBytes_GET_SIZE(str_obj);
 
 	newobj = PyBytes_FromStringAndSize(NULL, len);
 	if (!newobj)
@@ -111,19 +135,120 @@
 	for (i = 0; i < len; i++) {
 		char c = str[i];
 		if (c & 0x80) {
-			PyObject *err = PyUnicodeDecodeError_Create(
-				"ascii", str, len, i, (i + 1),
-				"unexpected code byte");
-			PyErr_SetObject(PyExc_UnicodeDecodeError, err);
-			Py_XDECREF(err);
+			if (fallback_fn != NULL) {
+				ret = PyObject_CallFunctionObjArgs(fallback_fn,
+					str_obj, NULL);
+			} else {
+				PyObject *err = PyUnicodeDecodeError_Create(
+					"ascii", str, len, i, (i + 1),
+					"unexpected code byte");
+				PyErr_SetObject(PyExc_UnicodeDecodeError, err);
+				Py_XDECREF(err);
+			}
 			goto quit;
 		}
-		newstr[i] = lowertable[(unsigned char)c];
+		newstr[i] = table[(unsigned char)c];
 	}
 
-	return newobj;
+	ret = newobj;
+	Py_INCREF(ret);
 quit:
 	Py_XDECREF(newobj);
+	return ret;
+}
+
+static PyObject *asciilower(PyObject *self, PyObject *args)
+{
+	PyObject *str_obj;
+	if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
+		return NULL;
+	return _asciitransform(str_obj, lowertable, NULL);
+}
+
+static PyObject *asciiupper(PyObject *self, PyObject *args)
+{
+	PyObject *str_obj;
+	if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
+		return NULL;
+	return _asciitransform(str_obj, uppertable, NULL);
+}
+
+static PyObject *make_file_foldmap(PyObject *self, PyObject *args)
+{
+	PyObject *dmap, *spec_obj, *normcase_fallback;
+	PyObject *file_foldmap = NULL;
+	enum normcase_spec spec;
+	PyObject *k, *v;
+	dirstateTupleObject *tuple;
+	Py_ssize_t pos = 0;
+	const char *table;
+
+	if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
+			      &PyDict_Type, &dmap,
+			      &PyInt_Type, &spec_obj,
+			      &PyFunction_Type, &normcase_fallback))
+		goto quit;
+
+	spec = (int)PyInt_AS_LONG(spec_obj);
+	switch (spec) {
+	case NORMCASE_LOWER:
+		table = lowertable;
+		break;
+	case NORMCASE_UPPER:
+		table = uppertable;
+		break;
+	case NORMCASE_OTHER:
+		table = NULL;
+		break;
+	default:
+		PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
+		goto quit;
+	}
+
+#if PY_VERSION_HEX >= 0x02060000
+	/* _PyDict_NewPresized expects a minused parameter, but it actually
+	   creates a dictionary that's the nearest power of two bigger than the
+	   parameter. For example, with the initial minused = 1000, the
+	   dictionary created has size 1024. Of course in a lot of cases that
+	   can be greater than the maximum load factor Python's dict object
+	   expects (= 2/3), so as soon as we cross the threshold we'll resize
+	   anyway. So create a dictionary that's 3/2 the size. Also add some
+	   more to deal with additions outside this function. */
+	file_foldmap = _PyDict_NewPresized((PyDict_Size(dmap) / 5) * 8);
+#else
+	file_foldmap = PyDict_New();
+#endif
+
+	if (file_foldmap == NULL)
+		goto quit;
+
+	while (PyDict_Next(dmap, &pos, &k, &v)) {
+		if (!dirstate_tuple_check(v)) {
+			PyErr_SetString(PyExc_TypeError,
+					"expected a dirstate tuple");
+			goto quit;
+		}
+
+		tuple = (dirstateTupleObject *)v;
+		if (tuple->state != 'r') {
+			PyObject *normed;
+			if (table != NULL) {
+				normed = _asciitransform(k, table,
+					normcase_fallback);
+			} else {
+				normed = PyObject_CallFunctionObjArgs(
+					normcase_fallback, k, NULL);
+			}
+
+			if (normed == NULL)
+				goto quit;
+			if (PyDict_SetItem(file_foldmap, normed, k) == -1)
+				goto quit;
+		}
+	}
+	return file_foldmap;
+quit:
+	Py_XDECREF(file_foldmap);
 	return NULL;
 }
 
@@ -911,6 +1036,111 @@
 	}
 }
 
+static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
+                                    Py_ssize_t marker, char *phases)
+{
+	PyObject *iter = NULL;
+	PyObject *iter_item = NULL;
+	Py_ssize_t min_idx = index_length(self) + 1;
+	long iter_item_long;
+
+	if (PyList_GET_SIZE(list) != 0) {
+		iter = PyObject_GetIter(list);
+		if (iter == NULL)
+			return -2;
+		while ((iter_item = PyIter_Next(iter)))
+		{
+			iter_item_long = PyInt_AS_LONG(iter_item);
+			Py_DECREF(iter_item);
+			if (iter_item_long < min_idx)
+				min_idx = iter_item_long;
+			phases[iter_item_long] = marker;
+		}
+		Py_DECREF(iter);
+	}
+
+	return min_idx;
+}
+
+static inline void set_phase_from_parents(char *phases, int parent_1,
+                                          int parent_2, Py_ssize_t i)
+{
+	if (parent_1 >= 0 && phases[parent_1] > phases[i])
+		phases[i] = phases[parent_1];
+	if (parent_2 >= 0 && phases[parent_2] > phases[i])
+		phases[i] = phases[parent_2];
+}
+
+static PyObject *compute_phases(indexObject *self, PyObject *args)
+{
+	PyObject *roots = Py_None;
+	PyObject *phaseslist = NULL;
+	PyObject *phaseroots = NULL;
+	PyObject *rev = NULL;
+	PyObject *p1 = NULL;
+	PyObject *p2 = NULL;
+	Py_ssize_t addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
+	Py_ssize_t len = index_length(self) - 1;
+	Py_ssize_t numphase = 0;
+	Py_ssize_t minrevallphases = 0;
+	Py_ssize_t minrevphase = 0;
+	Py_ssize_t i = 0;
+	int parent_1, parent_2;
+	char *phases = NULL;
+	const char *data;
+
+	if (!PyArg_ParseTuple(args, "O", &roots))
+		goto release_none;
+	if (roots == NULL || !PyList_Check(roots))
+		goto release_none;
+
+	phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
+	if (phases == NULL)
+		goto release_none;
+	/* Put the phase information of all the roots in phases */
+	numphase = PyList_GET_SIZE(roots)+1;
+	minrevallphases = len + 1;
+	for (i = 0; i < numphase-1; i++) {
+		phaseroots = PyList_GET_ITEM(roots, i);
+		if (!PyList_Check(phaseroots))
+			goto release_phases;
+		minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
+		if (minrevphase == -2) /* Error from add_roots_get_min */
+			goto release_phases;
+		minrevallphases = MIN(minrevallphases, minrevphase);
+	}
+	/* Propagate the phase information from the roots to the revs */
+	if (minrevallphases != -1) {
+		for (i = minrevallphases; i < self->raw_length; i++) {
+			data = index_deref(self, i);
+			set_phase_from_parents(phases, getbe32(data+24), getbe32(data+28), i);
+		}
+		for (i = 0; i < addlen; i++) {
+			rev = PyList_GET_ITEM(self->added, i);
+			p1 = PyTuple_GET_ITEM(rev, 5);
+			p2 = PyTuple_GET_ITEM(rev, 6);
+			if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
+				PyErr_SetString(PyExc_TypeError, "revlog parents are invalid");
+				goto release_phases;
+			}
+			parent_1 = (int)PyInt_AS_LONG(p1);
+			parent_2 = (int)PyInt_AS_LONG(p2);
+			set_phase_from_parents(phases, parent_1, parent_2, i+self->raw_length);
+		}
+	}
+	/* Transform phase list to a python list */
+	phaseslist = PyList_New(len);
+	if (phaseslist == NULL)
+		goto release_phases;
+	for (i = 0; i < len; i++)
+		PyList_SET_ITEM(phaseslist, i, PyInt_FromLong(phases[i]));
+
+release_phases:
+	free(phases);
+release_none:
+	return phaseslist;
+}
+
 static PyObject *index_headrevs(indexObject *self, PyObject *args)
 {
 	Py_ssize_t i, len, addlen;
@@ -1102,6 +1332,11 @@
 static int nt_new(indexObject *self)
 {
 	if (self->ntlength == self->ntcapacity) {
+		if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
+			PyErr_SetString(PyExc_MemoryError,
+					"overflow in nt_new");
+			return -1;
+		}
 		self->ntcapacity *= 2;
 		self->nt = realloc(self->nt,
 				   self->ntcapacity * sizeof(nodetree));
@@ -1163,7 +1398,7 @@
 static int nt_init(indexObject *self)
 {
 	if (self->nt == NULL) {
-		if (self->raw_length > INT_MAX) {
+		if (self->raw_length > INT_MAX / sizeof(nodetree)) {
 			PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
 			return -1;
 		}
@@ -1676,108 +1911,6 @@
 }
 
 /*
- * Given a (possibly overlapping) set of revs, return the greatest
- * common ancestors: those with the longest path to the root.
- */
-static PyObject *index_ancestors(indexObject *self, PyObject *args)
-{
-	PyObject *ret = NULL, *gca = NULL;
-	Py_ssize_t argcount, i, len;
-	bitmask repeat = 0;
-	int revcount = 0;
-	int *revs;
-
-	argcount = PySequence_Length(args);
-	revs = malloc(argcount * sizeof(*revs));
-	if (argcount > 0 && revs == NULL)
-		return PyErr_NoMemory();
-	len = index_length(self) - 1;
-
-	for (i = 0; i < argcount; i++) {
-		static const int capacity = 24;
-		PyObject *obj = PySequence_GetItem(args, i);
-		bitmask x;
-		long val;
-
-		if (!PyInt_Check(obj)) {
-			PyErr_SetString(PyExc_TypeError,
-					"arguments must all be ints");
-			Py_DECREF(obj);
-			goto bail;
-		}
-		val = PyInt_AsLong(obj);
-		Py_DECREF(obj);
-		if (val == -1) {
-			ret = PyList_New(0);
-			goto done;
-		}
-		if (val < 0 || val >= len) {
-			PyErr_SetString(PyExc_IndexError,
-					"index out of range");
-			goto bail;
-		}
-		/* this cheesy bloom filter lets us avoid some more
-		 * expensive duplicate checks in the common set-is-disjoint
-		 * case */
-		x = 1ull << (val & 0x3f);
-		if (repeat & x) {
-			int k;
-			for (k = 0; k < revcount; k++) {
-				if (val == revs[k])
-					goto duplicate;
-			}
-		}
-		else repeat |= x;
-		if (revcount >= capacity) {
-			PyErr_Format(PyExc_OverflowError,
-				     "bitset size (%d) > capacity (%d)",
-				     revcount, capacity);
-			goto bail;
-		}
-		revs[revcount++] = (int)val;
-	duplicate:;
-	}
-
-	if (revcount == 0) {
-		ret = PyList_New(0);
-		goto done;
-	}
-	if (revcount == 1) {
-		PyObject *obj;
-		ret = PyList_New(1);
-		if (ret == NULL)
-			goto bail;
-		obj = PyInt_FromLong(revs[0]);
-		if (obj == NULL)
-			goto bail;
-		PyList_SET_ITEM(ret, 0, obj);
-		goto done;
-	}
-
-	gca = find_gca_candidates(self, revs, revcount);
-	if (gca == NULL)
-		goto bail;
-
-	if (PyList_GET_SIZE(gca) <= 1) {
-		ret = gca;
-		Py_INCREF(gca);
-	}
-	else ret = find_deepest(self, gca);
-
-done:
-	free(revs);
-	Py_XDECREF(gca);
-
-	return ret;
-
-bail:
-	free(revs);
-	Py_XDECREF(gca);
-	Py_XDECREF(ret);
-	return NULL;
-}
-
-/*
  * Given a (possibly overlapping) set of revs, return all the
  * common ancestors heads: heads(::args[0] and ::a[1] and ...)
  */
@@ -1871,6 +2004,24 @@
 }
 
 /*
+ * Given a (possibly overlapping) set of revs, return the greatest
+ * common ancestors: those with the longest path to the root.
+ */
+static PyObject *index_ancestors(indexObject *self, PyObject *args)
+{
+	PyObject *gca = index_commonancestorsheads(self, args);
+	if (gca == NULL)
+		return NULL;
+
+	if (PyList_GET_SIZE(gca) <= 1) {
+		Py_INCREF(gca);
+		return gca;
+	}
+
+	return find_deepest(self, gca);
+}
+
+/*
  * Invalidate any trie entries introduced by added revs.
  */
 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
@@ -2127,6 +2278,8 @@
 	 "clear the index caches"},
 	{"get", (PyCFunction)index_m_get, METH_VARARGS,
 	 "get an index entry"},
+	{"computephases", (PyCFunction)compute_phases, METH_VARARGS,
+		"compute phases"},
 	{"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
 	 "get head revisions"}, /* Can do filtering since 3.2 */
 	{"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
@@ -2230,6 +2383,157 @@
 	return NULL;
 }
 
+#define BUMPED_FIX 1
+#define USING_SHA_256 2
+
+static PyObject *readshas(
+	const char *source, unsigned char num, Py_ssize_t hashwidth)
+{
+	int i;
+	PyObject *list = PyTuple_New(num);
+	if (list == NULL) {
+		return NULL;
+	}
+	for (i = 0; i < num; i++) {
+		PyObject *hash = PyString_FromStringAndSize(source, hashwidth);
+		if (hash == NULL) {
+			Py_DECREF(list);
+			return NULL;
+		}
+		PyTuple_SetItem(list, i, hash);
+		source += hashwidth;
+	}
+	return list;
+}
+
+static PyObject *fm1readmarker(const char *data, uint32_t *msize)
+{
+	const char *meta;
+
+	double mtime;
+	int16_t tz;
+	uint16_t flags;
+	unsigned char nsuccs, nparents, nmetadata;
+	Py_ssize_t hashwidth = 20;
+
+	PyObject *prec = NULL, *parents = NULL, *succs = NULL;
+	PyObject *metadata = NULL, *ret = NULL;
+	int i;
+
+	*msize = getbe32(data);
+	data += 4;
+	mtime = getbefloat64(data);
+	data += 8;
+	tz = getbeint16(data);
+	data += 2;
+	flags = getbeuint16(data);
+	data += 2;
+
+	if (flags & USING_SHA_256) {
+		hashwidth = 32;
+	}
+
+	nsuccs = (unsigned char)(*data++);
+	nparents = (unsigned char)(*data++);
+	nmetadata = (unsigned char)(*data++);
+
+	prec = PyString_FromStringAndSize(data, hashwidth);
+	data += hashwidth;
+	if (prec == NULL) {
+		goto bail;
+	}
+
+	succs = readshas(data, nsuccs, hashwidth);
+	if (succs == NULL) {
+		goto bail;
+	}
+	data += nsuccs * hashwidth;
+
+	if (nparents == 1 || nparents == 2) {
+		parents = readshas(data, nparents, hashwidth);
+		if (parents == NULL) {
+			goto bail;
+		}
+		data += nparents * hashwidth;
+	} else {
+		parents = Py_None;
+	}
+
+	meta = data + (2 * nmetadata);
+	metadata = PyTuple_New(nmetadata);
+	if (metadata == NULL) {
+		goto bail;
+	}
+	for (i = 0; i < nmetadata; i++) {
+		PyObject *tmp, *left = NULL, *right = NULL;
+		Py_ssize_t metasize = (unsigned char)(*data++);
+		left = PyString_FromStringAndSize(meta, metasize);
+		meta += metasize;
+		metasize = (unsigned char)(*data++);
+		right = PyString_FromStringAndSize(meta, metasize);
+		meta += metasize;
+		if (!left || !right) {
+			Py_XDECREF(left);
+			Py_XDECREF(right);
+			goto bail;
+		}
+		tmp = PyTuple_Pack(2, left, right);
+		Py_DECREF(left);
+		Py_DECREF(right);
+		if (!tmp) {
+			goto bail;
+		}
+		PyTuple_SetItem(metadata, i, tmp);
+	}
+	ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags,
+			    metadata, mtime, (int)tz * 60, parents);
+bail:
+	Py_XDECREF(prec);
+	Py_XDECREF(succs);
+	Py_XDECREF(metadata);
+	if (parents != Py_None)
+		Py_XDECREF(parents);
+	return ret;
+}
+
+
+static PyObject *fm1readmarkers(PyObject *self, PyObject *args) {
+	const char *data;
+	Py_ssize_t datalen;
+	/* only unsigned long because python 2.4, should be Py_ssize_t */
+	unsigned long offset, stop;
+	PyObject *markers = NULL;
+
+	/* replace kk with nn when we drop Python 2.4 */
+	if (!PyArg_ParseTuple(args, "s#kk", &data, &datalen, &offset, &stop)) {
+		return NULL;
+	}
+	data += offset;
+	markers = PyList_New(0);
+	if (!markers) {
+		return NULL;
+	}
+	while (offset < stop) {
+		uint32_t msize;
+		int error;
+		PyObject *record = fm1readmarker(data, &msize);
+		if (!record) {
+			goto bail;
+		}
+		error = PyList_Append(markers, record);
+		Py_DECREF(record);
+		if (error) {
+			goto bail;
+		}
+		data += msize;
+		offset += msize;
+	}
+	return markers;
+bail:
+	Py_DECREF(markers);
+	return NULL;
+}
+
 static char parsers_doc[] = "Efficient content parsing.";
 
 PyObject *encodedir(PyObject *self, PyObject *args);
@@ -2242,13 +2546,19 @@
 	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
 	{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
 	{"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
+	{"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
+	{"make_file_foldmap", make_file_foldmap, METH_VARARGS,
+	 "make file foldmap\n"},
 	{"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
 	{"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
 	{"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
+	{"fm1readmarkers", fm1readmarkers, METH_VARARGS,
+			"parse v1 obsolete markers\n"},
 	{NULL, NULL}
 };
 
 void dirs_module_init(PyObject *mod);
+void manifest_module_init(PyObject *mod);
 
 static void module_init(PyObject *mod)
 {
@@ -2263,6 +2573,7 @@
 	PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
 
 	dirs_module_init(mod);
+	manifest_module_init(mod);
 
 	indexType.tp_new = PyType_GenericNew;
 	if (PyType_Ready(&indexType) < 0 ||