mercurial/cext/revlog.c
changeset 40707 cc76ca9fca20
parent 40706 0650be877a37
child 40741 959130631de3
equal deleted inserted replaced
40706:0650be877a37 40707:cc76ca9fca20
    10 #include <Python.h>
    10 #include <Python.h>
    11 #include <assert.h>
    11 #include <assert.h>
    12 #include <ctype.h>
    12 #include <ctype.h>
    13 #include <limits.h>
    13 #include <limits.h>
    14 #include <stddef.h>
    14 #include <stddef.h>
       
    15 #include <stdlib.h>
    15 #include <string.h>
    16 #include <string.h>
    16 
    17 
    17 #include "bitmanipulation.h"
    18 #include "bitmanipulation.h"
    18 #include "charencode.h"
    19 #include "charencode.h"
    19 #include "util.h"
    20 #include "util.h"
  1092 			break;
  1093 			break;
  1093 		}
  1094 		}
  1094 		endidx -= 1;
  1095 		endidx -= 1;
  1095 	}
  1096 	}
  1096 	return endidx;
  1097 	return endidx;
       
  1098 }
       
  1099 
       
  1100 struct Gap {
       
  1101 	int64_t size;
       
  1102 	Py_ssize_t idx;
       
  1103 };
       
  1104 
       
  1105 static int gap_compare(const void *left, const void *right)
       
  1106 {
       
  1107 	const struct Gap *l_left = ((const struct Gap *)left);
       
  1108 	const struct Gap *l_right = ((const struct Gap *)right);
       
  1109 	if (l_left->size < l_right->size) {
       
  1110 		return -1;
       
  1111 	} else if (l_left->size > l_right->size) {
       
  1112 		return 1;
       
  1113 	}
       
  1114 	return 0;
       
  1115 }
       
  1116 static int Py_ssize_t_compare(const void *left, const void *right)
       
  1117 {
       
  1118 	const Py_ssize_t l_left = *(const Py_ssize_t *)left;
       
  1119 	const Py_ssize_t l_right = *(const Py_ssize_t *)right;
       
  1120 	if (l_left < l_right) {
       
  1121 		return -1;
       
  1122 	} else if (l_left > l_right) {
       
  1123 		return 1;
       
  1124 	}
       
  1125 	return 0;
       
  1126 }
       
  1127 
       
  1128 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
       
  1129 {
       
  1130 	/* method arguments */
       
  1131 	PyObject *list_revs = NULL; /* revisions in the chain */
       
  1132 	double targetdensity = 0;   /* min density to achieve */
       
  1133 	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */
       
  1134 
       
  1135 	/* other core variables */
       
  1136 	Py_ssize_t idxlen = index_length(self);
       
  1137 	Py_ssize_t i;            /* used for various iteration */
       
  1138 	PyObject *result = NULL; /* the final return of the function */
       
  1139 
       
  1140 	/* generic information about the delta chain being slice */
       
  1141 	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
       
  1142 	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
       
  1143 	int64_t chainpayload = 0;   /* sum of all delta in the chain */
       
  1144 	int64_t deltachainspan = 0; /* distance from first byte to last byte */
       
  1145 
       
  1146 	/* variable used for slicing the delta chain */
       
  1147 	int64_t readdata = 0; /* amount of data currently planned to be read */
       
  1148 	double density = 0;   /* ration of payload data compared to read ones */
       
  1149 	int64_t previous_end;
       
  1150 	struct Gap *gaps = NULL; /* array of notable gap in the chain */
       
  1151 	Py_ssize_t num_gaps =
       
  1152 	    0; /* total number of notable gap recorded so far */
       
  1153 	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
       
  1154 	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
       
  1155 	PyObject *chunk = NULL;              /* individual slice */
       
  1156 	PyObject *allchunks = NULL;          /* all slices */
       
  1157 	Py_ssize_t previdx;
       
  1158 
       
  1159 	/* parsing argument */
       
  1160 	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
       
  1161 	                      &targetdensity, &mingapsize)) {
       
  1162 		goto bail;
       
  1163 	}
       
  1164 
       
  1165 	/* If the delta chain contains a single element, we do not need slicing
       
  1166 	 */
       
  1167 	num_revs = PyList_GET_SIZE(list_revs);
       
  1168 	if (num_revs <= 1) {
       
  1169 		result = PyTuple_Pack(1, list_revs);
       
  1170 		goto done;
       
  1171 	}
       
  1172 
       
  1173 	/* Turn the python list into a native integer array (for efficiency) */
       
  1174 	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
       
  1175 	if (revs == NULL) {
       
  1176 		PyErr_NoMemory();
       
  1177 		goto bail;
       
  1178 	}
       
  1179 	for (i = 0; i < num_revs; i++) {
       
  1180 		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
       
  1181 		if (revnum == -1 && PyErr_Occurred()) {
       
  1182 			goto bail;
       
  1183 		}
       
  1184 		if (revnum < 0 || revnum >= idxlen) {
       
  1185 			PyErr_SetString(PyExc_IndexError, "index out of range");
       
  1186 			goto bail;
       
  1187 		}
       
  1188 		revs[i] = revnum;
       
  1189 	}
       
  1190 
       
  1191 	/* Compute and check various property of the unsliced delta chain */
       
  1192 	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
       
  1193 	if (deltachainspan < 0) {
       
  1194 		goto bail;
       
  1195 	}
       
  1196 
       
  1197 	if (deltachainspan <= mingapsize) {
       
  1198 		result = PyTuple_Pack(1, list_revs);
       
  1199 		goto done;
       
  1200 	}
       
  1201 	chainpayload = 0;
       
  1202 	for (i = 0; i < num_revs; i++) {
       
  1203 		int tmp = index_get_length(self, revs[i]);
       
  1204 		if (tmp < 0) {
       
  1205 			goto bail;
       
  1206 		}
       
  1207 		chainpayload += tmp;
       
  1208 	}
       
  1209 
       
  1210 	readdata = deltachainspan;
       
  1211 	density = 1.0;
       
  1212 
       
  1213 	if (0 < deltachainspan) {
       
  1214 		density = (double)chainpayload / (double)deltachainspan;
       
  1215 	}
       
  1216 
       
  1217 	if (density >= targetdensity) {
       
  1218 		result = PyTuple_Pack(1, list_revs);
       
  1219 		goto done;
       
  1220 	}
       
  1221 
       
  1222 	/* if chain is too sparse, look for relevant gaps */
       
  1223 	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
       
  1224 	if (gaps == NULL) {
       
  1225 		PyErr_NoMemory();
       
  1226 		goto bail;
       
  1227 	}
       
  1228 
       
  1229 	previous_end = -1;
       
  1230 	for (i = 0; i < num_revs; i++) {
       
  1231 		int64_t revstart;
       
  1232 		int revsize;
       
  1233 		revstart = index_get_start(self, revs[i]);
       
  1234 		if (revstart < 0) {
       
  1235 			goto bail;
       
  1236 		};
       
  1237 		revsize = index_get_length(self, revs[i]);
       
  1238 		if (revsize < 0) {
       
  1239 			goto bail;
       
  1240 		};
       
  1241 		if (revsize == 0) {
       
  1242 			continue;
       
  1243 		}
       
  1244 		if (previous_end >= 0) {
       
  1245 			int64_t gapsize = revstart - previous_end;
       
  1246 			if (gapsize > mingapsize) {
       
  1247 				gaps[num_gaps].size = gapsize;
       
  1248 				gaps[num_gaps].idx = i;
       
  1249 				num_gaps += 1;
       
  1250 			}
       
  1251 		}
       
  1252 		previous_end = revstart + revsize;
       
  1253 	}
       
  1254 	if (num_gaps == 0) {
       
  1255 		result = PyTuple_Pack(1, list_revs);
       
  1256 		goto done;
       
  1257 	}
       
  1258 	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
       
  1259 
       
  1260 	/* Slice the largest gap first, they improve the density the most */
       
  1261 	selected_indices =
       
  1262 	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
       
  1263 	if (selected_indices == NULL) {
       
  1264 		PyErr_NoMemory();
       
  1265 		goto bail;
       
  1266 	}
       
  1267 
       
  1268 	for (i = num_gaps - 1; i >= 0; i--) {
       
  1269 		selected_indices[num_selected] = gaps[i].idx;
       
  1270 		readdata -= gaps[i].size;
       
  1271 		num_selected += 1;
       
  1272 		if (readdata <= 0) {
       
  1273 			density = 1.0;
       
  1274 		} else {
       
  1275 			density = (double)chainpayload / (double)readdata;
       
  1276 		}
       
  1277 		if (density >= targetdensity) {
       
  1278 			break;
       
  1279 		}
       
  1280 	}
       
  1281 	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
       
  1282 	      &Py_ssize_t_compare);
       
  1283 
       
  1284 	/* create the resulting slice */
       
  1285 	allchunks = PyList_New(0);
       
  1286 	if (allchunks == NULL) {
       
  1287 		goto bail;
       
  1288 	}
       
  1289 	previdx = 0;
       
  1290 	selected_indices[num_selected] = num_revs;
       
  1291 	for (i = 0; i <= num_selected; i++) {
       
  1292 		Py_ssize_t idx = selected_indices[i];
       
  1293 		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
       
  1294 		if (endidx < 0) {
       
  1295 			goto bail;
       
  1296 		}
       
  1297 		if (previdx < endidx) {
       
  1298 			chunk = PyList_GetSlice(list_revs, previdx, endidx);
       
  1299 			if (chunk == NULL) {
       
  1300 				goto bail;
       
  1301 			}
       
  1302 			if (PyList_Append(allchunks, chunk) == -1) {
       
  1303 				goto bail;
       
  1304 			}
       
  1305 			Py_DECREF(chunk);
       
  1306 			chunk = NULL;
       
  1307 		}
       
  1308 		previdx = idx;
       
  1309 	}
       
  1310 	result = allchunks;
       
  1311 	goto done;
       
  1312 
       
  1313 bail:
       
  1314 	Py_XDECREF(allchunks);
       
  1315 	Py_XDECREF(chunk);
       
  1316 done:
       
  1317 	free(revs);
       
  1318 	free(gaps);
       
  1319 	free(selected_indices);
       
  1320 	return result;
  1097 }
  1321 }
  1098 
  1322 
  1099 static inline int nt_level(const char *node, Py_ssize_t level)
  1323 static inline int nt_level(const char *node, Py_ssize_t level)
  1100 {
  1324 {
  1101 	int v = node[level >> 1];
  1325 	int v = node[level >> 1];
  2326      "get head revisions"}, /* Can do filtering since 3.2 */
  2550      "get head revisions"}, /* Can do filtering since 3.2 */
  2327     {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
  2551     {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
  2328      "get filtered head revisions"}, /* Can always do filtering */
  2552      "get filtered head revisions"}, /* Can always do filtering */
  2329     {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
  2553     {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
  2330      "determine revisions with deltas to reconstruct fulltext"},
  2554      "determine revisions with deltas to reconstruct fulltext"},
       
  2555     {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
       
  2556      METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
  2331     {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
  2557     {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
  2332     {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
  2558     {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
  2333      "match a potentially ambiguous node ID"},
  2559      "match a potentially ambiguous node ID"},
  2334     {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
  2560     {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
  2335      "find length of shortest hex nodeid of a binary ID"},
  2561      "find length of shortest hex nodeid of a binary ID"},