1092 break; |
1093 break; |
1093 } |
1094 } |
1094 endidx -= 1; |
1095 endidx -= 1; |
1095 } |
1096 } |
1096 return endidx; |
1097 return endidx; |
|
1098 } |
|
1099 |
|
1100 struct Gap { |
|
1101 int64_t size; |
|
1102 Py_ssize_t idx; |
|
1103 }; |
|
1104 |
|
1105 static int gap_compare(const void *left, const void *right) |
|
1106 { |
|
1107 const struct Gap *l_left = ((const struct Gap *)left); |
|
1108 const struct Gap *l_right = ((const struct Gap *)right); |
|
1109 if (l_left->size < l_right->size) { |
|
1110 return -1; |
|
1111 } else if (l_left->size > l_right->size) { |
|
1112 return 1; |
|
1113 } |
|
1114 return 0; |
|
1115 } |
|
1116 static int Py_ssize_t_compare(const void *left, const void *right) |
|
1117 { |
|
1118 const Py_ssize_t l_left = *(const Py_ssize_t *)left; |
|
1119 const Py_ssize_t l_right = *(const Py_ssize_t *)right; |
|
1120 if (l_left < l_right) { |
|
1121 return -1; |
|
1122 } else if (l_left > l_right) { |
|
1123 return 1; |
|
1124 } |
|
1125 return 0; |
|
1126 } |
|
1127 |
|
1128 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args) |
|
1129 { |
|
1130 /* method arguments */ |
|
1131 PyObject *list_revs = NULL; /* revisions in the chain */ |
|
1132 double targetdensity = 0; /* min density to achieve */ |
|
1133 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */ |
|
1134 |
|
1135 /* other core variables */ |
|
1136 Py_ssize_t idxlen = index_length(self); |
|
1137 Py_ssize_t i; /* used for various iteration */ |
|
1138 PyObject *result = NULL; /* the final return of the function */ |
|
1139 |
|
1140 /* generic information about the delta chain being slice */ |
|
1141 Py_ssize_t num_revs = 0; /* size of the full delta chain */ |
|
1142 Py_ssize_t *revs = NULL; /* native array of revision in the chain */ |
|
1143 int64_t chainpayload = 0; /* sum of all delta in the chain */ |
|
1144 int64_t deltachainspan = 0; /* distance from first byte to last byte */ |
|
1145 |
|
1146 /* variable used for slicing the delta chain */ |
|
1147 int64_t readdata = 0; /* amount of data currently planned to be read */ |
|
1148 double density = 0; /* ration of payload data compared to read ones */ |
|
1149 int64_t previous_end; |
|
1150 struct Gap *gaps = NULL; /* array of notable gap in the chain */ |
|
1151 Py_ssize_t num_gaps = |
|
1152 0; /* total number of notable gap recorded so far */ |
|
1153 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */ |
|
1154 Py_ssize_t num_selected = 0; /* number of gaps skipped */ |
|
1155 PyObject *chunk = NULL; /* individual slice */ |
|
1156 PyObject *allchunks = NULL; /* all slices */ |
|
1157 Py_ssize_t previdx; |
|
1158 |
|
1159 /* parsing argument */ |
|
1160 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs, |
|
1161 &targetdensity, &mingapsize)) { |
|
1162 goto bail; |
|
1163 } |
|
1164 |
|
1165 /* If the delta chain contains a single element, we do not need slicing |
|
1166 */ |
|
1167 num_revs = PyList_GET_SIZE(list_revs); |
|
1168 if (num_revs <= 1) { |
|
1169 result = PyTuple_Pack(1, list_revs); |
|
1170 goto done; |
|
1171 } |
|
1172 |
|
1173 /* Turn the python list into a native integer array (for efficiency) */ |
|
1174 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t)); |
|
1175 if (revs == NULL) { |
|
1176 PyErr_NoMemory(); |
|
1177 goto bail; |
|
1178 } |
|
1179 for (i = 0; i < num_revs; i++) { |
|
1180 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i)); |
|
1181 if (revnum == -1 && PyErr_Occurred()) { |
|
1182 goto bail; |
|
1183 } |
|
1184 if (revnum < 0 || revnum >= idxlen) { |
|
1185 PyErr_SetString(PyExc_IndexError, "index out of range"); |
|
1186 goto bail; |
|
1187 } |
|
1188 revs[i] = revnum; |
|
1189 } |
|
1190 |
|
1191 /* Compute and check various property of the unsliced delta chain */ |
|
1192 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]); |
|
1193 if (deltachainspan < 0) { |
|
1194 goto bail; |
|
1195 } |
|
1196 |
|
1197 if (deltachainspan <= mingapsize) { |
|
1198 result = PyTuple_Pack(1, list_revs); |
|
1199 goto done; |
|
1200 } |
|
1201 chainpayload = 0; |
|
1202 for (i = 0; i < num_revs; i++) { |
|
1203 int tmp = index_get_length(self, revs[i]); |
|
1204 if (tmp < 0) { |
|
1205 goto bail; |
|
1206 } |
|
1207 chainpayload += tmp; |
|
1208 } |
|
1209 |
|
1210 readdata = deltachainspan; |
|
1211 density = 1.0; |
|
1212 |
|
1213 if (0 < deltachainspan) { |
|
1214 density = (double)chainpayload / (double)deltachainspan; |
|
1215 } |
|
1216 |
|
1217 if (density >= targetdensity) { |
|
1218 result = PyTuple_Pack(1, list_revs); |
|
1219 goto done; |
|
1220 } |
|
1221 |
|
1222 /* if chain is too sparse, look for relevant gaps */ |
|
1223 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap)); |
|
1224 if (gaps == NULL) { |
|
1225 PyErr_NoMemory(); |
|
1226 goto bail; |
|
1227 } |
|
1228 |
|
1229 previous_end = -1; |
|
1230 for (i = 0; i < num_revs; i++) { |
|
1231 int64_t revstart; |
|
1232 int revsize; |
|
1233 revstart = index_get_start(self, revs[i]); |
|
1234 if (revstart < 0) { |
|
1235 goto bail; |
|
1236 }; |
|
1237 revsize = index_get_length(self, revs[i]); |
|
1238 if (revsize < 0) { |
|
1239 goto bail; |
|
1240 }; |
|
1241 if (revsize == 0) { |
|
1242 continue; |
|
1243 } |
|
1244 if (previous_end >= 0) { |
|
1245 int64_t gapsize = revstart - previous_end; |
|
1246 if (gapsize > mingapsize) { |
|
1247 gaps[num_gaps].size = gapsize; |
|
1248 gaps[num_gaps].idx = i; |
|
1249 num_gaps += 1; |
|
1250 } |
|
1251 } |
|
1252 previous_end = revstart + revsize; |
|
1253 } |
|
1254 if (num_gaps == 0) { |
|
1255 result = PyTuple_Pack(1, list_revs); |
|
1256 goto done; |
|
1257 } |
|
1258 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare); |
|
1259 |
|
1260 /* Slice the largest gap first, they improve the density the most */ |
|
1261 selected_indices = |
|
1262 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t)); |
|
1263 if (selected_indices == NULL) { |
|
1264 PyErr_NoMemory(); |
|
1265 goto bail; |
|
1266 } |
|
1267 |
|
1268 for (i = num_gaps - 1; i >= 0; i--) { |
|
1269 selected_indices[num_selected] = gaps[i].idx; |
|
1270 readdata -= gaps[i].size; |
|
1271 num_selected += 1; |
|
1272 if (readdata <= 0) { |
|
1273 density = 1.0; |
|
1274 } else { |
|
1275 density = (double)chainpayload / (double)readdata; |
|
1276 } |
|
1277 if (density >= targetdensity) { |
|
1278 break; |
|
1279 } |
|
1280 } |
|
1281 qsort(selected_indices, num_selected, sizeof(Py_ssize_t), |
|
1282 &Py_ssize_t_compare); |
|
1283 |
|
1284 /* create the resulting slice */ |
|
1285 allchunks = PyList_New(0); |
|
1286 if (allchunks == NULL) { |
|
1287 goto bail; |
|
1288 } |
|
1289 previdx = 0; |
|
1290 selected_indices[num_selected] = num_revs; |
|
1291 for (i = 0; i <= num_selected; i++) { |
|
1292 Py_ssize_t idx = selected_indices[i]; |
|
1293 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx); |
|
1294 if (endidx < 0) { |
|
1295 goto bail; |
|
1296 } |
|
1297 if (previdx < endidx) { |
|
1298 chunk = PyList_GetSlice(list_revs, previdx, endidx); |
|
1299 if (chunk == NULL) { |
|
1300 goto bail; |
|
1301 } |
|
1302 if (PyList_Append(allchunks, chunk) == -1) { |
|
1303 goto bail; |
|
1304 } |
|
1305 Py_DECREF(chunk); |
|
1306 chunk = NULL; |
|
1307 } |
|
1308 previdx = idx; |
|
1309 } |
|
1310 result = allchunks; |
|
1311 goto done; |
|
1312 |
|
1313 bail: |
|
1314 Py_XDECREF(allchunks); |
|
1315 Py_XDECREF(chunk); |
|
1316 done: |
|
1317 free(revs); |
|
1318 free(gaps); |
|
1319 free(selected_indices); |
|
1320 return result; |
1097 } |
1321 } |
1098 |
1322 |
1099 static inline int nt_level(const char *node, Py_ssize_t level) |
1323 static inline int nt_level(const char *node, Py_ssize_t level) |
1100 { |
1324 { |
1101 int v = node[level >> 1]; |
1325 int v = node[level >> 1]; |
2326 "get head revisions"}, /* Can do filtering since 3.2 */ |
2550 "get head revisions"}, /* Can do filtering since 3.2 */ |
2327 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS, |
2551 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS, |
2328 "get filtered head revisions"}, /* Can always do filtering */ |
2552 "get filtered head revisions"}, /* Can always do filtering */ |
2329 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS, |
2553 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS, |
2330 "determine revisions with deltas to reconstruct fulltext"}, |
2554 "determine revisions with deltas to reconstruct fulltext"}, |
|
2555 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity, |
|
2556 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"}, |
2331 {"append", (PyCFunction)index_append, METH_O, "append an index entry"}, |
2557 {"append", (PyCFunction)index_append, METH_O, "append an index entry"}, |
2332 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS, |
2558 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS, |
2333 "match a potentially ambiguous node ID"}, |
2559 "match a potentially ambiguous node ID"}, |
2334 {"shortest", (PyCFunction)index_shortest, METH_VARARGS, |
2560 {"shortest", (PyCFunction)index_shortest, METH_VARARGS, |
2335 "find length of shortest hex nodeid of a binary ID"}, |
2561 "find length of shortest hex nodeid of a binary ID"}, |