17 |
17 |
18 #include "compat.h" |
18 #include "compat.h" |
19 #include "util.h" |
19 #include "util.h" |
20 #include "bitmanipulation.h" |
20 #include "bitmanipulation.h" |
21 |
21 |
22 struct line { |
22 struct bdiff_line { |
23 int hash, n, e; |
23 int hash, n, e; |
24 ssize_t len; |
24 ssize_t len; |
25 const char *l; |
25 const char *l; |
26 }; |
26 }; |
27 |
27 |
28 struct pos { |
28 struct pos { |
29 int pos, len; |
29 int pos, len; |
30 }; |
30 }; |
31 |
31 |
32 struct hunk; |
32 struct bdiff_hunk; |
33 struct hunk { |
33 struct bdiff_hunk { |
34 int a1, a2, b1, b2; |
34 int a1, a2, b1, b2; |
35 struct hunk *next; |
35 struct bdiff_hunk *next; |
36 }; |
36 }; |
37 |
37 |
38 static int splitlines(const char *a, ssize_t len, struct line **lr) |
38 static int bdiff_splitlines(const char *a, ssize_t len, struct bdiff_line **lr) |
39 { |
39 { |
40 unsigned hash; |
40 unsigned hash; |
41 int i; |
41 int i; |
42 const char *p, *b = a; |
42 const char *p, *b = a; |
43 const char * const plast = a + len - 1; |
43 const char * const plast = a + len - 1; |
44 struct line *l; |
44 struct bdiff_line *l; |
45 |
45 |
46 /* count the lines */ |
46 /* count the lines */ |
47 i = 1; /* extra line for sentinel */ |
47 i = 1; /* extra line for sentinel */ |
48 for (p = a; p < a + len; p++) |
48 for (p = a; p < a + len; p++) |
49 if (*p == '\n' || p == plast) |
49 if (*p == '\n' || p == plast) |
50 i++; |
50 i++; |
51 |
51 |
52 *lr = l = (struct line *)malloc(sizeof(struct line) * i); |
52 *lr = l = (struct bdiff_line *)malloc(sizeof(struct bdiff_line) * i); |
53 if (!l) |
53 if (!l) |
54 return -1; |
54 return -1; |
55 |
55 |
56 /* build the line array and calculate hashes */ |
56 /* build the line array and calculate hashes */ |
57 hash = 0; |
57 hash = 0; |
75 l->len = 0; |
75 l->len = 0; |
76 l->l = a + len; |
76 l->l = a + len; |
77 return i - 1; |
77 return i - 1; |
78 } |
78 } |
79 |
79 |
80 static inline int cmp(struct line *a, struct line *b) |
80 static inline int cmp(struct bdiff_line *a, struct bdiff_line *b) |
81 { |
81 { |
82 return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len); |
82 return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len); |
83 } |
83 } |
84 |
84 |
85 static int equatelines(struct line *a, int an, struct line *b, int bn) |
85 static int equatelines(struct bdiff_line *a, int an, struct bdiff_line *b, |
|
86 int bn) |
86 { |
87 { |
87 int i, j, buckets = 1, t, scale; |
88 int i, j, buckets = 1, t, scale; |
88 struct pos *h = NULL; |
89 struct pos *h = NULL; |
89 |
90 |
90 /* build a hash table of the next highest power of 2 */ |
91 /* build a hash table of the next highest power of 2 */ |
145 /* discard hash tables */ |
146 /* discard hash tables */ |
146 free(h); |
147 free(h); |
147 return 1; |
148 return 1; |
148 } |
149 } |
149 |
150 |
150 static int longest_match(struct line *a, struct line *b, struct pos *pos, |
151 static int longest_match(struct bdiff_line *a, struct bdiff_line *b, |
|
152 struct pos *pos, |
151 int a1, int a2, int b1, int b2, int *omi, int *omj) |
153 int a1, int a2, int b1, int b2, int *omi, int *omj) |
152 { |
154 { |
153 int mi = a1, mj = b1, mk = 0, i, j, k, half; |
155 int mi = a1, mj = b1, mk = 0, i, j, k, half; |
154 |
156 |
155 /* window our search on large regions to better bound |
157 /* window our search on large regions to better bound |
206 *omj = mj; |
208 *omj = mj; |
207 |
209 |
208 return mk; |
210 return mk; |
209 } |
211 } |
210 |
212 |
211 static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos, |
213 static struct bdiff_hunk *recurse(struct bdiff_line *a, struct bdiff_line *b, |
212 int a1, int a2, int b1, int b2, struct hunk *l) |
214 struct pos *pos, |
|
215 int a1, int a2, int b1, int b2, struct bdiff_hunk *l) |
213 { |
216 { |
214 int i, j, k; |
217 int i, j, k; |
215 |
218 |
216 while (1) { |
219 while (1) { |
217 /* find the longest match in this chunk */ |
220 /* find the longest match in this chunk */ |
222 /* and recurse on the remaining chunks on either side */ |
225 /* and recurse on the remaining chunks on either side */ |
223 l = recurse(a, b, pos, a1, i, b1, j, l); |
226 l = recurse(a, b, pos, a1, i, b1, j, l); |
224 if (!l) |
227 if (!l) |
225 return NULL; |
228 return NULL; |
226 |
229 |
227 l->next = (struct hunk *)malloc(sizeof(struct hunk)); |
230 l->next = (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk)); |
228 if (!l->next) |
231 if (!l->next) |
229 return NULL; |
232 return NULL; |
230 |
233 |
231 l = l->next; |
234 l = l->next; |
232 l->a1 = i; |
235 l->a1 = i; |
239 a1 = i + k; |
242 a1 = i + k; |
240 b1 = j + k; |
243 b1 = j + k; |
241 } |
244 } |
242 } |
245 } |
243 |
246 |
244 static int diff(struct line *a, int an, struct line *b, int bn, |
247 static int bdiff_diff(struct bdiff_line *a, int an, struct bdiff_line *b, |
245 struct hunk *base) |
248 int bn, struct bdiff_hunk *base) |
246 { |
249 { |
247 struct hunk *curr; |
250 struct bdiff_hunk *curr; |
248 struct pos *pos; |
251 struct pos *pos; |
249 int t, count = 0; |
252 int t, count = 0; |
250 |
253 |
251 /* allocate and fill arrays */ |
254 /* allocate and fill arrays */ |
252 t = equatelines(a, an, b, bn); |
255 t = equatelines(a, an, b, bn); |
258 curr = recurse(a, b, pos, 0, an, 0, bn, base); |
261 curr = recurse(a, b, pos, 0, an, 0, bn, base); |
259 if (!curr) |
262 if (!curr) |
260 return -1; |
263 return -1; |
261 |
264 |
262 /* sentinel end hunk */ |
265 /* sentinel end hunk */ |
263 curr->next = (struct hunk *)malloc(sizeof(struct hunk)); |
266 curr->next = (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk)); |
264 if (!curr->next) |
267 if (!curr->next) |
265 return -1; |
268 return -1; |
266 curr = curr->next; |
269 curr = curr->next; |
267 curr->a1 = curr->a2 = an; |
270 curr->a1 = curr->a2 = an; |
268 curr->b1 = curr->b2 = bn; |
271 curr->b1 = curr->b2 = bn; |
293 for (curr = base->next; curr; curr = curr->next) |
296 for (curr = base->next; curr; curr = curr->next) |
294 count++; |
297 count++; |
295 return count; |
298 return count; |
296 } |
299 } |
297 |
300 |
298 static void freehunks(struct hunk *l) |
301 static void bdiff_freehunks(struct bdiff_hunk *l) |
299 { |
302 { |
300 struct hunk *n; |
303 struct bdiff_hunk *n; |
301 for (; l; l = n) { |
304 for (; l; l = n) { |
302 n = l->next; |
305 n = l->next; |
303 free(l); |
306 free(l); |
304 } |
307 } |
305 } |
308 } |
306 |
309 |
307 static PyObject *blocks(PyObject *self, PyObject *args) |
310 static PyObject *blocks(PyObject *self, PyObject *args) |
308 { |
311 { |
309 PyObject *sa, *sb, *rl = NULL, *m; |
312 PyObject *sa, *sb, *rl = NULL, *m; |
310 struct line *a, *b; |
313 struct bdiff_line *a, *b; |
311 struct hunk l, *h; |
314 struct bdiff_hunk l, *h; |
312 int an, bn, count, pos = 0; |
315 int an, bn, count, pos = 0; |
313 |
316 |
314 l.next = NULL; |
317 l.next = NULL; |
315 |
318 |
316 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) |
319 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) |
317 return NULL; |
320 return NULL; |
318 |
321 |
319 an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a); |
322 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a); |
320 bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b); |
323 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b); |
321 |
324 |
322 if (!a || !b) |
325 if (!a || !b) |
323 goto nomem; |
326 goto nomem; |
324 |
327 |
325 count = diff(a, an, b, bn, &l); |
328 count = bdiff_diff(a, an, b, bn, &l); |
326 if (count < 0) |
329 if (count < 0) |
327 goto nomem; |
330 goto nomem; |
328 |
331 |
329 rl = PyList_New(count); |
332 rl = PyList_New(count); |
330 if (!rl) |
333 if (!rl) |
337 } |
340 } |
338 |
341 |
339 nomem: |
342 nomem: |
340 free(a); |
343 free(a); |
341 free(b); |
344 free(b); |
342 freehunks(l.next); |
345 bdiff_freehunks(l.next); |
343 return rl ? rl : PyErr_NoMemory(); |
346 return rl ? rl : PyErr_NoMemory(); |
344 } |
347 } |
345 |
348 |
346 static PyObject *bdiff(PyObject *self, PyObject *args) |
349 static PyObject *bdiff(PyObject *self, PyObject *args) |
347 { |
350 { |
348 char *sa, *sb, *rb; |
351 char *sa, *sb, *rb; |
349 PyObject *result = NULL; |
352 PyObject *result = NULL; |
350 struct line *al, *bl; |
353 struct bdiff_line *al, *bl; |
351 struct hunk l, *h; |
354 struct bdiff_hunk l, *h; |
352 int an, bn, count; |
355 int an, bn, count; |
353 Py_ssize_t len = 0, la, lb; |
356 Py_ssize_t len = 0, la, lb; |
354 PyThreadState *_save; |
357 PyThreadState *_save; |
355 |
358 |
356 l.next = NULL; |
359 l.next = NULL; |
362 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large"); |
365 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large"); |
363 return NULL; |
366 return NULL; |
364 } |
367 } |
365 |
368 |
366 _save = PyEval_SaveThread(); |
369 _save = PyEval_SaveThread(); |
367 an = splitlines(sa, la, &al); |
370 an = bdiff_splitlines(sa, la, &al); |
368 bn = splitlines(sb, lb, &bl); |
371 bn = bdiff_splitlines(sb, lb, &bl); |
369 if (!al || !bl) |
372 if (!al || !bl) |
370 goto nomem; |
373 goto nomem; |
371 |
374 |
372 count = diff(al, an, bl, bn, &l); |
375 count = bdiff_diff(al, an, bl, bn, &l); |
373 if (count < 0) |
376 if (count < 0) |
374 goto nomem; |
377 goto nomem; |
375 |
378 |
376 /* calculate length of output */ |
379 /* calculate length of output */ |
377 la = lb = 0; |
380 la = lb = 0; |
409 nomem: |
412 nomem: |
410 if (_save) |
413 if (_save) |
411 PyEval_RestoreThread(_save); |
414 PyEval_RestoreThread(_save); |
412 free(al); |
415 free(al); |
413 free(bl); |
416 free(bl); |
414 freehunks(l.next); |
417 bdiff_freehunks(l.next); |
415 return result ? result : PyErr_NoMemory(); |
418 return result ? result : PyErr_NoMemory(); |
416 } |
419 } |
417 |
420 |
418 /* |
421 /* |
419 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise, |
422 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise, |