mercurial/bdiff.c
changeset 29540 4ce1fc91e30a
parent 29539 666832b9e154
child 29541 9631ff5ebbeb
equal deleted inserted replaced
29539:666832b9e154 29540:4ce1fc91e30a
    17 
    17 
    18 #include "compat.h"
    18 #include "compat.h"
    19 #include "util.h"
    19 #include "util.h"
    20 #include "bitmanipulation.h"
    20 #include "bitmanipulation.h"
    21 
    21 
    22 struct line {
    22 struct bdiff_line {
    23 	int hash, n, e;
    23 	int hash, n, e;
    24 	ssize_t len;
    24 	ssize_t len;
    25 	const char *l;
    25 	const char *l;
    26 };
    26 };
    27 
    27 
    28 struct pos {
    28 struct pos {
    29 	int pos, len;
    29 	int pos, len;
    30 };
    30 };
    31 
    31 
    32 struct hunk;
    32 struct bdiff_hunk;
    33 struct hunk {
    33 struct bdiff_hunk {
    34 	int a1, a2, b1, b2;
    34 	int a1, a2, b1, b2;
    35 	struct hunk *next;
    35 	struct bdiff_hunk *next;
    36 };
    36 };
    37 
    37 
    38 static int splitlines(const char *a, ssize_t len, struct line **lr)
    38 static int bdiff_splitlines(const char *a, ssize_t len, struct bdiff_line **lr)
    39 {
    39 {
    40 	unsigned hash;
    40 	unsigned hash;
    41 	int i;
    41 	int i;
    42 	const char *p, *b = a;
    42 	const char *p, *b = a;
    43 	const char * const plast = a + len - 1;
    43 	const char * const plast = a + len - 1;
    44 	struct line *l;
    44 	struct bdiff_line *l;
    45 
    45 
    46 	/* count the lines */
    46 	/* count the lines */
    47 	i = 1; /* extra line for sentinel */
    47 	i = 1; /* extra line for sentinel */
    48 	for (p = a; p < a + len; p++)
    48 	for (p = a; p < a + len; p++)
    49 		if (*p == '\n' || p == plast)
    49 		if (*p == '\n' || p == plast)
    50 			i++;
    50 			i++;
    51 
    51 
    52 	*lr = l = (struct line *)malloc(sizeof(struct line) * i);
    52 	*lr = l = (struct bdiff_line *)malloc(sizeof(struct bdiff_line) * i);
    53 	if (!l)
    53 	if (!l)
    54 		return -1;
    54 		return -1;
    55 
    55 
    56 	/* build the line array and calculate hashes */
    56 	/* build the line array and calculate hashes */
    57 	hash = 0;
    57 	hash = 0;
    75 	l->len = 0;
    75 	l->len = 0;
    76 	l->l = a + len;
    76 	l->l = a + len;
    77 	return i - 1;
    77 	return i - 1;
    78 }
    78 }
    79 
    79 
    80 static inline int cmp(struct line *a, struct line *b)
    80 static inline int cmp(struct bdiff_line *a, struct bdiff_line *b)
    81 {
    81 {
    82 	return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
    82 	return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
    83 }
    83 }
    84 
    84 
    85 static int equatelines(struct line *a, int an, struct line *b, int bn)
    85 static int equatelines(struct bdiff_line *a, int an, struct bdiff_line *b,
       
    86 	int bn)
    86 {
    87 {
    87 	int i, j, buckets = 1, t, scale;
    88 	int i, j, buckets = 1, t, scale;
    88 	struct pos *h = NULL;
    89 	struct pos *h = NULL;
    89 
    90 
    90 	/* build a hash table of the next highest power of 2 */
    91 	/* build a hash table of the next highest power of 2 */
   145 	/* discard hash tables */
   146 	/* discard hash tables */
   146 	free(h);
   147 	free(h);
   147 	return 1;
   148 	return 1;
   148 }
   149 }
   149 
   150 
   150 static int longest_match(struct line *a, struct line *b, struct pos *pos,
   151 static int longest_match(struct bdiff_line *a, struct bdiff_line *b,
       
   152 			struct pos *pos,
   151 			 int a1, int a2, int b1, int b2, int *omi, int *omj)
   153 			 int a1, int a2, int b1, int b2, int *omi, int *omj)
   152 {
   154 {
   153 	int mi = a1, mj = b1, mk = 0, i, j, k, half;
   155 	int mi = a1, mj = b1, mk = 0, i, j, k, half;
   154 
   156 
   155 	/* window our search on large regions to better bound
   157 	/* window our search on large regions to better bound
   206 	*omj = mj;
   208 	*omj = mj;
   207 
   209 
   208 	return mk;
   210 	return mk;
   209 }
   211 }
   210 
   212 
   211 static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
   213 static struct bdiff_hunk *recurse(struct bdiff_line *a, struct bdiff_line *b,
   212 			    int a1, int a2, int b1, int b2, struct hunk *l)
   214 				struct pos *pos,
       
   215 			    int a1, int a2, int b1, int b2, struct bdiff_hunk *l)
   213 {
   216 {
   214 	int i, j, k;
   217 	int i, j, k;
   215 
   218 
   216 	while (1) {
   219 	while (1) {
   217 		/* find the longest match in this chunk */
   220 		/* find the longest match in this chunk */
   222 		/* and recurse on the remaining chunks on either side */
   225 		/* and recurse on the remaining chunks on either side */
   223 		l = recurse(a, b, pos, a1, i, b1, j, l);
   226 		l = recurse(a, b, pos, a1, i, b1, j, l);
   224 		if (!l)
   227 		if (!l)
   225 			return NULL;
   228 			return NULL;
   226 
   229 
   227 		l->next = (struct hunk *)malloc(sizeof(struct hunk));
   230 		l->next = (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
   228 		if (!l->next)
   231 		if (!l->next)
   229 			return NULL;
   232 			return NULL;
   230 
   233 
   231 		l = l->next;
   234 		l = l->next;
   232 		l->a1 = i;
   235 		l->a1 = i;
   239 		a1 = i + k;
   242 		a1 = i + k;
   240 		b1 = j + k;
   243 		b1 = j + k;
   241 	}
   244 	}
   242 }
   245 }
   243 
   246 
   244 static int diff(struct line *a, int an, struct line *b, int bn,
   247 static int bdiff_diff(struct bdiff_line *a, int an, struct bdiff_line *b,
   245 		 struct hunk *base)
   248 		int bn, struct bdiff_hunk *base)
   246 {
   249 {
   247 	struct hunk *curr;
   250 	struct bdiff_hunk *curr;
   248 	struct pos *pos;
   251 	struct pos *pos;
   249 	int t, count = 0;
   252 	int t, count = 0;
   250 
   253 
   251 	/* allocate and fill arrays */
   254 	/* allocate and fill arrays */
   252 	t = equatelines(a, an, b, bn);
   255 	t = equatelines(a, an, b, bn);
   258 		curr = recurse(a, b, pos, 0, an, 0, bn, base);
   261 		curr = recurse(a, b, pos, 0, an, 0, bn, base);
   259 		if (!curr)
   262 		if (!curr)
   260 			return -1;
   263 			return -1;
   261 
   264 
   262 		/* sentinel end hunk */
   265 		/* sentinel end hunk */
   263 		curr->next = (struct hunk *)malloc(sizeof(struct hunk));
   266 		curr->next = (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
   264 		if (!curr->next)
   267 		if (!curr->next)
   265 			return -1;
   268 			return -1;
   266 		curr = curr->next;
   269 		curr = curr->next;
   267 		curr->a1 = curr->a2 = an;
   270 		curr->a1 = curr->a2 = an;
   268 		curr->b1 = curr->b2 = bn;
   271 		curr->b1 = curr->b2 = bn;
   271 
   274 
   272 	free(pos);
   275 	free(pos);
   273 
   276 
   274 	/* normalize the hunk list, try to push each hunk towards the end */
   277 	/* normalize the hunk list, try to push each hunk towards the end */
   275 	for (curr = base->next; curr; curr = curr->next) {
   278 	for (curr = base->next; curr; curr = curr->next) {
   276 		struct hunk *next = curr->next;
   279 		struct bdiff_hunk *next = curr->next;
   277 
   280 
   278 		if (!next)
   281 		if (!next)
   279 			break;
   282 			break;
   280 
   283 
   281 		if (curr->a2 == next->a1 || curr->b2 == next->b1)
   284 		if (curr->a2 == next->a1 || curr->b2 == next->b1)
   293 	for (curr = base->next; curr; curr = curr->next)
   296 	for (curr = base->next; curr; curr = curr->next)
   294 		count++;
   297 		count++;
   295 	return count;
   298 	return count;
   296 }
   299 }
   297 
   300 
   298 static void freehunks(struct hunk *l)
   301 static void bdiff_freehunks(struct bdiff_hunk *l)
   299 {
   302 {
   300 	struct hunk *n;
   303 	struct bdiff_hunk *n;
   301 	for (; l; l = n) {
   304 	for (; l; l = n) {
   302 		n = l->next;
   305 		n = l->next;
   303 		free(l);
   306 		free(l);
   304 	}
   307 	}
   305 }
   308 }
   306 
   309 
   307 static PyObject *blocks(PyObject *self, PyObject *args)
   310 static PyObject *blocks(PyObject *self, PyObject *args)
   308 {
   311 {
   309 	PyObject *sa, *sb, *rl = NULL, *m;
   312 	PyObject *sa, *sb, *rl = NULL, *m;
   310 	struct line *a, *b;
   313 	struct bdiff_line *a, *b;
   311 	struct hunk l, *h;
   314 	struct bdiff_hunk l, *h;
   312 	int an, bn, count, pos = 0;
   315 	int an, bn, count, pos = 0;
   313 
   316 
   314 	l.next = NULL;
   317 	l.next = NULL;
   315 
   318 
   316 	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
   319 	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
   317 		return NULL;
   320 		return NULL;
   318 
   321 
   319 	an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
   322 	an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
   320 	bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
   323 	bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
   321 
   324 
   322 	if (!a || !b)
   325 	if (!a || !b)
   323 		goto nomem;
   326 		goto nomem;
   324 
   327 
   325 	count = diff(a, an, b, bn, &l);
   328 	count = bdiff_diff(a, an, b, bn, &l);
   326 	if (count < 0)
   329 	if (count < 0)
   327 		goto nomem;
   330 		goto nomem;
   328 
   331 
   329 	rl = PyList_New(count);
   332 	rl = PyList_New(count);
   330 	if (!rl)
   333 	if (!rl)
   337 	}
   340 	}
   338 
   341 
   339 nomem:
   342 nomem:
   340 	free(a);
   343 	free(a);
   341 	free(b);
   344 	free(b);
   342 	freehunks(l.next);
   345 	bdiff_freehunks(l.next);
   343 	return rl ? rl : PyErr_NoMemory();
   346 	return rl ? rl : PyErr_NoMemory();
   344 }
   347 }
   345 
   348 
   346 static PyObject *bdiff(PyObject *self, PyObject *args)
   349 static PyObject *bdiff(PyObject *self, PyObject *args)
   347 {
   350 {
   348 	char *sa, *sb, *rb;
   351 	char *sa, *sb, *rb;
   349 	PyObject *result = NULL;
   352 	PyObject *result = NULL;
   350 	struct line *al, *bl;
   353 	struct bdiff_line *al, *bl;
   351 	struct hunk l, *h;
   354 	struct bdiff_hunk l, *h;
   352 	int an, bn, count;
   355 	int an, bn, count;
   353 	Py_ssize_t len = 0, la, lb;
   356 	Py_ssize_t len = 0, la, lb;
   354 	PyThreadState *_save;
   357 	PyThreadState *_save;
   355 
   358 
   356 	l.next = NULL;
   359 	l.next = NULL;
   362 		PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
   365 		PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
   363 		return NULL;
   366 		return NULL;
   364 	}
   367 	}
   365 
   368 
   366 	_save = PyEval_SaveThread();
   369 	_save = PyEval_SaveThread();
   367 	an = splitlines(sa, la, &al);
   370 	an = bdiff_splitlines(sa, la, &al);
   368 	bn = splitlines(sb, lb, &bl);
   371 	bn = bdiff_splitlines(sb, lb, &bl);
   369 	if (!al || !bl)
   372 	if (!al || !bl)
   370 		goto nomem;
   373 		goto nomem;
   371 
   374 
   372 	count = diff(al, an, bl, bn, &l);
   375 	count = bdiff_diff(al, an, bl, bn, &l);
   373 	if (count < 0)
   376 	if (count < 0)
   374 		goto nomem;
   377 		goto nomem;
   375 
   378 
   376 	/* calculate length of output */
   379 	/* calculate length of output */
   377 	la = lb = 0;
   380 	la = lb = 0;
   409 nomem:
   412 nomem:
   410 	if (_save)
   413 	if (_save)
   411 		PyEval_RestoreThread(_save);
   414 		PyEval_RestoreThread(_save);
   412 	free(al);
   415 	free(al);
   413 	free(bl);
   416 	free(bl);
   414 	freehunks(l.next);
   417 	bdiff_freehunks(l.next);
   415 	return result ? result : PyErr_NoMemory();
   418 	return result ? result : PyErr_NoMemory();
   416 }
   419 }
   417 
   420 
   418 /*
   421 /*
   419  * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
   422  * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,