mercurial/bdiff.c
changeset 41336 763b45bc4483
parent 38308 068e774ae29e
child 46819 d4ba4d51f85f
equal deleted inserted replaced
41335:b81ca9a3f4e4 41336:763b45bc4483
    33 	const char *const plast = a + len - 1;
    33 	const char *const plast = a + len - 1;
    34 	struct bdiff_line *l;
    34 	struct bdiff_line *l;
    35 
    35 
    36 	/* count the lines */
    36 	/* count the lines */
    37 	i = 1; /* extra line for sentinel */
    37 	i = 1; /* extra line for sentinel */
    38 	for (p = a; p < plast; p++)
    38 	for (p = a; p < plast; p++) {
    39 		if (*p == '\n')
    39 		if (*p == '\n') {
    40 			i++;
    40 			i++;
    41 	if (p == plast)
    41 		}
       
    42 	}
       
    43 	if (p == plast) {
    42 		i++;
    44 		i++;
       
    45 	}
    43 
    46 
    44 	*lr = l = (struct bdiff_line *)calloc(i, sizeof(struct bdiff_line));
    47 	*lr = l = (struct bdiff_line *)calloc(i, sizeof(struct bdiff_line));
    45 	if (!l)
    48 	if (!l) {
    46 		return -1;
    49 		return -1;
       
    50 	}
    47 
    51 
    48 	/* build the line array and calculate hashes */
    52 	/* build the line array and calculate hashes */
    49 	hash = 0;
    53 	hash = 0;
    50 	for (p = a; p < plast; p++) {
    54 	for (p = a; p < plast; p++) {
    51 		hash = HASH(hash, *p);
    55 		hash = HASH(hash, *p);
    88 {
    92 {
    89 	int i, j, buckets = 1, t, scale;
    93 	int i, j, buckets = 1, t, scale;
    90 	struct pos *h = NULL;
    94 	struct pos *h = NULL;
    91 
    95 
    92 	/* build a hash table of the next highest power of 2 */
    96 	/* build a hash table of the next highest power of 2 */
    93 	while (buckets < bn + 1)
    97 	while (buckets < bn + 1) {
    94 		buckets *= 2;
    98 		buckets *= 2;
       
    99 	}
    95 
   100 
    96 	/* try to allocate a large hash table to avoid collisions */
   101 	/* try to allocate a large hash table to avoid collisions */
    97 	for (scale = 4; scale; scale /= 2) {
   102 	for (scale = 4; scale; scale /= 2) {
    98 		h = (struct pos *)calloc(buckets, scale * sizeof(struct pos));
   103 		h = (struct pos *)calloc(buckets, scale * sizeof(struct pos));
    99 		if (h)
   104 		if (h) {
   100 			break;
   105 			break;
   101 	}
   106 		}
   102 
   107 	}
   103 	if (!h)
   108 
       
   109 	if (!h) {
   104 		return 0;
   110 		return 0;
       
   111 	}
   105 
   112 
   106 	buckets = buckets * scale - 1;
   113 	buckets = buckets * scale - 1;
   107 
   114 
   108 	/* clear the hash table */
   115 	/* clear the hash table */
   109 	for (i = 0; i <= buckets; i++) {
   116 	for (i = 0; i <= buckets; i++) {
   113 
   120 
   114 	/* add lines to the hash table chains */
   121 	/* add lines to the hash table chains */
   115 	for (i = 0; i < bn; i++) {
   122 	for (i = 0; i < bn; i++) {
   116 		/* find the equivalence class */
   123 		/* find the equivalence class */
   117 		for (j = b[i].hash & buckets; h[j].pos != -1;
   124 		for (j = b[i].hash & buckets; h[j].pos != -1;
   118 		     j = (j + 1) & buckets)
   125 		     j = (j + 1) & buckets) {
   119 			if (!cmp(b + i, b + h[j].pos))
   126 			if (!cmp(b + i, b + h[j].pos)) {
   120 				break;
   127 				break;
       
   128 			}
       
   129 		}
   121 
   130 
   122 		/* add to the head of the equivalence class */
   131 		/* add to the head of the equivalence class */
   123 		b[i].n = h[j].pos;
   132 		b[i].n = h[j].pos;
   124 		b[i].e = j;
   133 		b[i].e = j;
   125 		h[j].pos = i;
   134 		h[j].pos = i;
   131 
   140 
   132 	/* match items in a to their equivalence class in b */
   141 	/* match items in a to their equivalence class in b */
   133 	for (i = 0; i < an; i++) {
   142 	for (i = 0; i < an; i++) {
   134 		/* find the equivalence class */
   143 		/* find the equivalence class */
   135 		for (j = a[i].hash & buckets; h[j].pos != -1;
   144 		for (j = a[i].hash & buckets; h[j].pos != -1;
   136 		     j = (j + 1) & buckets)
   145 		     j = (j + 1) & buckets) {
   137 			if (!cmp(a + i, b + h[j].pos))
   146 			if (!cmp(a + i, b + h[j].pos)) {
   138 				break;
   147 				break;
       
   148 			}
       
   149 		}
   139 
   150 
   140 		a[i].e = j; /* use equivalence class for quick compare */
   151 		a[i].e = j; /* use equivalence class for quick compare */
   141 		if (h[j].len <= t)
   152 		if (h[j].len <= t) {
   142 			a[i].n = h[j].pos; /* point to head of match list */
   153 			a[i].n = h[j].pos; /* point to head of match list */
   143 		else
   154 		} else {
   144 			a[i].n = -1; /* too popular */
   155 			a[i].n = -1; /* too popular */
       
   156 		}
   145 	}
   157 	}
   146 
   158 
   147 	/* discard hash tables */
   159 	/* discard hash tables */
   148 	free(h);
   160 	free(h);
   149 	return 1;
   161 	return 1;
   156 	int mi = a1, mj = b1, mk = 0, i, j, k, half, bhalf;
   168 	int mi = a1, mj = b1, mk = 0, i, j, k, half, bhalf;
   157 
   169 
   158 	/* window our search on large regions to better bound
   170 	/* window our search on large regions to better bound
   159 	   worst-case performance. by choosing a window at the end, we
   171 	   worst-case performance. by choosing a window at the end, we
   160 	   reduce skipping overhead on the b chains. */
   172 	   reduce skipping overhead on the b chains. */
   161 	if (a2 - a1 > 30000)
   173 	if (a2 - a1 > 30000) {
   162 		a1 = a2 - 30000;
   174 		a1 = a2 - 30000;
       
   175 	}
   163 
   176 
   164 	half = (a1 + a2 - 1) / 2;
   177 	half = (a1 + a2 - 1) / 2;
   165 	bhalf = (b1 + b2 - 1) / 2;
   178 	bhalf = (b1 + b2 - 1) / 2;
   166 
   179 
   167 	for (i = a1; i < a2; i++) {
   180 	for (i = a1; i < a2; i++) {
   168 		/* skip all lines in b after the current block */
   181 		/* skip all lines in b after the current block */
   169 		for (j = a[i].n; j >= b2; j = b[j].n)
   182 		for (j = a[i].n; j >= b2; j = b[j].n) {
   170 			;
   183 			;
       
   184 		}
   171 
   185 
   172 		/* loop through all lines match a[i] in b */
   186 		/* loop through all lines match a[i] in b */
   173 		for (; j >= b1; j = b[j].n) {
   187 		for (; j >= b1; j = b[j].n) {
   174 			/* does this extend an earlier match? */
   188 			/* does this extend an earlier match? */
   175 			for (k = 1; j - k >= b1 && i - k >= a1; k++) {
   189 			for (k = 1; j - k >= b1 && i - k >= a1; k++) {
   177 				if (pos[j - k].pos == i - k) {
   191 				if (pos[j - k].pos == i - k) {
   178 					k += pos[j - k].len;
   192 					k += pos[j - k].len;
   179 					break;
   193 					break;
   180 				}
   194 				}
   181 				/* previous line mismatch? */
   195 				/* previous line mismatch? */
   182 				if (a[i - k].e != b[j - k].e)
   196 				if (a[i - k].e != b[j - k].e) {
   183 					break;
   197 					break;
       
   198 				}
   184 			}
   199 			}
   185 
   200 
   186 			pos[j].pos = i;
   201 			pos[j].pos = i;
   187 			pos[j].len = k;
   202 			pos[j].len = k;
   188 
   203 
   210 		mi = mi - mk + 1;
   225 		mi = mi - mk + 1;
   211 		mj = mj - mk + 1;
   226 		mj = mj - mk + 1;
   212 	}
   227 	}
   213 
   228 
   214 	/* expand match to include subsequent popular lines */
   229 	/* expand match to include subsequent popular lines */
   215 	while (mi + mk < a2 && mj + mk < b2 && a[mi + mk].e == b[mj + mk].e)
   230 	while (mi + mk < a2 && mj + mk < b2 && a[mi + mk].e == b[mj + mk].e) {
   216 		mk++;
   231 		mk++;
       
   232 	}
   217 
   233 
   218 	*omi = mi;
   234 	*omi = mi;
   219 	*omj = mj;
   235 	*omj = mj;
   220 
   236 
   221 	return mk;
   237 	return mk;
   228 	int i, j, k;
   244 	int i, j, k;
   229 
   245 
   230 	while (1) {
   246 	while (1) {
   231 		/* find the longest match in this chunk */
   247 		/* find the longest match in this chunk */
   232 		k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
   248 		k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
   233 		if (!k)
   249 		if (!k) {
   234 			return l;
   250 			return l;
       
   251 		}
   235 
   252 
   236 		/* and recurse on the remaining chunks on either side */
   253 		/* and recurse on the remaining chunks on either side */
   237 		l = recurse(a, b, pos, a1, i, b1, j, l);
   254 		l = recurse(a, b, pos, a1, i, b1, j, l);
   238 		if (!l)
   255 		if (!l) {
   239 			return NULL;
   256 			return NULL;
       
   257 		}
   240 
   258 
   241 		l->next =
   259 		l->next =
   242 		    (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
   260 		    (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
   243 		if (!l->next)
   261 		if (!l->next) {
   244 			return NULL;
   262 			return NULL;
       
   263 		}
   245 
   264 
   246 		l = l->next;
   265 		l = l->next;
   247 		l->a1 = i;
   266 		l->a1 = i;
   248 		l->a2 = i + k;
   267 		l->a2 = i + k;
   249 		l->b1 = j;
   268 		l->b1 = j;
   269 
   288 
   270 	if (pos && t) {
   289 	if (pos && t) {
   271 		/* generate the matching block list */
   290 		/* generate the matching block list */
   272 
   291 
   273 		curr = recurse(a, b, pos, 0, an, 0, bn, base);
   292 		curr = recurse(a, b, pos, 0, an, 0, bn, base);
   274 		if (!curr)
   293 		if (!curr) {
   275 			return -1;
   294 			return -1;
       
   295 		}
   276 
   296 
   277 		/* sentinel end hunk */
   297 		/* sentinel end hunk */
   278 		curr->next =
   298 		curr->next =
   279 		    (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
   299 		    (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk));
   280 		if (!curr->next)
   300 		if (!curr->next) {
   281 			return -1;
   301 			return -1;
       
   302 		}
   282 		curr = curr->next;
   303 		curr = curr->next;
   283 		curr->a1 = curr->a2 = an;
   304 		curr->a1 = curr->a2 = an;
   284 		curr->b1 = curr->b2 = bn;
   305 		curr->b1 = curr->b2 = bn;
   285 		curr->next = NULL;
   306 		curr->next = NULL;
   286 	}
   307 	}
   289 
   310 
   290 	/* normalize the hunk list, try to push each hunk towards the end */
   311 	/* normalize the hunk list, try to push each hunk towards the end */
   291 	for (curr = base->next; curr; curr = curr->next) {
   312 	for (curr = base->next; curr; curr = curr->next) {
   292 		struct bdiff_hunk *next = curr->next;
   313 		struct bdiff_hunk *next = curr->next;
   293 
   314 
   294 		if (!next)
   315 		if (!next) {
   295 			break;
   316 			break;
   296 
   317 		}
   297 		if (curr->a2 == next->a1 || curr->b2 == next->b1)
   318 
       
   319 		if (curr->a2 == next->a1 || curr->b2 == next->b1) {
   298 			while (curr->a2 < an && curr->b2 < bn &&
   320 			while (curr->a2 < an && curr->b2 < bn &&
   299 			       next->a1 < next->a2 && next->b1 < next->b2 &&
   321 			       next->a1 < next->a2 && next->b1 < next->b2 &&
   300 			       !cmp(a + curr->a2, b + curr->b2)) {
   322 			       !cmp(a + curr->a2, b + curr->b2)) {
   301 				curr->a2++;
   323 				curr->a2++;
   302 				next->a1++;
   324 				next->a1++;
   303 				curr->b2++;
   325 				curr->b2++;
   304 				next->b1++;
   326 				next->b1++;
   305 			}
   327 			}
   306 	}
   328 		}
   307 
   329 	}
   308 	for (curr = base->next; curr; curr = curr->next)
   330 
       
   331 	for (curr = base->next; curr; curr = curr->next) {
   309 		count++;
   332 		count++;
       
   333 	}
   310 	return count;
   334 	return count;
   311 }
   335 }
   312 
   336 
   313 /* deallocate list of hunks; l may be NULL */
   337 /* deallocate list of hunks; l may be NULL */
   314 void bdiff_freehunks(struct bdiff_hunk *l)
   338 void bdiff_freehunks(struct bdiff_hunk *l)