33 const char *const plast = a + len - 1; |
33 const char *const plast = a + len - 1; |
34 struct bdiff_line *l; |
34 struct bdiff_line *l; |
35 |
35 |
36 /* count the lines */ |
36 /* count the lines */ |
37 i = 1; /* extra line for sentinel */ |
37 i = 1; /* extra line for sentinel */ |
38 for (p = a; p < plast; p++) |
38 for (p = a; p < plast; p++) { |
39 if (*p == '\n') |
39 if (*p == '\n') { |
40 i++; |
40 i++; |
41 if (p == plast) |
41 } |
|
42 } |
|
43 if (p == plast) { |
42 i++; |
44 i++; |
|
45 } |
43 |
46 |
44 *lr = l = (struct bdiff_line *)calloc(i, sizeof(struct bdiff_line)); |
47 *lr = l = (struct bdiff_line *)calloc(i, sizeof(struct bdiff_line)); |
45 if (!l) |
48 if (!l) { |
46 return -1; |
49 return -1; |
|
50 } |
47 |
51 |
48 /* build the line array and calculate hashes */ |
52 /* build the line array and calculate hashes */ |
49 hash = 0; |
53 hash = 0; |
50 for (p = a; p < plast; p++) { |
54 for (p = a; p < plast; p++) { |
51 hash = HASH(hash, *p); |
55 hash = HASH(hash, *p); |
88 { |
92 { |
89 int i, j, buckets = 1, t, scale; |
93 int i, j, buckets = 1, t, scale; |
90 struct pos *h = NULL; |
94 struct pos *h = NULL; |
91 |
95 |
92 /* build a hash table of the next highest power of 2 */ |
96 /* build a hash table of the next highest power of 2 */ |
93 while (buckets < bn + 1) |
97 while (buckets < bn + 1) { |
94 buckets *= 2; |
98 buckets *= 2; |
|
99 } |
95 |
100 |
96 /* try to allocate a large hash table to avoid collisions */ |
101 /* try to allocate a large hash table to avoid collisions */ |
97 for (scale = 4; scale; scale /= 2) { |
102 for (scale = 4; scale; scale /= 2) { |
98 h = (struct pos *)calloc(buckets, scale * sizeof(struct pos)); |
103 h = (struct pos *)calloc(buckets, scale * sizeof(struct pos)); |
99 if (h) |
104 if (h) { |
100 break; |
105 break; |
101 } |
106 } |
102 |
107 } |
103 if (!h) |
108 |
|
109 if (!h) { |
104 return 0; |
110 return 0; |
|
111 } |
105 |
112 |
106 buckets = buckets * scale - 1; |
113 buckets = buckets * scale - 1; |
107 |
114 |
108 /* clear the hash table */ |
115 /* clear the hash table */ |
109 for (i = 0; i <= buckets; i++) { |
116 for (i = 0; i <= buckets; i++) { |
113 |
120 |
114 /* add lines to the hash table chains */ |
121 /* add lines to the hash table chains */ |
115 for (i = 0; i < bn; i++) { |
122 for (i = 0; i < bn; i++) { |
116 /* find the equivalence class */ |
123 /* find the equivalence class */ |
117 for (j = b[i].hash & buckets; h[j].pos != -1; |
124 for (j = b[i].hash & buckets; h[j].pos != -1; |
118 j = (j + 1) & buckets) |
125 j = (j + 1) & buckets) { |
119 if (!cmp(b + i, b + h[j].pos)) |
126 if (!cmp(b + i, b + h[j].pos)) { |
120 break; |
127 break; |
|
128 } |
|
129 } |
121 |
130 |
122 /* add to the head of the equivalence class */ |
131 /* add to the head of the equivalence class */ |
123 b[i].n = h[j].pos; |
132 b[i].n = h[j].pos; |
124 b[i].e = j; |
133 b[i].e = j; |
125 h[j].pos = i; |
134 h[j].pos = i; |
131 |
140 |
132 /* match items in a to their equivalence class in b */ |
141 /* match items in a to their equivalence class in b */ |
133 for (i = 0; i < an; i++) { |
142 for (i = 0; i < an; i++) { |
134 /* find the equivalence class */ |
143 /* find the equivalence class */ |
135 for (j = a[i].hash & buckets; h[j].pos != -1; |
144 for (j = a[i].hash & buckets; h[j].pos != -1; |
136 j = (j + 1) & buckets) |
145 j = (j + 1) & buckets) { |
137 if (!cmp(a + i, b + h[j].pos)) |
146 if (!cmp(a + i, b + h[j].pos)) { |
138 break; |
147 break; |
|
148 } |
|
149 } |
139 |
150 |
140 a[i].e = j; /* use equivalence class for quick compare */ |
151 a[i].e = j; /* use equivalence class for quick compare */ |
141 if (h[j].len <= t) |
152 if (h[j].len <= t) { |
142 a[i].n = h[j].pos; /* point to head of match list */ |
153 a[i].n = h[j].pos; /* point to head of match list */ |
143 else |
154 } else { |
144 a[i].n = -1; /* too popular */ |
155 a[i].n = -1; /* too popular */ |
|
156 } |
145 } |
157 } |
146 |
158 |
147 /* discard hash tables */ |
159 /* discard hash tables */ |
148 free(h); |
160 free(h); |
149 return 1; |
161 return 1; |
156 int mi = a1, mj = b1, mk = 0, i, j, k, half, bhalf; |
168 int mi = a1, mj = b1, mk = 0, i, j, k, half, bhalf; |
157 |
169 |
158 /* window our search on large regions to better bound |
170 /* window our search on large regions to better bound |
159 worst-case performance. by choosing a window at the end, we |
171 worst-case performance. by choosing a window at the end, we |
160 reduce skipping overhead on the b chains. */ |
172 reduce skipping overhead on the b chains. */ |
161 if (a2 - a1 > 30000) |
173 if (a2 - a1 > 30000) { |
162 a1 = a2 - 30000; |
174 a1 = a2 - 30000; |
|
175 } |
163 |
176 |
164 half = (a1 + a2 - 1) / 2; |
177 half = (a1 + a2 - 1) / 2; |
165 bhalf = (b1 + b2 - 1) / 2; |
178 bhalf = (b1 + b2 - 1) / 2; |
166 |
179 |
167 for (i = a1; i < a2; i++) { |
180 for (i = a1; i < a2; i++) { |
168 /* skip all lines in b after the current block */ |
181 /* skip all lines in b after the current block */ |
169 for (j = a[i].n; j >= b2; j = b[j].n) |
182 for (j = a[i].n; j >= b2; j = b[j].n) { |
170 ; |
183 ; |
|
184 } |
171 |
185 |
172 /* loop through all lines match a[i] in b */ |
186 /* loop through all lines match a[i] in b */ |
173 for (; j >= b1; j = b[j].n) { |
187 for (; j >= b1; j = b[j].n) { |
174 /* does this extend an earlier match? */ |
188 /* does this extend an earlier match? */ |
175 for (k = 1; j - k >= b1 && i - k >= a1; k++) { |
189 for (k = 1; j - k >= b1 && i - k >= a1; k++) { |
228 int i, j, k; |
244 int i, j, k; |
229 |
245 |
230 while (1) { |
246 while (1) { |
231 /* find the longest match in this chunk */ |
247 /* find the longest match in this chunk */ |
232 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j); |
248 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j); |
233 if (!k) |
249 if (!k) { |
234 return l; |
250 return l; |
|
251 } |
235 |
252 |
236 /* and recurse on the remaining chunks on either side */ |
253 /* and recurse on the remaining chunks on either side */ |
237 l = recurse(a, b, pos, a1, i, b1, j, l); |
254 l = recurse(a, b, pos, a1, i, b1, j, l); |
238 if (!l) |
255 if (!l) { |
239 return NULL; |
256 return NULL; |
|
257 } |
240 |
258 |
241 l->next = |
259 l->next = |
242 (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk)); |
260 (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk)); |
243 if (!l->next) |
261 if (!l->next) { |
244 return NULL; |
262 return NULL; |
|
263 } |
245 |
264 |
246 l = l->next; |
265 l = l->next; |
247 l->a1 = i; |
266 l->a1 = i; |
248 l->a2 = i + k; |
267 l->a2 = i + k; |
249 l->b1 = j; |
268 l->b1 = j; |
269 |
288 |
270 if (pos && t) { |
289 if (pos && t) { |
271 /* generate the matching block list */ |
290 /* generate the matching block list */ |
272 |
291 |
273 curr = recurse(a, b, pos, 0, an, 0, bn, base); |
292 curr = recurse(a, b, pos, 0, an, 0, bn, base); |
274 if (!curr) |
293 if (!curr) { |
275 return -1; |
294 return -1; |
|
295 } |
276 |
296 |
277 /* sentinel end hunk */ |
297 /* sentinel end hunk */ |
278 curr->next = |
298 curr->next = |
279 (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk)); |
299 (struct bdiff_hunk *)malloc(sizeof(struct bdiff_hunk)); |
280 if (!curr->next) |
300 if (!curr->next) { |
281 return -1; |
301 return -1; |
|
302 } |
282 curr = curr->next; |
303 curr = curr->next; |
283 curr->a1 = curr->a2 = an; |
304 curr->a1 = curr->a2 = an; |
284 curr->b1 = curr->b2 = bn; |
305 curr->b1 = curr->b2 = bn; |
285 curr->next = NULL; |
306 curr->next = NULL; |
286 } |
307 } |
289 |
310 |
290 /* normalize the hunk list, try to push each hunk towards the end */ |
311 /* normalize the hunk list, try to push each hunk towards the end */ |
291 for (curr = base->next; curr; curr = curr->next) { |
312 for (curr = base->next; curr; curr = curr->next) { |
292 struct bdiff_hunk *next = curr->next; |
313 struct bdiff_hunk *next = curr->next; |
293 |
314 |
294 if (!next) |
315 if (!next) { |
295 break; |
316 break; |
296 |
317 } |
297 if (curr->a2 == next->a1 || curr->b2 == next->b1) |
318 |
|
319 if (curr->a2 == next->a1 || curr->b2 == next->b1) { |
298 while (curr->a2 < an && curr->b2 < bn && |
320 while (curr->a2 < an && curr->b2 < bn && |
299 next->a1 < next->a2 && next->b1 < next->b2 && |
321 next->a1 < next->a2 && next->b1 < next->b2 && |
300 !cmp(a + curr->a2, b + curr->b2)) { |
322 !cmp(a + curr->a2, b + curr->b2)) { |
301 curr->a2++; |
323 curr->a2++; |
302 next->a1++; |
324 next->a1++; |
303 curr->b2++; |
325 curr->b2++; |
304 next->b1++; |
326 next->b1++; |
305 } |
327 } |
306 } |
328 } |
307 |
329 } |
308 for (curr = base->next; curr; curr = curr->next) |
330 |
|
331 for (curr = base->next; curr; curr = curr->next) { |
309 count++; |
332 count++; |
|
333 } |
310 return count; |
334 return count; |
311 } |
335 } |
312 |
336 |
313 /* deallocate list of hunks; l may be NULL */ |
337 /* deallocate list of hunks; l may be NULL */ |
314 void bdiff_freehunks(struct bdiff_hunk *l) |
338 void bdiff_freehunks(struct bdiff_hunk *l) |