Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/cmdutil.py @ 34682:7e3001b74ab3
tersestatus: re-implement the functionality to terse the status
The previous terse status implementation was hacking around os.listdir() and was
flaky. There have been a lot of instances of Mercurial buildbots failing
and Google's internal builds failing because of the
hacky implementation of terse status. Even though I wrote the last
implementation, it was hard for me to find the reason for the flakiness.
The new implementation can be slower than the old one but is clean and easy to
understand.
In this implementation we create a node object for each directory and build a
tree-like structure starting from the root of the working copy. While building the
tree-like structure we store some information on the nodes which will be helpful
for deciding later whether we can terse the dir or not.
Once the whole tree is built, we traverse it and build the list of files for each
status with the required tersing.
There is no behaviour change, as the old test, test-status-terse.t, passes with
the new implementation.
Differential Revision: https://phab.mercurial-scm.org/D985
author | Pulkit Goyal <7895pulkit@gmail.com> |
---|---|
date | Fri, 06 Oct 2017 20:54:23 +0530 |
parents | 2a360445afa0 |
children | 3d6d4b12128e |
comparison
equal
deleted
inserted
replaced
34681:4dc8a2ee0f4f | 34682:7e3001b74ab3 |
---|---|
400 with repo.wlock(): | 400 with repo.wlock(): |
401 return recordfunc(ui, repo, message, match, opts) | 401 return recordfunc(ui, repo, message, match, opts) |
402 | 402 |
403 return commit(ui, repo, recordinwlock, pats, opts) | 403 return commit(ui, repo, recordinwlock, pats, opts) |
404 | 404 |
405 def tersestatus(root, statlist, status, ignorefn, ignore): | 405 |
406 # extracted at module level as it's required each time a file will be added | |
407 # to dirnode class object below | |
408 pathsep = pycompat.ossep | |
409 | |
410 class dirnode(object): | |
406 """ | 411 """ |
407 Returns a list of statuses with directory collapsed if all the files in the | 412 represents a directory in user working copy |
408 directory has the same status. | 413 |
414 stores information which is required for purpose of tersing the status | |
415 | |
416 path is the path to the directory | |
417 | |
418 statuses is a set of statuses of all files in this directory (this includes | |
419 all the files in all the subdirectories too) | |
420 | |
421 files is a list of files which are direct child of this directory | |
422 | |
423 subdirs is a dictionary of sub-directory name as the key and it's own | |
424 dirnode object as the value | |
409 """ | 425 """ |
410 | 426 |
411 def numfiles(dirname): | 427 def __init__(self, dirpath): |
428 self.path = dirpath | |
429 self.statuses = set([]) | |
430 self.files = [] | |
431 self.subdirs = {} | |
432 | |
433 def _addfileindir(self, filename, status): | |
434 """ adds a file in this directory as the direct child """ | |
435 self.files.append((filename, status)) | |
436 | |
437 def addfile(self, filename, status): | |
412 """ | 438 """ |
413 Calculates the number of tracked files in a given directory which also | 439 adds a file which is present in this directory to its direct parent |
414 includes files which were removed or deleted. Considers ignored files | 440 dirnode object |
415 if ignore argument is True or 'i' is present in status argument. | 441 |
442 if the file is not direct child of this directory, we traverse to the | |
443 directory of which this file is a direct child of and add the file there | |
416 """ | 444 """ |
417 if lencache.get(dirname): | 445 |
418 return lencache[dirname] | 446 # the filename contains a path separator, it means it's not the direct |
419 if 'i' in status or ignore: | 447 # child of this directory |
420 def match(localpath): | 448 if pathsep in filename: |
421 absolutepath = os.path.join(root, localpath) | 449 subdir, filep = filename.split(pathsep, 1) |
422 if os.path.isdir(absolutepath) and isemptydir(absolutepath): | 450 |
423 return True | 451 # does the dirnode object for subdir exists |
424 return False | 452 if subdir not in self.subdirs: |
453 subdirpath = os.path.join(self.path, subdir) | |
454 self.subdirs[subdir] = dirnode(subdirpath) | |
455 | |
456 # try adding the file in subdir | |
457 self.subdirs[subdir].addfile(filep, status) | |
458 | |
425 else: | 459 else: |
426 def match(localpath): | 460 self._addfileindir(filename, status) |
427 # there can be directory whose all the files are ignored and | 461 |
428 # hence the drectory should also be ignored while counting | 462 if status not in self.statuses: |
429 # number of files or subdirs in it's parent directory. This | 463 self.statuses.add(status) |
430 # checks the same. | 464 |
431 # XXX: We need a better logic here. | 465 def _addfilestotersed(path, files, tersedict): |
432 if os.path.isdir(os.path.join(root, localpath)): | 466 """ adds files to the their respective status list in the final tersed list |
433 return isignoreddir(localpath) | 467 |
434 else: | 468 path is the path of parent directory of the file |
435 # XXX: there can be files which have the ignored pattern but | 469 files is a list of tuple where each tuple is (filename, status) |
436 # are not ignored. That leads to bug in counting number of | 470 tersedict is a dictonary which contains each status abbreviation as key and |
437 # tracked files in the directory. | 471 list of files and tersed dirs in that status as value |
438 return ignorefn(localpath) | 472 """ |
439 lendir = 0 | 473 for f, st in files: |
440 abspath = os.path.join(root, dirname) | 474 tersedict[st].append(os.path.join(path, f)) |
441 # There might be cases when a directory does not exists as the whole | 475 |
442 # directory can be removed and/or deleted. | 476 def _processtersestatus(subdir, tersedict, terseargs): |
443 try: | 477 """a recursive function which process status for a certain directory. |
444 for f in os.listdir(abspath): | 478 |
445 localpath = os.path.join(dirname, f) | 479 subdir is an oject of dirnode class defined below. each object of dirnode |
446 if not match(localpath): | 480 class has a set of statuses which files in that directory has. This ease our |
447 lendir += 1 | 481 check whether we can terse that directory or not. |
448 except OSError: | 482 |
449 pass | 483 tersedict is a dictonary which contains each status abbreviation as key and |
450 lendir += len(absentdir.get(dirname, [])) | 484 list of files and tersed dirs in that status as value. In each function call |
451 lencache[dirname] = lendir | 485 we are passing the same dict and adding files and dirs to it. |
452 return lendir | 486 |
453 | 487 terseargs is the string of arguments passed by the user with `--terse` flag. |
454 def isemptydir(abspath): | 488 |
455 """ | 489 Following are the cases which can happen: |
456 Check whether a directory is empty or not, i.e. there is no files in the | 490 |
457 directory and all its subdirectories. | 491 1) All the files in the directory (including all the files in its |
458 """ | 492 subdirectories) share the same status and the user has asked us to terse |
459 for f in os.listdir(abspath): | 493 that status. -> we add the directory name to status list and return |
460 fullpath = os.path.join(abspath, f) | 494 |
461 if os.path.isdir(fullpath): | 495 2) If '1)' does not happen, we do following: |
462 # recursion here | 496 |
463 ret = isemptydir(fullpath) | 497 a) Add all the files which are in this directory (only the ones in |
464 if not ret: | 498 this directory, not the subdirs) to their respective status list |
465 return False | 499 |
466 else: | 500 b) Recurse the function on all the subdirectories of this directory |
467 return False | 501 """ |
468 return True | 502 |
469 | 503 if len(subdir.statuses) == 1: |
470 def isignoreddir(localpath): | 504 onlyst = subdir.statuses.pop() |
471 """Return True if `localpath` directory is ignored or contains only | 505 |
472 ignored files and should hence be considered ignored. | 506 # Making sure we terse only when the status abbreviation is passed as |
473 """ | 507 # terse argument |
474 dirpath = os.path.join(root, localpath) | 508 if onlyst in terseargs: |
475 if ignorefn(dirpath): | 509 tersedict[onlyst].append(subdir.path + pycompat.ossep) |
476 return True | 510 return |
477 for f in os.listdir(dirpath): | 511 |
478 filepath = os.path.join(dirpath, f) | 512 # add the files to status list |
479 if os.path.isdir(filepath): | 513 _addfilestotersed(subdir.path, subdir.files, tersedict) |
480 # recursion here | 514 |
481 ret = isignoreddir(os.path.join(localpath, f)) | 515 #recurse on the subdirs |
482 if not ret: | 516 for dirobj in subdir.subdirs.values(): |
483 return False | 517 _processtersestatus(dirobj, tersedict, terseargs) |
484 else: | 518 |
485 if not ignorefn(os.path.join(localpath, f)): | 519 def tersedir(statuslist, terseargs): |
486 return False | 520 """ |
487 return True | 521 terses the status if all the files in a directory shares the same status |
488 | 522 |
489 def absentones(removedfiles, missingfiles): | 523 statuslist is scmutil.status() object which contains a list of files for |
490 """ | 524 each status. |
491 Returns a dictionary of directories with files in it which are either | 525 terseargs is string which is passed by the user as the argument to `--terse` |
492 removed or missing (deleted) in them. | 526 flag. |
493 """ | 527 |
494 absentdir = {} | 528 The function makes a tree of objects of dirnode class, and at each node it |
495 absentfiles = removedfiles + missingfiles | 529 stores the information required to know whether we can terse a certain |
496 while absentfiles: | 530 directory or not. |
497 f = absentfiles.pop() | 531 |
498 par = os.path.dirname(f) | 532 tersedict (defined in the function) is a dictionary which has one word key |
499 if par == '': | 533 for each status and a list of files and dir in that status as the respective |
500 continue | 534 value. The dictionary is passed to other helper functions which builds it. |
501 # we need to store files rather than number of files as some files | 535 """ |
502 # or subdirectories in a directory can be counted twice. This is | 536 # the order matters here as that is used to produce final list |
503 # also we have used sets here. | 537 allst = ('m', 'a', 'r', 'd', 'u', 'i', 'c') |
504 try: | 538 |
505 absentdir[par].add(f) | 539 # checking the argument validity |
506 except KeyError: | 540 for s in terseargs: |
507 absentdir[par] = set([f]) | 541 if s not in allst: |
508 absentfiles.append(par) | 542 raise error.Abort(_("'%s' not recognized") % s) |
509 return absentdir | 543 |
510 | 544 # creating a dirnode object for the root of the repo |
511 indexes = {'m': 0, 'a': 1, 'r': 2, 'd': 3, 'u': 4, 'i': 5, 'c': 6} | 545 rootobj = dirnode('') |
512 # get a dictonary of directories and files which are missing as os.listdir() | 546 pstatus = ('modified', 'added', 'deleted', 'clean', 'unknown', |
513 # won't be able to list them. | 547 'ignored', 'removed') |
514 absentdir = absentones(statlist[2], statlist[3]) | 548 |
515 finalrs = [[]] * len(indexes) | 549 tersedict = {} |
516 didsomethingchanged = False | 550 for attrname in pstatus: |
517 # dictionary to store number of files and subdir in a directory so that we | 551 for f in getattr(statuslist, attrname): |
518 # don't compute that again. | 552 rootobj.addfile(f, attrname[0]) |
519 lencache = {} | 553 tersedict[attrname[0]] = [] |
520 | 554 |
521 for st in pycompat.bytestr(status): | 555 # we won't be tersing the root dir, so add files in it |
522 | 556 _addfilestotersed(rootobj.path, rootobj.files, tersedict) |
523 try: | 557 |
524 ind = indexes[st] | 558 # process each sub-directory and build tersedict |
525 except KeyError: | 559 for subdir in rootobj.subdirs.values(): |
526 # TODO: Need a better error message here | 560 _processtersestatus(subdir, tersedict, terseargs) |
527 raise error.Abort("'%s' not recognized" % st) | 561 |
528 | 562 tersedlist = [] |
529 sfiles = statlist[ind] | 563 for st in allst: |
530 if not sfiles: | 564 tersedict[st].sort() |
531 continue | 565 tersedlist.append(tersedict[st]) |
532 pardict = {} | 566 |
533 for a in sfiles: | 567 return tersedlist |
534 par = os.path.dirname(a) | |
535 pardict.setdefault(par, []).append(a) | |
536 | |
537 rs = [] | |
538 newls = [] | |
539 for par, files in sorted(pardict.iteritems()): | |
540 lenpar = numfiles(par) | |
541 if lenpar == len(files): | |
542 newls.append(par) | |
543 | |
544 if not newls: | |
545 continue | |
546 | |
547 while newls: | |
548 newel = newls.pop() | |
549 if newel == '': | |
550 continue | |
551 parn = os.path.dirname(newel) | |
552 pardict[newel] = [] | |
553 # Adding pycompat.ossep as newel is a directory. | |
554 pardict.setdefault(parn, []).append(newel + pycompat.ossep) | |
555 lenpar = numfiles(parn) | |
556 if lenpar == len(pardict[parn]): | |
557 newls.append(parn) | |
558 | |
559 # dict.values() for Py3 compatibility | |
560 for files in pardict.values(): | |
561 rs.extend(files) | |
562 | |
563 rs.sort() | |
564 finalrs[ind] = rs | |
565 didsomethingchanged = True | |
566 | |
567 # If nothing is changed, make sure the order of files is preserved. | |
568 if not didsomethingchanged: | |
569 return statlist | |
570 | |
571 for x in xrange(len(indexes)): | |
572 if not finalrs[x]: | |
573 finalrs[x] = statlist[x] | |
574 | |
575 return finalrs | |
576 | 568 |
577 def _commentlines(raw): | 569 def _commentlines(raw): |
578 '''Surround lineswith a comment char and a new line''' | 570 '''Surround lineswith a comment char and a new line''' |
579 lines = raw.splitlines() | 571 lines = raw.splitlines() |
580 commentedlines = ['# %s' % line for line in lines] | 572 commentedlines = ['# %s' % line for line in lines] |